Showing preview only (3,934K chars total). Download the full file or copy to clipboard to get everything.
Repository: pingcap/tidb-ansible
Branch: master
Commit: 61f5ec376c82
Files: 379
Total size: 3.7 MB
Directory structure:
gitextract_3y37qitl/
├── .gitignore
├── LICENSE
├── README.md
├── ansible.cfg
├── bootstrap.yml
├── callback_plugins/
│ ├── help.py
│ └── yaml.py
├── clean_log_cron.yml
├── cloud/
│ └── aws-ansible/
│ ├── aws_bootstrap.yml
│ ├── aws_inventory_file_generate.yml
│ ├── aws_prepare.yml
│ ├── aws_teardown.yml
│ ├── ec2.ini
│ ├── ec2.py
│ ├── files/
│ │ └── sources.list
│ ├── roles/
│ │ └── aws/
│ │ └── tasks/
│ │ └── main.yml
│ ├── templates/
│ │ └── aws.inventory.ini.j2
│ └── vars.yml
├── collect_diagnosis.yml
├── common_tasks/
│ ├── add_evict_leader_scheduler.yml
│ ├── create_grafana_api_keys.yml
│ ├── get_pd_leader.yml
│ ├── get_pd_leader_tls.yml
│ ├── get_pd_name.yml
│ ├── get_pd_name_tls.yml
│ ├── get_pd_tikv_addr.yml
│ ├── get_store_id.yml
│ ├── get_store_id_tls.yml
│ ├── remove_evict_leader_scheduler.yml
│ └── transfer_pd_leader.yml
├── conf/
│ ├── alertmanager.yml
│ ├── drainer.toml
│ ├── pd.yml
│ ├── pump.yml
│ ├── spark-defaults.yml
│ ├── spark-env.yml
│ ├── ssl/
│ │ ├── ca-config.json
│ │ └── ca-csr.json
│ ├── tidb-lightning.yml
│ ├── tidb.yml
│ ├── tiflash-learner.yml
│ ├── tiflash.yml
│ ├── tikv-importer.yml
│ └── tikv.yml
├── create_users.yml
├── deploy.yml
├── deploy_drainer.yml
├── deploy_ntp.yml
├── excessive_rolling_update.yml
├── filter_plugins/
│ └── tags.py
├── graceful_stop.yml
├── group_vars/
│ ├── alertmanager_servers.yml
│ ├── all.yml
│ ├── drainer_servers.yml
│ ├── grafana_servers.yml
│ ├── importer_server.yml
│ ├── lightning_server.yml
│ ├── monitored_servers.yml
│ ├── monitoring_servers.yml
│ ├── pd_servers.yml
│ ├── pump_servers.yml
│ ├── tidb_servers.yml
│ ├── tiflash_servers.yml
│ └── tikv_servers.yml
├── hosts.ini
├── inventory.ini
├── library/
│ ├── coreos_facts
│ ├── docker_facts
│ └── wait_for_pid.py
├── local_prepare.yml
├── log/
│ └── .gitignore
├── migrate_monitor.yml
├── requirements.txt
├── roles/
│ ├── alertmanager/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ ├── run_alertmanager_binary.sh.j2
│ │ └── run_alertmanager_docker.sh.j2
│ ├── blackbox_exporter/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ ├── blackbox.yml.j2
│ │ ├── run_blackbox_exporter_binary.sh.j2
│ │ └── run_blackbox_exporter_docker.sh.j2
│ ├── bootstrap/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ ├── main.yml
│ │ └── root_tasks.yml
│ ├── check_config_dynamic/
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_config_pd/
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_config_static/
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_config_tidb/
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_config_tikv/
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_system_dynamic/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_system_optional/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ └── main.yml
│ ├── check_system_static/
│ │ └── tasks/
│ │ └── main.yml
│ ├── clean_log_pd/
│ │ └── tasks/
│ │ ├── add_cron.yml
│ │ ├── del_cron.yml
│ │ └── main.yml
│ ├── clean_log_tidb/
│ │ └── tasks/
│ │ ├── add_cron.yml
│ │ ├── del_cron.yml
│ │ └── main.yml
│ ├── clean_log_tikv/
│ │ └── tasks/
│ │ ├── add_cron.yml
│ │ ├── del_cron.yml
│ │ └── main.yml
│ ├── collect_diagnosis/
│ │ ├── meta/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ └── main.yml
│ ├── collector_host/
│ │ └── tasks/
│ │ ├── collect_log.yml
│ │ └── main.yml
│ ├── collector_pd/
│ │ └── tasks/
│ │ ├── collect_config.yml
│ │ ├── collect_log.yml
│ │ └── main.yml
│ ├── collector_prometheus/
│ │ └── tasks/
│ │ └── main.yml
│ ├── collector_pump/
│ │ └── tasks/
│ │ ├── collect_log.yml
│ │ └── main.yml
│ ├── collector_tidb/
│ │ └── tasks/
│ │ ├── collect_config.yml
│ │ ├── collect_log.yml
│ │ └── main.yml
│ ├── collector_tikv/
│ │ └── tasks/
│ │ ├── collect_config.yml
│ │ ├── collect_log.yml
│ │ └── main.yml
│ ├── common_dir/
│ │ └── tasks/
│ │ └── main.yml
│ ├── dashboard_topo/
│ │ ├── tasks/
│ │ │ └── main.yml
│ │ └── templates/
│ │ └── init_dashboard_topo.sh.j2
│ ├── drainer/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ └── make-ssl.sh
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── check_certs.yml
│ │ │ ├── gen_certs.yml
│ │ │ ├── install_certs.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ └── run_drainer_binary.sh.j2
│ │ └── vars/
│ │ └── default.yml
│ ├── firewalld/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── handlers/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ └── main.yml
│ ├── grafana/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ ├── systemd_deployment.yml
│ │ │ └── tasks.yml
│ │ └── templates/
│ │ ├── data_source.json.j2
│ │ ├── grafana.ini.j2
│ │ ├── run_grafana_binary.sh.j2
│ │ └── run_grafana_docker.sh.j2
│ ├── kafka_exporter/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ └── run_kafka_exporter_binary.sh.j2
│ ├── local/
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ └── main.yml
│ │ └── templates/
│ │ ├── binary_packages.yml.j2
│ │ ├── common_packages.yml.j2
│ │ └── docker_packages.yml.j2
│ ├── machine_benchmark/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ ├── fio_randread.yml
│ │ ├── fio_randread_write.yml
│ │ ├── fio_randread_write_latency.yml
│ │ └── main.yml
│ ├── node_exporter/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ ├── run_node_exporter_binary.sh.j2
│ │ └── run_node_exporter_docker.sh.j2
│ ├── ops/
│ │ ├── tasks/
│ │ │ └── main.yml
│ │ └── templates/
│ │ ├── check_tikv.sh.j2
│ │ └── pd-ctl.sh.j2
│ ├── pd/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ └── make-ssl.sh
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── check_certs.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── gen_certs.yml
│ │ │ ├── install_certs.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ ├── pd.toml.j2
│ │ │ ├── run_pd_binary.sh.j2
│ │ │ └── run_pd_docker.sh.j2
│ │ └── vars/
│ │ └── default.yml
│ ├── perf_tools/
│ │ └── tasks/
│ │ └── main.yml
│ ├── pre-ansible/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ └── tasks/
│ │ ├── coreos.yml
│ │ ├── main.yml
│ │ └── root_tasks.yml
│ ├── prometheus/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ ├── binlog.rules.yml
│ │ │ ├── blacker.rules.yml
│ │ │ ├── bypass.rules.yml
│ │ │ ├── kafka.rules.yml
│ │ │ ├── lightning.rules.yml
│ │ │ ├── node.rules.yml
│ │ │ ├── pd.rules.yml
│ │ │ ├── tidb.rules.yml
│ │ │ ├── tiflash.rules.yml
│ │ │ ├── tikv.accelerate.rules.yml
│ │ │ └── tikv.rules.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ ├── prometheus.yml.j2
│ │ ├── run_prometheus_binary.sh.j2
│ │ └── run_prometheus_docker.sh.j2
│ ├── pump/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ └── make-ssl.sh
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── check_certs.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── gen_certs.yml
│ │ │ ├── install_certs.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ ├── pump.toml.j2
│ │ │ ├── run_pump_binary.sh.j2
│ │ │ └── run_pump_docker.sh.j2
│ │ └── vars/
│ │ └── default.yml
│ ├── pushgateway/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ └── templates/
│ │ ├── run_pushgateway_binary.sh.j2
│ │ └── run_pushgateway_docker.sh.j2
│ ├── supervise/
│ │ ├── tasks/
│ │ │ └── main.yml
│ │ └── templates/
│ │ ├── start_role.sh.j2
│ │ └── stop_role.sh.j2
│ ├── systemd/
│ │ ├── tasks/
│ │ │ └── main.yml
│ │ └── templates/
│ │ ├── start_role.sh.j2
│ │ ├── stop_role.sh.j2
│ │ ├── systemd_binary.service.j2
│ │ └── systemd_docker.service.j2
│ ├── tidb/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ └── make-ssl.sh
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── check_certs.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── gen_certs.yml
│ │ │ ├── install_certs.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ ├── run_tidb_binary.sh.j2
│ │ │ ├── run_tidb_docker.sh.j2
│ │ │ └── tidb.toml.j2
│ │ └── vars/
│ │ └── default.yml
│ ├── tidb_lightning/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ └── main.yml
│ │ ├── templates/
│ │ │ ├── start_lightning_binary.sh.j2
│ │ │ ├── stop_lightning_binary.sh.j2
│ │ │ ├── tidb-lightning.toml.j2
│ │ │ └── tidb_lightning_ctl_binary.sh.j2
│ │ └── vars/
│ │ └── tidb-lightning.yml
│ ├── tiflash/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ ├── run_tiflash_binary.sh.j2
│ │ │ ├── tiflash.toml.j2
│ │ │ └── tiflash_learner.toml.j2
│ │ └── vars/
│ │ ├── tiflash-learner.yml
│ │ └── tiflash.yml
│ ├── tikv/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── files/
│ │ │ └── make-ssl.sh
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ ├── check_certs.yml
│ │ │ ├── check_filesystem.yml
│ │ │ ├── docker_deployment.yml
│ │ │ ├── gen_certs.yml
│ │ │ ├── install_certs.yml
│ │ │ ├── main.yml
│ │ │ ├── supervise_deployment.yml
│ │ │ └── systemd_deployment.yml
│ │ ├── templates/
│ │ │ ├── run_tikv_binary.sh.j2
│ │ │ ├── run_tikv_docker.sh.j2
│ │ │ └── tikv.toml.j2
│ │ └── vars/
│ │ └── default.yml
│ ├── tikv_importer/
│ │ ├── defaults/
│ │ │ └── main.yml
│ │ ├── meta/
│ │ │ └── main.yml
│ │ ├── tasks/
│ │ │ ├── binary_deployment.yml
│ │ │ └── main.yml
│ │ ├── templates/
│ │ │ ├── start_importer_binary.sh.j2
│ │ │ ├── stop_importer_binary.sh.j2
│ │ │ └── tikv-importer.toml.j2
│ │ └── vars/
│ │ └── tikv-importer.yml
│ └── tispark/
│ ├── tasks/
│ │ └── main.yml
│ └── templates/
│ ├── log4j.properties.j2
│ ├── spark-defaults.conf.j2
│ ├── spark-env.sh.j2
│ └── start-slave.sh.j2
├── rolling_update.yml
├── rolling_update_monitor.yml
├── scripts/
│ ├── binlog.json
│ ├── blackbox_exporter.json
│ ├── br.json
│ ├── check/
│ │ ├── check_cpufreq.py
│ │ ├── epoll_chk.cc
│ │ ├── epollexclusive-amd64
│ │ ├── epollexclusive-arm64
│ │ └── parse_fio_output.py
│ ├── clsrun.sh
│ ├── dashboard_topo.py
│ ├── disk_performance.json
│ ├── funcslower
│ ├── grafana-config-copy.py
│ ├── grafana_pdf.py
│ ├── inventory_check.py
│ ├── iosnoop
│ ├── kafka.json
│ ├── lightning.json
│ ├── loader.json
│ ├── metrics-delete.py
│ ├── montidb.sh
│ ├── node.json
│ ├── overview.json
│ ├── pd.json
│ ├── pdn.json
│ ├── performance_read.json
│ ├── performance_write.json
│ ├── reparo.json
│ ├── syncer.json
│ ├── table-regions-statistic.py
│ ├── table-regions.py
│ ├── tidb.json
│ ├── tidb_summary.json
│ ├── tiflash_proxy_details.json
│ ├── tiflash_proxy_summary.json
│ ├── tiflash_summary.json
│ ├── tikv_details.json
│ ├── tikv_raw.json
│ ├── tikv_summary.json
│ └── tikv_trouble_shooting.json
├── start.yml
├── start_drainer.yml
├── start_spark.yml
├── stop.yml
├── stop_drainer.yml
├── stop_spark.yml
├── templates/
│ └── grafana.dest.json.j2
├── unsafe_cleanup.yml
├── unsafe_cleanup_container.yml
└── unsafe_cleanup_data.yml
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
retry_files/
downloads/
resources/
fact_files/
conf/keys
scripts/dests.json
.vagrant/
*.retry
*.pyc
.vscode
.DS_Store
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
## :warning: End of project :warning:
[End of project announcement](https://github.com/pingcap/tidb-ansible/issues/1365)
**Ansible Playbook for TiDB is superseded by [TiUP](https://tiup.io/), a powerful tool to manage a TiDB cluster.**
And this project [has ended](https://github.com/pingcap/tidb-ansible/issues/1365). All development/maintenance activities have halted.
As it is free software, people are free and welcome to fork and develop the codebase on their own.
However, to avoid any confusion, the original repository is archived and we recommend any further fork/development to proceed with an explicit rename and rebranding first.
We encourage all interested parties to mirror any relevant bits as we can't actively guarantee their existence in the future.
# Ansible Playbook for TiDB
## Overview
Ansible is an IT automation tool. It can configure systems, deploy software, and orchestrate more advanced IT tasks such as continuous deployments or zero downtime rolling updates.
TiDB-Ansible is a TiDB cluster deployment tool developed by PingCAP, based on Ansible playbook. TiDB-Ansible enables you to quickly deploy a new TiDB cluster which includes PD, TiDB, TiKV, and the cluster monitoring modules.
You can use the TiDB-Ansible configuration file to set up the cluster topology, completing all operation tasks with one click, including:
- Initializing the system, including creating the user for deployment, setting up the hostname, etc.
- Deploying the components
- Rolling update, including module survival detection
- Cleaning data
- Cleaning the environment
- Configuring monitoring modules
## Tutorial
- [English](https://docs.pingcap.com/tidb/v3.0/online-deployment-using-ansible)
- [简体中文](https://docs.pingcap.com/zh/tidb/v3.0/online-deployment-using-ansible)
## License
TiDB-Ansible is under the Apache 2.0 license.
================================================
FILE: ansible.cfg
================================================
[defaults]
## Customize this!
inventory = inventory.ini
transport = ssh
# disable SSH key host checking
host_key_checking = False
# gathering = smart
gathering = explicit
fact_caching = jsonfile
fact_caching_connection = fact_files
retry_files_save_path = retry_files
#remote_tmp = /tmp/ansible
# for slow connections
timeout = 10
gather_subset = network,hardware
# if ssh port is not 22
#remote_port = 22
# for fun
# cow_selection = random
stdout_callback = yaml
# log information about executions at the designated location
log_path = log/ansible.log
deprecation_warnings = False
callback_whitelist = help
[ssh_connection]
## AWS key connection
# ssh_args = -i aws.key -C -o ControlMaster=auto -o ControlPersist=60s
## Jumper host connection
# ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s -o ProxyCommand="ssh user@host -p 22 nc %h %p"
## Default
# ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s
## Use custom ssh config file
# ssh_args = -F ssh_config
#scp_if_ssh = True
# close when using a jumper host, or have TTY errors
# Ubuntu is OK, while CentOS may cause errors
# pipelining = True
================================================
FILE: bootstrap.yml
================================================
---
# Copyright 2016 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This playbook is intended for one-pass execution
- name: initializing deployment target
hosts: localhost
gather_facts: false
roles:
- check_config_static
- name: check node config
hosts: all
gather_facts: false
become: true
roles:
- pre-ansible
- bootstrap
- name: check system
hosts: all
any_errors_fatal: true
roles:
- check_system_static
- { role: check_system_optional, when: not dev_mode|default(false) }
- name: tikv_servers machine benchmark
hosts: tikv_servers
gather_facts: false
roles:
- { role: machine_benchmark, when: not dev_mode|default(false) }
- name: create ops scripts
hosts: localhost
connection: local
gather_facts: false
roles:
- ops
================================================
FILE: callback_plugins/help.py
================================================
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
DOCUMENTATION = '''
callback: help
type: notification
short_description: print help message
version_added: historical
description:
- This plugin will print help message when tasks fail.
'''
import os
import io
import logging
import yaml
from ansible.plugins.callback import CallbackBase, strip_internal_keys
from ansible.parsing.yaml.dumper import AnsibleDumper
from ansible import constants as C
FAIL_LOGFILE = os.path.dirname(C.DEFAULT_LOG_PATH) + "/fail.log"
class CallbackModule(CallbackBase):
CALLBACK_VERSION = 2.0
CALLBACK_TYPE = 'notification'
CALLBACK_NAME = 'help'
CALLBACK_NEEDS_WHITELIST = True
def __init__(self):
self._play = None
self._last_task_banner = None
self._last_task_name = None
self._task_type_cache = {}
super(CallbackModule, self).__init__()
if not os.path.exists(os.path.dirname(C.DEFAULT_LOG_PATH)):
os.makedirs(os.path.dirname(C.DEFAULT_LOG_PATH))
self.logger = logging.getLogger('fail')
self.logger.setLevel(logging.DEBUG)
self.handler = logging.FileHandler(FAIL_LOGFILE)
self.logger.addHandler(self.handler)
def _format_results(self, result, indent=None, sort_keys=True, keep_invocation=False):
# All result keys stating with _ansible_ are internal, so remove them from the result before we output anything.
abridged_result = strip_internal_keys(result._result)
# remove invocation unless specifically wanting it
if not keep_invocation and self._display.verbosity < 3 and 'invocation' in abridged_result:
del abridged_result['invocation']
# remove diff information from screen output
if self._display.verbosity < 3 and 'diff' in abridged_result:
del abridged_result['diff']
if 'access_control_allow_headers' in abridged_result:
del abridged_result['access_control_allow_headers']
if 'access_control_allow_methods' in abridged_result:
del abridged_result['access_control_allow_methods']
if 'access_control_allow_origin' in abridged_result:
del abridged_result['access_control_allow_origin']
if 'x_content_type_options' in abridged_result:
del abridged_result['x_content_type_options']
# remove exception from screen output
if 'exception' in abridged_result:
del abridged_result['exception']
dumped = ''
dumpd_tile = '[' + str(result._host.name) + ']: Ansible Failed! ==>\n '
# put changed and skipped into a header line
if 'changed' in abridged_result:
dumped += 'changed=' + str(abridged_result['changed']).lower() + ' '
del abridged_result['changed']
if 'skipped' in abridged_result:
dumped += 'skipped=' + str(abridged_result['skipped']).lower() + ' '
del abridged_result['skipped']
# if we already have stdout, we don't need stdout_lines
if 'stdout' in abridged_result and 'stdout_lines' in abridged_result:
abridged_result['stdout_lines'] = '<omitted>'
if abridged_result:
dumped += '\n'
dumped += yaml.dump(abridged_result, width=1000, Dumper=AnsibleDumper, default_flow_style=False)
# indent by a couple of spaces
dumped = '\n '.join(dumped.split('\n')).rstrip()
return dumpd_tile + dumped + '\n'
def print_help_message(self):
self._display.display("Ask TiDB User Group for help:", color=C.COLOR_WARN)
self._display.display(
"It seems that you have encountered some problem. Please describe your operation steps and provide error information as much as possible on https://asktug.com (in Chinese) or https://stackoverflow.com/questions/tagged/tidb (in English). We will do our best to help solve your problem. Thanks. :-)",
color=C.COLOR_WARN)
def v2_runner_on_failed(self, result, ignore_errors=False):
if not ignore_errors:
messages = self._format_results(result)
self.logger.error(messages)
def v2_runner_on_unreachable(self, result):
# self.print_help_message()
self.logger.error('[%s]: Ansible UNREACHABLE! => changed=%s\n playbook: %s\n %s\n stderr: %s\n',
result._host.name, result._result['changed'],
self.playbook, result._task, result._result['msg'])
def v2_playbook_on_start(self, playbook):
self.playbook = playbook._file_name
open(FAIL_LOGFILE, 'w').close()
def v2_playbook_on_stats(self, stats):
    """At the end of the run, replay any failures recorded in FAIL_LOGFILE
    as an error summary; otherwise report success.

    Fixes over the previous version: the log file is read exactly once
    (it used to be opened twice — once with the platform-default encoding
    just to count lines, then again with utf-8 to print them), and the
    enumerate-sentinel line-counting idiom is replaced by a plain
    readlines() emptiness check.
    """
    if os.path.isfile(FAIL_LOGFILE):
        with io.open(FAIL_LOGFILE, 'r', encoding="utf-8") as f:
            lines = f.readlines()
        if lines:
            self._display.banner("ERROR MESSAGE SUMMARY")
            for line in lines:
                self._display.display(line.strip('\n'), color=C.COLOR_ERROR)
            self.print_help_message()
        else:
            self._display.display("Congrats! All goes well. :-)", color=C.COLOR_OK)
================================================
FILE: callback_plugins/yaml.py
================================================
# (c) 2017 Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
# Make coding more python3-ish
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
DOCUMENTATION = '''
callback: yaml
type: stdout
short_description: yaml-ized Ansible screen output
version_added: 2.5
description:
- Ansible output that can be quite a bit easier to read than the
default JSON formatting.
extends_documentation_fragment:
- default_callback
requirements:
- set as stdout in configuration
'''
import yaml
import json
import re
import string
import sys
from ansible.plugins.callback import CallbackBase, strip_internal_keys
from ansible.plugins.callback.default import CallbackModule as Default
from ansible.parsing.yaml.dumper import AnsibleDumper
# from http://stackoverflow.com/a/15423007/115478
def should_use_block(value):
    """Return True when the string contains any line-break character and
    should therefore be emitted in YAML block (literal) style."""
    line_break_chars = u"\u000a\u000d\u001c\u001d\u001e\u0085\u2028\u2029"
    return any(ch in value for ch in line_break_chars)
def my_represent_scalar(self, tag, value, style=None):
    """Represent a YAML scalar, choosing block style ('|') for multi-line
    strings and tidying their content for readability."""
    if style is None:
        if not should_use_block(value):
            style = self.default_style
        else:
            style = '|'
            # Readability matters more than byte-accuracy here:
            # drop trailing space ...
            value = value.rstrip()
            # ... strip non-printable characters ...
            value = ''.join(ch for ch in value if ch in string.printable)
            # ... expand tabs, since tabs prevent blocks from expanding ...
            value = value.expandtabs()
            # ... and remove odd bits of whitespace plus trailing spaces
            # before newlines, which also break block style.
            value = re.sub(r'[\x0b\x0c\r]', '', value)
            value = re.sub(r' +\n', '\n', value)
    node = yaml.representer.ScalarNode(tag, value, style=style)
    if self.alias_key is not None:
        self.represented_objects[self.alias_key] = node
    return node
class CallbackModule(Default):
    """
    Variation of the Default output which uses nicely readable YAML instead
    of JSON for printing results.
    """
    CALLBACK_VERSION = 2.0
    CALLBACK_TYPE = 'stdout'
    CALLBACK_NAME = 'yaml'

    def __init__(self):
        super(CallbackModule, self).__init__()
        # Route every scalar through the block-style-aware representer.
        yaml.representer.BaseRepresenter.represent_scalar = my_represent_scalar

    def _dump_results(self, result, indent=None, sort_keys=True, keep_invocation=False):
        """Render a task result dict as indented YAML text for the screen."""
        if result.get('_ansible_no_log', False):
            return json.dumps(dict(censored="the output has been hidden due to the fact that 'no_log: true' was specified for this result"))
        # Keys prefixed _ansible_ are internal and never shown.
        abridged_result = strip_internal_keys(result)
        # Hide invocation details unless explicitly requested or verbose (-vvv).
        if not keep_invocation and self._display.verbosity < 3 and 'invocation' in result:
            del abridged_result['invocation']
        # Diff output is likewise reserved for high verbosity.
        if self._display.verbosity < 3 and 'diff' in result:
            del abridged_result['diff']
        # Tracebacks are too noisy for the default screen output.
        abridged_result.pop('exception', None)
        dumped = ''
        # Hoist the changed/skipped flags into a compact header line.
        if 'changed' in abridged_result:
            dumped += 'changed=' + str(abridged_result.pop('changed')).lower() + ' '
        if 'skipped' in abridged_result:
            dumped += 'skipped=' + str(abridged_result.pop('skipped')).lower() + ' '
        # stdout_lines merely duplicates stdout; collapse it.
        if 'stdout' in abridged_result and 'stdout_lines' in abridged_result:
            abridged_result['stdout_lines'] = '<omitted>'
        if abridged_result:
            dumped += '\n'
        dumped += yaml.dump(abridged_result, width=1000, Dumper=AnsibleDumper, default_flow_style=False)
        # indent by a couple of spaces
        dumped = '\n '.join(dumped.split('\n')).rstrip()
        return dumped

    def v2_runner_on_skipped(self, result):
        # Deliberately silent: skipped tasks produce no output.
        pass

    def v2_runner_item_on_skipped(self, result):
        # Deliberately silent: skipped loop items produce no output.
        pass
================================================
FILE: clean_log_cron.yml
================================================
---
# Install log-cleanup cron jobs on every TiDB cluster component host.
# Each role registers a cron entry that prunes old component log files.
- hosts: pd_servers
  tags:
    - pd
  roles:
    - clean_log_pd

- hosts: tikv_servers
  tags:
    - tikv
  roles:
    - clean_log_tikv

- hosts: tidb_servers
  tags:
    - tidb
  roles:
    - clean_log_tidb
================================================
FILE: cloud/aws-ansible/aws_bootstrap.yml
================================================
---
# This playbook is intended for one-pass execution: it sets up passwordless
# SSH between all hosts, then prepares Ubuntu 14.04 hosts (apt mirrors,
# apt-fast, docker, profiling tools).
# Fixes: task-name typos ("Slup", "authrized_keys", "know_hosts") and two
# copy-pasted tasks both named "add user to docker group" that actually add
# the user to the stapusr/stapdev groups.
- name: "Group nodes by OS distribution"
  hosts: all
  gather_facts: true
  tasks:
    - name: group hosts by distribution
      group_by: key="{{ ansible_distribution }}-{{ ansible_distribution_version }}"
      changed_when: false

- name: authorized access
  hosts: all
  gather_facts: true
  tags:
    - ssh
  tasks:
    - name: generate rsa key if not exists
      shell: |
        yes n | ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa || echo ok

    - name: Slurp public key
      slurp:
        src: ".ssh/id_rsa.pub"
      register: rsa_pub_result

    - set_fact:
        rsa_pub_key: "{{ rsa_pub_result.content | b64decode | trim }}"

    - name: add to authorized_keys
      authorized_key:
        user: "{{ ansible_user }}"
        key: "{{ hostvars[item].rsa_pub_key }}"
      with_items: "{{ groups.all }}"

    # ssh-keygen -R {{ item }};
    - name: add to known_hosts
      shell: |
        ssh-keygen -R {{ hostvars[item].ansible_hostname }};
        ssh-keygen -R {{ hostvars[item].ansible_default_ipv4.address }};
        ssh-keygen -R {{ hostvars[item].ansible_hostname }},{{ hostvars[item].ansible_default_ipv4.address }};
        (ssh-keyscan -H {{ hostvars[item].ansible_hostname }};
        ssh-keyscan -H {{ hostvars[item].ansible_default_ipv4.address }};
        ssh-keyscan -H {{ hostvars[item].ansible_hostname }},{{ hostvars[item].ansible_default_ipv4.address }}) | uniq >> ~/.ssh/known_hosts
      with_items: "{{ groups.all }}"

- name: do AWS host preparation
  hosts: Ubuntu-14.04
  gather_facts: false
  tasks:
    - name: disable apt key check
      lineinfile: >
        dest=/etc/apt/apt.conf.d/99skipkeycheck line="APT::Get::AllowUnauthenticated "true";"
        create=yes
      become: true

    - name: change apt mirror.list
      copy: src=sources.list dest=/etc/apt/sources.list mode=0644
      become: true

    - name: add apt-fast to apt sources.list.d
      lineinfile: >
        dest=/etc/apt/sources.list.d/saiarcot895-myppa-trusty.list
        line="deb http://ppa.launchpad.net/saiarcot895/myppa/ubuntu trusty main"
        create=yes
      become: true

    - name: install apt-fast
      apt: name={{ item }} update_cache=yes
      become: true
      with_items:
        - apt-fast

    - name: add docker to apt sources.list.d
      lineinfile: >
        dest=/etc/apt/sources.list.d/docker.list
        line="deb https://mirrors.tuna.tsinghua.edu.cn/docker/apt/repo ubuntu-trusty main"
        create=yes
      become: true

    - name: update apt cache
      shell: apt-fast -y update

    # --skip-tags docker
    - name: install docker
      tags:
        - docker
      shell: >-
        creates=/usr/bin/docker
        apt-fast -y install docker-engine
      become: true

    - name: add user to docker group
      tags:
        - docker
      user: name=ubuntu groups=docker append=yes
      become: true

    # NOTE(review): this task runs without become — presumably it relies on
    # passwordless sudo via apt-fast wrappers; confirm it needs no privilege.
    - name: install perf/systemtab/unzip/ntp/zip
      shell: >-
        apt-fast -y install linux-tools-$(uname -r) systemtap unzip ntp zip iotop htop sysstat

    - name: add user to stapusr group
      user: name=ubuntu groups=stapusr append=yes
      become: true

    - name: add user to stapdev group
      user: name=ubuntu groups=stapdev append=yes
      become: true
================================================
FILE: cloud/aws-ansible/aws_inventory_file_generate.yml
================================================
---
# Copyright 2016 PingCAP, Inc.
# The Playbook of TiDB
# Generates a static inventory file (aws.ini.new) from running EC2 instances
# tagged with this deployment's ManagedBy/Creator tags.
# Fixes: bare-Jinja `when: "{{ ... }}"` (deprecated/warned by Ansible) and
# the "finnal message" task-name typo.
- name: prepare inventory config
  hosts: localhost
  gather_facts: false
  tasks:
    # Refuse to run against a non-empty inventory so an existing cluster
    # inventory cannot be clobbered.
    - fail: msg="inventory is not empty!"
      when: groups.all
    - include_vars:
        file: "{{ playbook_dir }}/vars.yml"
    - name: Gather EC2 facts.
      ec2_remote_facts:
        region: cn-north-1
        filters:
          instance-state-name: running
          "tag:ManagedBy": "{{ managed_by }}"
          "tag:Creator": "{{ creator }}"
      register: aws_ec2_facts
    - name: set up deploy servers
      add_host:
        groups: "{{ item.tags.Type | default('unused') }}_servers"
        hostname: "{{ item.public_ip_address }}"
      when: item.tags.Type is defined and item.tags.ManagedBy == managed_by
      with_items: "{{ aws_ec2_facts.instances | selectattr('state', 'equalto', 'running') | list }}"
    # Default the monitoring server to the first TiDB server when none is set.
    - name: set up monitoring server
      add_host:
        groups: monitoring_servers
        hostname: "{{ groups.tidb_servers[0] }}"
      when:
        - not (groups.monitoring_servers is defined and groups.monitoring_servers)
        - groups.tidb_servers is defined and groups.tidb_servers
    - name: set up monitored servers
      add_host:
        groups: monitored_servers
        hostname: "{{ item.public_ip_address }}"
      when: item.tags.ManagedBy is defined and item.tags.ManagedBy == managed_by
      with_items: "{{ aws_ec2_facts.instances | selectattr('state', 'equalto', 'running') | list }}"
    - name: write local inventory file to aws.ini.new
      template: src=aws.inventory.ini.j2 dest={{ playbook_dir }}/aws.ini.new
    - name: final message
      debug: msg="now copy aws.ini.new to your tidb-ansible project and enjoy deployment!"
================================================
FILE: cloud/aws-ansible/aws_prepare.yml
================================================
---
# Provision AWS resources (via the 'aws' role, driven by vars.yml), then
# point the operator at the inventory-generation playbook.
- name: do AWS preparation
  hosts: localhost
  gather_facts: false
  pre_tasks:
    - include_vars:
        file: "{{ playbook_dir }}/vars.yml"
  roles:
    - aws
  post_tasks:
    - name: display hosts
      debug: msg="run `ansible-playbook aws_inventory_file_generate.yml` to get your aws.ini!"
================================================
FILE: cloud/aws-ansible/aws_teardown.yml
================================================
---
# Tear down EC2 resources previously provisioned for this deployment.
# Instances are discovered by their ManagedBy tag, confirmed interactively,
# then terminated; finally the security group is removed (the VPC is kept).
- name: test
  hosts: localhost
  connection: local
  gather_facts: false
  pre_tasks:
    - include_vars:
        file: "{{ playbook_dir }}/vars.yml"
  tasks:
    - name: host facts
      ec2_remote_facts:
        filters:
          "tag:ManagedBy": "{{ managed_by }}"
        region: cn-north-1
      register: ec2_instances

    # Collect the public IPs of matching instances into a throwaway group.
    - name: add hosts
      add_host:
        name: "{{ item.public_ip_address }}"
        groups: sre-to-be-teardown
      with_items: "{{ ec2_instances.instances }}"
      #when: ec2_instances.skipped | defined and not ec2_instances.skipped

    - debug: var=groups['sre-to-be-teardown']

    # Last chance to abort before instances are destroyed.
    - pause: prompt="Are you sure to tear these down(C to continue, A to Abort)?"

- name: EC2 Instances
  hosts: sre-to-be-teardown
  gather_facts: false
  tasks:
    # ec2_facts provides ansible_ec2_instance_id for the terminate call below.
    - name: gather facts
      ec2_facts:

    - name: Terminate instances that were previously launched
      delegate_to: localhost
      ec2:
        state: 'absent'
        instance_ids: '{{ ansible_ec2_instance_id }}'
        region: cn-north-1
        wait: yes
        wait_timeout: 500

- name: AWS
  hosts: localhost
  connection: local
  become: false
  gather_facts: false
  tasks:
    - name: terminate security group
      ec2_group:
        name: "ansible-sg-by-{{ managed_by }}"
        description: vpc security group by {{ creator }}
        region: cn-north-1
        state: absent

    - debug: msg="we do not terminate vpc :)"
================================================
FILE: cloud/aws-ansible/ec2.ini
================================================
# Ansible EC2 external inventory script settings
#
[ec2]
# to talk to a private eucalyptus instance uncomment these lines
# and edit eucalyptus_host to be the host name of your cloud controller
#eucalyptus = True
#eucalyptus_host = clc.cloud.domain.org
# AWS regions to make calls to. Set this to 'all' to make request to all regions
# in AWS and merge the results together. Alternatively, set this to a comma
# separated list of regions. E.g. 'us-east-1,us-west-1,us-west-2'
regions = cn-north-1
regions_exclude = us-gov-west-1
#,cn-north-1
# When generating inventory, Ansible needs to know how to address a server.
# Each EC2 instance has a lot of variables associated with it. Here is the list:
# http://docs.pythonboto.org/en/latest/ref/ec2.html#module-boto.ec2.instance
# Below are 2 variables that are used as the address of a server:
# - destination_variable
# - vpc_destination_variable
# This is the normal destination variable to use. If you are running Ansible
# from outside EC2, then 'public_dns_name' makes the most sense. If you are
# running Ansible from within EC2, then perhaps you want to use the internal
# address, and should set this to 'private_dns_name'. The key of an EC2 tag
# may optionally be used; however the boto instance variables hold precedence
# in the event of a collision.
destination_variable = public_dns_name
# This allows you to override the inventory_name with an ec2 variable, instead
# of using the destination_variable above. Addressing (aka ansible_ssh_host)
# will still use destination_variable. Tags should be written as 'tag_TAGNAME'.
#hostname_variable = tag_Name
# For server inside a VPC, using DNS names may not make sense. When an instance
# has 'subnet_id' set, this variable is used. If the subnet is public, setting
# this to 'ip_address' will return the public IP address. For instances in a
# private subnet, this should be set to 'private_ip_address', and Ansible must
# be run from within EC2. The key of an EC2 tag may optionally be used; however
# the boto instance variables hold precedence in the event of a collision.
# WARNING: - instances that are in the private vpc, _without_ public ip address
# will not be listed in the inventory until You set:
# vpc_destination_variable = private_ip_address
vpc_destination_variable = ip_address
# The following two settings allow flexible ansible host naming based on a
# python format string and a comma-separated list of ec2 tags. Note that:
#
# 1) If the tags referenced are not present for some instances, empty strings
# will be substituted in the format string.
# 2) This overrides both destination_variable and vpc_destination_variable.
#
#destination_format = {0}.{1}.example.com
#destination_format_tags = Name,environment
# To tag instances on EC2 with the resource records that point to them from
# Route53, uncomment and set 'route53' to True.
route53 = False
# To exclude RDS instances from the inventory, uncomment and set to False.
#rds = False
# To exclude ElastiCache instances from the inventory, uncomment and set to False.
#elasticache = False
# Additionally, you can specify the list of zones to exclude looking up in
# 'route53_excluded_zones' as a comma-separated list.
# route53_excluded_zones = samplezone1.com, samplezone2.com
# By default, only EC2 instances in the 'running' state are returned. Set
# 'all_instances' to True to return all instances regardless of state.
all_instances = False
# By default, only EC2 instances in the 'running' state are returned. Specify
# EC2 instance states to return as a comma-separated list. This
# option is overridden when 'all_instances' is True.
# instance_states = pending, running, shutting-down, terminated, stopping, stopped
# By default, only RDS instances in the 'available' state are returned. Set
# 'all_rds_instances' to True to return all RDS instances regardless of state.
all_rds_instances = False
# Include RDS cluster information (Aurora etc.)
include_rds_clusters = False
# By default, only ElastiCache clusters and nodes in the 'available' state
# are returned. Set 'all_elasticache_clusters' and/or 'all_elasticache_nodes'
# to True to return all ElastiCache clusters and nodes, regardless of state.
#
# Note that all_elasticache_nodes only applies to listed clusters. That means
# if you set all_elasticache_clusters to false, no node will be returned from
# unavailable clusters, regardless of the state and to what you set for
# all_elasticache_nodes.
all_elasticache_replication_groups = False
all_elasticache_clusters = False
all_elasticache_nodes = False
# API calls to EC2 are slow. For this reason, we cache the results of an API
# call. Set this to the path you want cache files to be written to. Two files
# will be written to this directory:
# - ansible-ec2.cache
# - ansible-ec2.index
cache_path = ~/.ansible/tmp
# The number of seconds a cache file is considered valid. After this many
# seconds, a new API call will be made, and the cache file will be updated.
# To disable the cache, set this value to 0
cache_max_age = 300
# Organize groups into a nested/hierarchy instead of a flat namespace.
nested_groups = False
# Replace - tags when creating groups to avoid issues with ansible
replace_dash_in_groups = True
# If set to true, any tag of the form "a,b,c" is expanded into a list
# and the results are used to create additional tag_* inventory groups.
expand_csv_tags = False
# The EC2 inventory output can become very large. To manage its size,
# configure which groups should be created.
group_by_instance_id = True
group_by_region = True
group_by_availability_zone = True
group_by_ami_id = True
group_by_instance_type = True
group_by_key_pair = True
group_by_vpc_id = True
group_by_security_group = True
group_by_tag_keys = True
group_by_tag_none = True
group_by_route53_names = True
group_by_rds_engine = True
group_by_rds_parameter_group = True
group_by_elasticache_engine = True
group_by_elasticache_cluster = True
group_by_elasticache_parameter_group = True
group_by_elasticache_replication_group = True
# If you only want to include hosts that match a certain regular expression
# pattern_include = staging-*
# If you want to exclude any hosts that match a certain regular expression
# pattern_exclude = staging-*
# Instance filters can be used to control which instances are retrieved for
# inventory. For the full list of possible filters, please read the EC2 API
# docs: http://docs.aws.amazon.com/AWSEC2/latest/APIReference/ApiReference-query-DescribeInstances.html#query-DescribeInstances-filters
# Filters are key/value pairs separated by '=', to list multiple filters use
# a list separated by commas. See examples below.
# Retrieve only instances with (key=value) env=staging tag
# instance_filters = tag:env=staging
# Retrieve only instances with role=webservers OR role=dbservers tag
# instance_filters = tag:role=webservers,tag:role=dbservers
# Retrieve only t1.micro instances OR instances with tag env=staging
# instance_filters = instance-type=t1.micro,tag:env=staging
# You can use wildcards in filter values also. Below will list instances which
# tag Name value matches webservers1*
# (ex. webservers15, webservers1a, webservers123 etc)
# instance_filters = tag:Name=webservers1*
# A boto configuration profile may be used to separate out credentials
# see http://boto.readthedocs.org/en/latest/boto_config_tut.html
# boto_profile = some-boto-profile-name
[credentials]
# The AWS credentials can optionally be specified here. Credentials specified
# here are ignored if the environment variable AWS_ACCESS_KEY_ID or
# AWS_PROFILE is set, or if the boto_profile property above is set.
#
# Supplying AWS credentials here is not recommended, as it introduces
# non-trivial security concerns. When going down this route, please make sure
# to set access permissions for this file correctly, e.g. handle it the same
# way as you would a private SSH key.
#
# Unlike the boto and AWS configure files, this section does not support
# profiles.
#
# aws_access_key_id = AXXXXXXXXXXXXXX
# aws_secret_access_key = XXXXXXXXXXXXXXXXXXX
# aws_security_token = XXXXXXXXXXXXXXXXXXXXXXXXXXXX
================================================
FILE: cloud/aws-ansible/ec2.py
================================================
#!/usr/bin/env python
'''
EC2 external inventory script
=================================
Generates inventory that Ansible can understand by making API request to
AWS EC2 using the Boto library.
NOTE: This script assumes Ansible is being executed where the environment
variables needed for Boto have already been set:
export AWS_ACCESS_KEY_ID='AK123'
export AWS_SECRET_ACCESS_KEY='abc123'
This script also assumes there is an ec2.ini file alongside it. To specify a
different path to ec2.ini, define the EC2_INI_PATH environment variable:
export EC2_INI_PATH=/path/to/my_ec2.ini
If you're using eucalyptus you need to set the above variables and
you need to define:
export EC2_URL=http://hostname_of_your_cc:port/services/Eucalyptus
If you're using boto profiles (requires boto>=2.24.0) you can choose a profile
using the --boto-profile command line argument (e.g. ec2.py --boto-profile prod) or using
the AWS_PROFILE variable:
AWS_PROFILE=prod ansible-playbook -i ec2.py myplaybook.yml
For more details, see: http://docs.pythonboto.org/en/latest/boto_config_tut.html
When run against a specific host, this script returns the following variables:
- ec2_ami_launch_index
- ec2_architecture
- ec2_association
- ec2_attachTime
- ec2_attachment
- ec2_attachmentId
- ec2_block_devices
- ec2_client_token
- ec2_deleteOnTermination
- ec2_description
- ec2_deviceIndex
- ec2_dns_name
- ec2_eventsSet
- ec2_group_name
- ec2_hypervisor
- ec2_id
- ec2_image_id
- ec2_instanceState
- ec2_instance_type
- ec2_ipOwnerId
- ec2_ip_address
- ec2_item
- ec2_kernel
- ec2_key_name
- ec2_launch_time
- ec2_monitored
- ec2_monitoring
- ec2_networkInterfaceId
- ec2_ownerId
- ec2_persistent
- ec2_placement
- ec2_platform
- ec2_previous_state
- ec2_private_dns_name
- ec2_private_ip_address
- ec2_publicIp
- ec2_public_dns_name
- ec2_ramdisk
- ec2_reason
- ec2_region
- ec2_requester_id
- ec2_root_device_name
- ec2_root_device_type
- ec2_security_group_ids
- ec2_security_group_names
- ec2_shutdown_state
- ec2_sourceDestCheck
- ec2_spot_instance_request_id
- ec2_state
- ec2_state_code
- ec2_state_reason
- ec2_status
- ec2_subnet_id
- ec2_tenancy
- ec2_virtualization_type
- ec2_vpc_id
These variables are pulled out of a boto.ec2.instance object. There is a lack of
consistency with variable spellings (camelCase and underscores) since this
just loops through all variables the object exposes. It is preferred to use the
ones with underscores when multiple exist.
In addition, if an instance has AWS Tags associated with it, each tag is a new
variable named:
- ec2_tag_[Key] = [Value]
Security groups are comma-separated in 'ec2_security_group_ids' and
'ec2_security_group_names'.
'''
# (c) 2012, Peter Sankauskas
#
# This file is part of Ansible,
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
######################################################################
import sys
import os
import argparse
import re
from time import time
import boto
from boto import ec2
from boto import rds
from boto import elasticache
from boto import route53
import six
from ansible.module_utils import ec2 as ec2_utils
HAS_BOTO3 = False
try:
import boto3
HAS_BOTO3 = True
except ImportError:
pass
from six.moves import configparser
from collections import defaultdict
try:
import json
except ImportError:
import simplejson as json
class Ec2Inventory(object):
def _empty_inventory(self):
return {"_meta" : {"hostvars" : {}}}
def __init__(self):
    ''' Main execution path: parse CLI arguments, load ec2.ini settings,
    refresh or reuse the on-disk cache, then print the requested
    inventory data (a host's variables or the full host list) as JSON. '''
    # Inventory grouped by instance IDs, tags, security groups, regions,
    # and availability zones
    self.inventory = self._empty_inventory()
    # Index of hostname (address) to instance ID
    self.index = {}
    # Boto profile to use (if any)
    self.boto_profile = None
    # AWS credentials.
    self.credentials = {}
    # Read settings and parse CLI arguments
    self.parse_cli_args()
    self.read_settings()
    # Make sure that profile_name is not passed at all if not set
    # as pre 2.24 boto will fall over otherwise
    if self.boto_profile:
        if not hasattr(boto.ec2.EC2Connection, 'profile_name'):
            self.fail_with_error("boto version must be >= 2.24 to use profile")
    # Cache: refresh on explicit --refresh-cache, or when cache files are stale.
    if self.args.refresh_cache:
        self.do_api_calls_update_cache()
    elif not self.is_cache_valid():
        self.do_api_calls_update_cache()
    # Data to print
    if self.args.host:
        data_to_print = self.get_host_info()
    elif self.args.list:
        # Display list of instances for inventory
        if self.inventory == self._empty_inventory():
            data_to_print = self.get_inventory_from_cache()
        else:
            data_to_print = self.json_format_dict(self.inventory, True)
    # NOTE(review): data_to_print is only bound inside the branches above;
    # --list defaults to True, so in practice one branch always runs.
    print(data_to_print)
def is_cache_valid(self):
''' Determines if the cache files have expired, or if it is still valid '''
if os.path.isfile(self.cache_path_cache):
mod_time = os.path.getmtime(self.cache_path_cache)
current_time = time()
if (mod_time + self.cache_max_age) > current_time:
if os.path.isfile(self.cache_path_index):
return True
return False
def read_settings(self):
    ''' Reads the settings from the ec2.ini file (path overridable via the
    EC2_INI_PATH environment variable) and stores them as attributes.

    Bug fixes relative to the previous version:
    - config.get('ec2', 'route53_excluded_zones', '') passed a positional
      third argument; in Python 3 configparser, raw/vars/fallback are
      keyword-only, so that call raised TypeError whenever the option was
      actually set.
    - self.regions.append(..., **self.credentials) passed keyword arguments
      to list.append (a TypeError); the credentials belong to
      boto.connect_euca(), mirroring connect().
    '''
    if six.PY3:
        config = configparser.ConfigParser()
    else:
        config = configparser.SafeConfigParser()
    ec2_default_ini_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'ec2.ini')
    ec2_ini_path = os.path.expanduser(os.path.expandvars(os.environ.get('EC2_INI_PATH', ec2_default_ini_path)))
    config.read(ec2_ini_path)

    # is eucalyptus?
    self.eucalyptus_host = None
    self.eucalyptus = False
    if config.has_option('ec2', 'eucalyptus'):
        self.eucalyptus = config.getboolean('ec2', 'eucalyptus')
    if self.eucalyptus and config.has_option('ec2', 'eucalyptus_host'):
        self.eucalyptus_host = config.get('ec2', 'eucalyptus_host')

    # Regions
    self.regions = []
    configRegions = config.get('ec2', 'regions')
    configRegions_exclude = config.get('ec2', 'regions_exclude')
    if (configRegions == 'all'):
        if self.eucalyptus_host:
            # Credentials go to connect_euca(), not list.append().
            self.regions.append(boto.connect_euca(host=self.eucalyptus_host, **self.credentials).region.name)
        else:
            for regionInfo in ec2.regions():
                if regionInfo.name not in configRegions_exclude:
                    self.regions.append(regionInfo.name)
    else:
        self.regions = configRegions.split(",")

    # Destination addresses
    self.destination_variable = config.get('ec2', 'destination_variable')
    self.vpc_destination_variable = config.get('ec2', 'vpc_destination_variable')
    if config.has_option('ec2', 'hostname_variable'):
        self.hostname_variable = config.get('ec2', 'hostname_variable')
    else:
        self.hostname_variable = None
    if config.has_option('ec2', 'destination_format') and \
            config.has_option('ec2', 'destination_format_tags'):
        self.destination_format = config.get('ec2', 'destination_format')
        self.destination_format_tags = config.get('ec2', 'destination_format_tags').split(',')
    else:
        self.destination_format = None
        self.destination_format_tags = None

    # Route53
    self.route53_enabled = config.getboolean('ec2', 'route53')
    self.route53_excluded_zones = []
    if config.has_option('ec2', 'route53_excluded_zones'):
        # has_option() guarantees the key exists, so no fallback is needed.
        self.route53_excluded_zones.extend(
            config.get('ec2', 'route53_excluded_zones').split(','))

    # Include RDS instances?
    self.rds_enabled = True
    if config.has_option('ec2', 'rds'):
        self.rds_enabled = config.getboolean('ec2', 'rds')

    # Include RDS cluster instances?
    if config.has_option('ec2', 'include_rds_clusters'):
        self.include_rds_clusters = config.getboolean('ec2', 'include_rds_clusters')
    else:
        self.include_rds_clusters = False

    # Include ElastiCache instances?
    self.elasticache_enabled = True
    if config.has_option('ec2', 'elasticache'):
        self.elasticache_enabled = config.getboolean('ec2', 'elasticache')

    # Return all EC2 instances?
    if config.has_option('ec2', 'all_instances'):
        self.all_instances = config.getboolean('ec2', 'all_instances')
    else:
        self.all_instances = False

    # Instance states to be gathered in inventory. Default is 'running'.
    # Setting 'all_instances' to 'yes' overrides this option.
    ec2_valid_instance_states = [
        'pending',
        'running',
        'shutting-down',
        'terminated',
        'stopping',
        'stopped'
    ]
    self.ec2_instance_states = []
    if self.all_instances:
        self.ec2_instance_states = ec2_valid_instance_states
    elif config.has_option('ec2', 'instance_states'):
        for instance_state in config.get('ec2', 'instance_states').split(','):
            instance_state = instance_state.strip()
            # Silently skip unrecognized states.
            if instance_state not in ec2_valid_instance_states:
                continue
            self.ec2_instance_states.append(instance_state)
    else:
        self.ec2_instance_states = ['running']

    # Return all RDS instances? (if RDS is enabled)
    if config.has_option('ec2', 'all_rds_instances') and self.rds_enabled:
        self.all_rds_instances = config.getboolean('ec2', 'all_rds_instances')
    else:
        self.all_rds_instances = False

    # Return all ElastiCache replication groups? (if ElastiCache is enabled)
    if config.has_option('ec2', 'all_elasticache_replication_groups') and self.elasticache_enabled:
        self.all_elasticache_replication_groups = config.getboolean('ec2', 'all_elasticache_replication_groups')
    else:
        self.all_elasticache_replication_groups = False

    # Return all ElastiCache clusters? (if ElastiCache is enabled)
    if config.has_option('ec2', 'all_elasticache_clusters') and self.elasticache_enabled:
        self.all_elasticache_clusters = config.getboolean('ec2', 'all_elasticache_clusters')
    else:
        self.all_elasticache_clusters = False

    # Return all ElastiCache nodes? (if ElastiCache is enabled)
    if config.has_option('ec2', 'all_elasticache_nodes') and self.elasticache_enabled:
        self.all_elasticache_nodes = config.getboolean('ec2', 'all_elasticache_nodes')
    else:
        self.all_elasticache_nodes = False

    # boto configuration profile (prefer CLI argument)
    self.boto_profile = self.args.boto_profile
    if config.has_option('ec2', 'boto_profile') and not self.boto_profile:
        self.boto_profile = config.get('ec2', 'boto_profile')

    # AWS credentials (prefer environment variables over the ini file)
    if not (self.boto_profile or os.environ.get('AWS_ACCESS_KEY_ID') or
            os.environ.get('AWS_PROFILE')):
        if config.has_option('credentials', 'aws_access_key_id'):
            aws_access_key_id = config.get('credentials', 'aws_access_key_id')
        else:
            aws_access_key_id = None
        if config.has_option('credentials', 'aws_secret_access_key'):
            aws_secret_access_key = config.get('credentials', 'aws_secret_access_key')
        else:
            aws_secret_access_key = None
        if config.has_option('credentials', 'aws_security_token'):
            aws_security_token = config.get('credentials', 'aws_security_token')
        else:
            aws_security_token = None
        if aws_access_key_id:
            self.credentials = {
                'aws_access_key_id': aws_access_key_id,
                'aws_secret_access_key': aws_secret_access_key
            }
            if aws_security_token:
                self.credentials['security_token'] = aws_security_token

    # Cache related
    cache_dir = os.path.expanduser(config.get('ec2', 'cache_path'))
    if self.boto_profile:
        # Keep per-profile caches separate.
        cache_dir = os.path.join(cache_dir, 'profile_' + self.boto_profile)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    cache_name = 'ansible-ec2'
    aws_profile = lambda: (self.boto_profile or
                           os.environ.get('AWS_PROFILE') or
                           os.environ.get('AWS_ACCESS_KEY_ID') or
                           self.credentials.get('aws_access_key_id', None))
    if aws_profile():
        # Namespace the cache files by profile/key so accounts do not collide.
        cache_name = '%s-%s' % (cache_name, aws_profile())
    self.cache_path_cache = cache_dir + "/%s.cache" % cache_name
    self.cache_path_index = cache_dir + "/%s.index" % cache_name
    self.cache_max_age = config.getint('ec2', 'cache_max_age')

    if config.has_option('ec2', 'expand_csv_tags'):
        self.expand_csv_tags = config.getboolean('ec2', 'expand_csv_tags')
    else:
        self.expand_csv_tags = False

    # Configure nested groups instead of flat namespace.
    if config.has_option('ec2', 'nested_groups'):
        self.nested_groups = config.getboolean('ec2', 'nested_groups')
    else:
        self.nested_groups = False

    # Replace dash or not in group names
    if config.has_option('ec2', 'replace_dash_in_groups'):
        self.replace_dash_in_groups = config.getboolean('ec2', 'replace_dash_in_groups')
    else:
        self.replace_dash_in_groups = True

    # Configure which groups should be created.
    group_by_options = [
        'group_by_instance_id',
        'group_by_region',
        'group_by_availability_zone',
        'group_by_ami_id',
        'group_by_instance_type',
        'group_by_key_pair',
        'group_by_vpc_id',
        'group_by_security_group',
        'group_by_tag_keys',
        'group_by_tag_none',
        'group_by_route53_names',
        'group_by_rds_engine',
        'group_by_rds_parameter_group',
        'group_by_elasticache_engine',
        'group_by_elasticache_cluster',
        'group_by_elasticache_parameter_group',
        'group_by_elasticache_replication_group',
    ]
    for option in group_by_options:
        if config.has_option('ec2', option):
            setattr(self, option, config.getboolean('ec2', option))
        else:
            setattr(self, option, True)

    # Do we need to just include hosts that match a pattern?
    try:
        pattern_include = config.get('ec2', 'pattern_include')
        if pattern_include and len(pattern_include) > 0:
            self.pattern_include = re.compile(pattern_include)
        else:
            self.pattern_include = None
    except configparser.NoOptionError:
        self.pattern_include = None

    # Do we need to exclude hosts that match a pattern?
    try:
        pattern_exclude = config.get('ec2', 'pattern_exclude')
        if pattern_exclude and len(pattern_exclude) > 0:
            self.pattern_exclude = re.compile(pattern_exclude)
        else:
            self.pattern_exclude = None
    except configparser.NoOptionError:
        self.pattern_exclude = None

    # Instance filters (see boto and EC2 API docs). Ignore invalid filters.
    self.ec2_instance_filters = defaultdict(list)
    if config.has_option('ec2', 'instance_filters'):
        filters = [f for f in config.get('ec2', 'instance_filters').split(',') if f]
        for instance_filter in filters:
            instance_filter = instance_filter.strip()
            if not instance_filter or '=' not in instance_filter:
                continue
            filter_key, filter_value = [x.strip() for x in instance_filter.split('=', 1)]
            if not filter_key:
                continue
            self.ec2_instance_filters[filter_key].append(filter_value)
def parse_cli_args(self):
''' Command line argument processing '''
parser = argparse.ArgumentParser(description='Produce an Ansible Inventory file based on EC2')
parser.add_argument('--list', action='store_true', default=True,
help='List instances (default: True)')
parser.add_argument('--host', action='store',
help='Get all the variables about a specific instance')
parser.add_argument('--refresh-cache', action='store_true', default=False,
help='Force refresh of cache by making API requests to EC2 (default: False - use cache files)')
parser.add_argument('--profile', '--boto-profile', action='store', dest='boto_profile',
help='Use boto profile for connections to EC2')
self.args = parser.parse_args()
def do_api_calls_update_cache(self):
''' Do API calls to each region, and save data in cache files '''
if self.route53_enabled:
self.get_route53_records()
for region in self.regions:
self.get_instances_by_region(region)
if self.rds_enabled:
self.get_rds_instances_by_region(region)
if self.elasticache_enabled:
self.get_elasticache_clusters_by_region(region)
self.get_elasticache_replication_groups_by_region(region)
if self.include_rds_clusters:
self.include_rds_clusters_by_region(region)
self.write_to_cache(self.inventory, self.cache_path_cache)
self.write_to_cache(self.index, self.cache_path_index)
def connect(self, region):
''' create connection to api server'''
if self.eucalyptus:
conn = boto.connect_euca(host=self.eucalyptus_host, **self.credentials)
conn.APIVersion = '2010-08-31'
else:
conn = self.connect_to_aws(ec2, region)
return conn
def boto_fix_security_token_in_profile(self, connect_args):
''' monkey patch for boto issue boto/boto#2100 '''
profile = 'profile ' + self.boto_profile
if boto.config.has_option(profile, 'aws_security_token'):
connect_args['security_token'] = boto.config.get(profile, 'aws_security_token')
return connect_args
def connect_to_aws(self, module, region):
connect_args = self.credentials
# only pass the profile name if it's set (as it is not supported by older boto versions)
if self.boto_profile:
connect_args['profile_name'] = self.boto_profile
self.boto_fix_security_token_in_profile(connect_args)
conn = module.connect_to_region(region, **connect_args)
# connect_to_region will fail "silently" by returning None if the region name is wrong or not supported
if conn is None:
self.fail_with_error("region name: %s likely not supported, or AWS is down. connection to region failed." % region)
return conn
    def get_instances_by_region(self, region):
        ''' Makes an AWS EC2 API call to the list of instances in a particular
        region '''
        try:
            conn = self.connect(region)
            reservations = []
            if self.ec2_instance_filters:
                # One describe call per configured filter key; results are
                # concatenated, i.e. a logical OR across filter keys.
                for filter_key, filter_values in self.ec2_instance_filters.items():
                    reservations.extend(conn.get_all_instances(filters = { filter_key : filter_values }))
            else:
                reservations = conn.get_all_instances()
            # Pull the tags back in a second step
            # AWS are on record as saying that the tags fetched in the first `get_all_instances` request are not
            # reliable and may be missing, and the only way to guarantee they are there is by calling `get_all_tags`
            instance_ids = []
            for reservation in reservations:
                instance_ids.extend([instance.id for instance in reservation.instances])
            # Batch the tag lookups so each request stays below the EC2
            # per-request filter-value limit (presumably 200 — TODO confirm).
            max_filter_value = 199
            tags = []
            for i in range(0, len(instance_ids), max_filter_value):
                tags.extend(conn.get_all_tags(filters={'resource-type': 'instance', 'resource-id': instance_ids[i:i+max_filter_value]}))
            # Index tags by instance id, then attach the authoritative tag
            # dict to each instance before adding it to the inventory.
            tags_by_instance_id = defaultdict(dict)
            for tag in tags:
                tags_by_instance_id[tag.res_id][tag.name] = tag.value
            for reservation in reservations:
                for instance in reservation.instances:
                    instance.tags = tags_by_instance_id[instance.id]
                    self.add_instance(instance, region)
        except boto.exception.BotoServerError as e:
            # AuthFailure gets the detailed credential-troubleshooting text;
            # anything else reports the backend that failed.
            if e.error_code == 'AuthFailure':
                error = self.get_auth_error_message()
            else:
                backend = 'Eucalyptus' if self.eucalyptus else 'AWS'
                error = "Error connecting to %s backend.\n%s" % (backend, e.message)
            self.fail_with_error(error, 'getting EC2 instances')
def get_rds_instances_by_region(self, region):
''' Makes an AWS API call to the list of RDS instances in a particular
region '''
try:
conn = self.connect_to_aws(rds, region)
if conn:
marker = None
while True:
instances = conn.get_all_dbinstances(marker=marker)
marker = instances.marker
for instance in instances:
self.add_rds_instance(instance, region)
if not marker:
break
except boto.exception.BotoServerError as e:
error = e.reason
if e.error_code == 'AuthFailure':
error = self.get_auth_error_message()
if not e.reason == "Forbidden":
error = "Looks like AWS RDS is down:\n%s" % e.message
self.fail_with_error(error, 'getting RDS instances')
def include_rds_clusters_by_region(self, region):
if not HAS_BOTO3:
self.fail_with_error("Working with RDS clusters requires boto3 - please install boto3 and try again",
"getting RDS clusters")
client = ec2_utils.boto3_inventory_conn('client', 'rds', region, **self.credentials)
marker, clusters = '', []
while marker is not None:
resp = client.describe_db_clusters(Marker=marker)
clusters.extend(resp["DBClusters"])
marker = resp.get('Marker', None)
account_id = boto.connect_iam().get_user().arn.split(':')[4]
c_dict = {}
for c in clusters:
# remove these datetime objects as there is no serialisation to json
# currently in place and we don't need the data yet
if 'EarliestRestorableTime' in c:
del c['EarliestRestorableTime']
if 'LatestRestorableTime' in c:
del c['LatestRestorableTime']
if self.ec2_instance_filters == {}:
matches_filter = True
else:
matches_filter = False
try:
# arn:aws:rds:<region>:<account number>:<resourcetype>:<name>
tags = client.list_tags_for_resource(
ResourceName='arn:aws:rds:' + region + ':' + account_id + ':cluster:' + c['DBClusterIdentifier'])
c['Tags'] = tags['TagList']
if self.ec2_instance_filters:
for filter_key, filter_values in self.ec2_instance_filters.items():
# get AWS tag key e.g. tag:env will be 'env'
tag_name = filter_key.split(":", 1)[1]
# Filter values is a list (if you put multiple values for the same tag name)
matches_filter = any(d['Key'] == tag_name and d['Value'] in filter_values for d in c['Tags'])
if matches_filter:
# it matches a filter, so stop looking for further matches
break
except Exception as e:
if e.message.find('DBInstanceNotFound') >= 0:
# AWS RDS bug (2016-01-06) means deletion does not fully complete and leave an 'empty' cluster.
# Ignore errors when trying to find tags for these
pass
# ignore empty clusters caused by AWS bug
if len(c['DBClusterMembers']) == 0:
continue
elif matches_filter:
c_dict[c['DBClusterIdentifier']] = c
self.inventory['db_clusters'] = c_dict
def get_elasticache_clusters_by_region(self, region):
''' Makes an AWS API call to the list of ElastiCache clusters (with
nodes' info) in a particular region.'''
# ElastiCache boto module doesn't provide a get_all_intances method,
# that's why we need to call describe directly (it would be called by
# the shorthand method anyway...)
try:
conn = self.connect_to_aws(elasticache, region)
if conn:
# show_cache_node_info = True
# because we also want nodes' information
response = conn.describe_cache_clusters(None, None, None, True)
except boto.exception.BotoServerError as e:
error = e.reason
if e.error_code == 'AuthFailure':
error = self.get_auth_error_message()
if not e.reason == "Forbidden":
error = "Looks like AWS ElastiCache is down:\n%s" % e.message
self.fail_with_error(error, 'getting ElastiCache clusters')
try:
# Boto also doesn't provide wrapper classes to CacheClusters or
# CacheNodes. Because of that wo can't make use of the get_list
# method in the AWSQueryConnection. Let's do the work manually
clusters = response['DescribeCacheClustersResponse']['DescribeCacheClustersResult']['CacheClusters']
except KeyError as e:
error = "ElastiCache query to AWS failed (unexpected format)."
self.fail_with_error(error, 'getting ElastiCache clusters')
for cluster in clusters:
self.add_elasticache_cluster(cluster, region)
def get_elasticache_replication_groups_by_region(self, region):
''' Makes an AWS API call to the list of ElastiCache replication groups
in a particular region.'''
# ElastiCache boto module doesn't provide a get_all_intances method,
# that's why we need to call describe directly (it would be called by
# the shorthand method anyway...)
try:
conn = self.connect_to_aws(elasticache, region)
if conn:
response = conn.describe_replication_groups()
except boto.exception.BotoServerError as e:
error = e.reason
if e.error_code == 'AuthFailure':
error = self.get_auth_error_message()
if not e.reason == "Forbidden":
error = "Looks like AWS ElastiCache [Replication Groups] is down:\n%s" % e.message
self.fail_with_error(error, 'getting ElastiCache clusters')
try:
# Boto also doesn't provide wrapper classes to ReplicationGroups
# Because of that wo can't make use of the get_list method in the
# AWSQueryConnection. Let's do the work manually
replication_groups = response['DescribeReplicationGroupsResponse']['DescribeReplicationGroupsResult']['ReplicationGroups']
except KeyError as e:
error = "ElastiCache [Replication Groups] query to AWS failed (unexpected format)."
self.fail_with_error(error, 'getting ElastiCache clusters')
for replication_group in replication_groups:
self.add_elasticache_replication_group(replication_group, region)
def get_auth_error_message(self):
''' create an informative error message if there is an issue authenticating'''
errors = ["Authentication error retrieving ec2 inventory."]
if None in [os.environ.get('AWS_ACCESS_KEY_ID'), os.environ.get('AWS_SECRET_ACCESS_KEY')]:
errors.append(' - No AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment vars found')
else:
errors.append(' - AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment vars found but may not be correct')
boto_paths = ['/etc/boto.cfg', '~/.boto', '~/.aws/credentials']
boto_config_found = list(p for p in boto_paths if os.path.isfile(os.path.expanduser(p)))
if len(boto_config_found) > 0:
errors.append(" - Boto configs found at '%s', but the credentials contained may not be correct" % ', '.join(boto_config_found))
else:
errors.append(" - No Boto config found at any expected location '%s'" % ', '.join(boto_paths))
return '\n'.join(errors)
def fail_with_error(self, err_msg, err_operation=None):
'''log an error to std err for ansible-playbook to consume and exit'''
if err_operation:
err_msg = 'ERROR: "{err_msg}", while: {err_operation}'.format(
err_msg=err_msg, err_operation=err_operation)
sys.stderr.write(err_msg)
sys.exit(1)
def get_instance(self, region, instance_id):
conn = self.connect(region)
reservations = conn.get_all_instances([instance_id])
for reservation in reservations:
for instance in reservation.instances:
return instance
    def add_instance(self, instance, region):
        ''' Adds an instance to the inventory and index, as long as it is
        addressable '''
        # Only return instances with desired instance states
        if instance.state not in self.ec2_instance_states:
            return
        # Select the best destination address
        if self.destination_format and self.destination_format_tags:
            # Explicit format string built from configured tag values.
            dest = self.destination_format.format(*[ getattr(instance, 'tags').get(tag, '') for tag in self.destination_format_tags ])
        elif instance.subnet_id:
            # VPC instance: the configured attribute may name either an
            # instance attribute or a tag key.
            dest = getattr(instance, self.vpc_destination_variable, None)
            if dest is None:
                dest = getattr(instance, 'tags').get(self.vpc_destination_variable, None)
        else:
            # EC2-classic instance: same attribute-then-tag fallback.
            dest = getattr(instance, self.destination_variable, None)
            if dest is None:
                dest = getattr(instance, 'tags').get(self.destination_variable, None)
        if not dest:
            # Skip instances we cannot address (e.g. private VPC subnet)
            return
        # Set the inventory name
        hostname = None
        if self.hostname_variable:
            # 'tag_<name>' selects a tag value; anything else is an attribute.
            if self.hostname_variable.startswith('tag_'):
                hostname = instance.tags.get(self.hostname_variable[4:], None)
            else:
                hostname = getattr(instance, self.hostname_variable)
        # If we can't get a nice hostname, use the destination address
        if not hostname:
            hostname = dest
        else:
            hostname = self.to_safe(hostname).lower()
        # if we only want to include hosts that match a pattern, skip those that don't
        if self.pattern_include and not self.pattern_include.match(hostname):
            return
        # if we need to exclude hosts that match a pattern, skip those
        if self.pattern_exclude and self.pattern_exclude.match(hostname):
            return
        # Add to index
        self.index[hostname] = [region, instance.id]
        # Inventory: Group by instance ID (always a group of 1)
        if self.group_by_instance_id:
            self.inventory[instance.id] = [hostname]
            if self.nested_groups:
                self.push_group(self.inventory, 'instances', instance.id)
        # Inventory: Group by region
        if self.group_by_region:
            self.push(self.inventory, region, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'regions', region)
        # Inventory: Group by availability zone
        if self.group_by_availability_zone:
            self.push(self.inventory, instance.placement, hostname)
            if self.nested_groups:
                if self.group_by_region:
                    self.push_group(self.inventory, region, instance.placement)
                self.push_group(self.inventory, 'zones', instance.placement)
        # Inventory: Group by Amazon Machine Image (AMI) ID
        if self.group_by_ami_id:
            ami_id = self.to_safe(instance.image_id)
            self.push(self.inventory, ami_id, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'images', ami_id)
        # Inventory: Group by instance type
        if self.group_by_instance_type:
            type_name = self.to_safe('type_' + instance.instance_type)
            self.push(self.inventory, type_name, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'types', type_name)
        # Inventory: Group by key pair
        if self.group_by_key_pair and instance.key_name:
            key_name = self.to_safe('key_' + instance.key_name)
            self.push(self.inventory, key_name, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'keys', key_name)
        # Inventory: Group by VPC
        if self.group_by_vpc_id and instance.vpc_id:
            vpc_id_name = self.to_safe('vpc_id_' + instance.vpc_id)
            self.push(self.inventory, vpc_id_name, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'vpcs', vpc_id_name)
        # Inventory: Group by security group
        if self.group_by_security_group:
            try:
                for group in instance.groups:
                    key = self.to_safe("security_group_" + group.name)
                    self.push(self.inventory, key, hostname)
                    if self.nested_groups:
                        self.push_group(self.inventory, 'security_groups', key)
            except AttributeError:
                # Old boto versions lack instance.groups.
                self.fail_with_error('\n'.join(['Package boto seems a bit older.',
                                                'Please upgrade boto >= 2.3.0.']))
        # Inventory: Group by tag keys
        if self.group_by_tag_keys:
            for k, v in instance.tags.items():
                # Optionally split comma-separated tag values into one group
                # per value.
                if self.expand_csv_tags and v and ',' in v:
                    values = map(lambda x: x.strip(), v.split(','))
                else:
                    values = [v]
                for v in values:
                    if v:
                        key = self.to_safe("tag_" + k + "=" + v)
                    else:
                        key = self.to_safe("tag_" + k)
                    self.push(self.inventory, key, hostname)
                    if self.nested_groups:
                        self.push_group(self.inventory, 'tags', self.to_safe("tag_" + k))
                        if v:
                            self.push_group(self.inventory, self.to_safe("tag_" + k), key)
        # Inventory: Group by Route53 domain names if enabled
        if self.route53_enabled and self.group_by_route53_names:
            route53_names = self.get_instance_route53_names(instance)
            for name in route53_names:
                self.push(self.inventory, name, hostname)
                if self.nested_groups:
                    self.push_group(self.inventory, 'route53', name)
        # Global Tag: instances without tags
        if self.group_by_tag_none and len(instance.tags) == 0:
            self.push(self.inventory, 'tag_none', hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'tags', 'tag_none')
        # Global Tag: tag all EC2 instances
        self.push(self.inventory, 'ec2', hostname)
        # Record hostvars and the address Ansible should SSH to.
        self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance)
        self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest
    def add_rds_instance(self, instance, region):
        ''' Adds an RDS instance to the inventory and index, as long as it is
        addressable '''
        # Only want available instances unless all_rds_instances is True
        if not self.all_rds_instances and instance.status != 'available':
            return
        # Select the best destination address
        # endpoint is an (address, port) pair; element 0 is the address.
        dest = instance.endpoint[0]
        if not dest:
            # Skip instances we cannot address (e.g. private VPC subnet)
            return
        # Set the inventory name
        hostname = None
        if self.hostname_variable:
            # 'tag_<name>' selects a tag value; anything else is an attribute.
            if self.hostname_variable.startswith('tag_'):
                hostname = instance.tags.get(self.hostname_variable[4:], None)
            else:
                hostname = getattr(instance, self.hostname_variable)
        # If we can't get a nice hostname, use the destination address
        if not hostname:
            hostname = dest
        hostname = self.to_safe(hostname).lower()
        # Add to index
        self.index[hostname] = [region, instance.id]
        # Inventory: Group by instance ID (always a group of 1)
        if self.group_by_instance_id:
            self.inventory[instance.id] = [hostname]
            if self.nested_groups:
                self.push_group(self.inventory, 'instances', instance.id)
        # Inventory: Group by region
        if self.group_by_region:
            self.push(self.inventory, region, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'regions', region)
        # Inventory: Group by availability zone
        if self.group_by_availability_zone:
            self.push(self.inventory, instance.availability_zone, hostname)
            if self.nested_groups:
                if self.group_by_region:
                    self.push_group(self.inventory, region, instance.availability_zone)
                self.push_group(self.inventory, 'zones', instance.availability_zone)
        # Inventory: Group by instance type
        if self.group_by_instance_type:
            type_name = self.to_safe('type_' + instance.instance_class)
            self.push(self.inventory, type_name, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'types', type_name)
        # Inventory: Group by VPC
        if self.group_by_vpc_id and instance.subnet_group and instance.subnet_group.vpc_id:
            vpc_id_name = self.to_safe('vpc_id_' + instance.subnet_group.vpc_id)
            self.push(self.inventory, vpc_id_name, hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'vpcs', vpc_id_name)
        # Inventory: Group by security group
        if self.group_by_security_group:
            try:
                if instance.security_group:
                    key = self.to_safe("security_group_" + instance.security_group.name)
                    self.push(self.inventory, key, hostname)
                    if self.nested_groups:
                        self.push_group(self.inventory, 'security_groups', key)
            except AttributeError:
                # Old boto versions lack instance.security_group.
                self.fail_with_error('\n'.join(['Package boto seems a bit older.',
                                                'Please upgrade boto >= 2.3.0.']))
        # Inventory: Group by engine
        if self.group_by_rds_engine:
            self.push(self.inventory, self.to_safe("rds_" + instance.engine), hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'rds_engines', self.to_safe("rds_" + instance.engine))
        # Inventory: Group by parameter group
        if self.group_by_rds_parameter_group:
            self.push(self.inventory, self.to_safe("rds_parameter_group_" + instance.parameter_group.name), hostname)
            if self.nested_groups:
                self.push_group(self.inventory, 'rds_parameter_groups', self.to_safe("rds_parameter_group_" + instance.parameter_group.name))
        # Global Tag: all RDS instances
        self.push(self.inventory, 'rds', hostname)
        # Record hostvars and the address Ansible should SSH to.
        self.inventory["_meta"]["hostvars"][hostname] = self.get_host_info_dict_from_instance(instance)
        self.inventory["_meta"]["hostvars"][hostname]['ansible_ssh_host'] = dest
    def add_elasticache_cluster(self, cluster, region):
        ''' Adds an ElastiCache cluster to the inventory and index, as long as
        its nodes are addressable '''
        # Only want available clusters unless all_elasticache_clusters is True
        if not self.all_elasticache_clusters and cluster['CacheClusterStatus'] != 'available':
            return
        # Select the best destination address
        if 'ConfigurationEndpoint' in cluster and cluster['ConfigurationEndpoint']:
            # Memcached cluster
            dest = cluster['ConfigurationEndpoint']['Address']
            is_redis = False
        else:
            # Redis sigle node cluster
            # Because all Redis clusters are single nodes, we'll merge the
            # info from the cluster with info about the node
            dest = cluster['CacheNodes'][0]['Endpoint']['Address']
            is_redis = True
        if not dest:
            # Skip clusters we cannot address (e.g. private VPC subnet)
            return
        # Add to index
        self.index[dest] = [region, cluster['CacheClusterId']]
        # Inventory: Group by instance ID (always a group of 1)
        if self.group_by_instance_id:
            self.inventory[cluster['CacheClusterId']] = [dest]
            if self.nested_groups:
                self.push_group(self.inventory, 'instances', cluster['CacheClusterId'])
        # Inventory: Group by region
        # Redis clusters are handled per-node instead (see add_elasticache_node),
        # hence the 'not is_redis' guards below.
        if self.group_by_region and not is_redis:
            self.push(self.inventory, region, dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'regions', region)
        # Inventory: Group by availability zone
        if self.group_by_availability_zone and not is_redis:
            self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest)
            if self.nested_groups:
                if self.group_by_region:
                    self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone'])
                self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone'])
        # Inventory: Group by node type
        if self.group_by_instance_type and not is_redis:
            type_name = self.to_safe('type_' + cluster['CacheNodeType'])
            self.push(self.inventory, type_name, dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'types', type_name)
        # Inventory: Group by VPC (information not available in the current
        # AWS API version for ElastiCache)
        # Inventory: Group by security group
        if self.group_by_security_group and not is_redis:
            # Check for the existence of the 'SecurityGroups' key and also if
            # this key has some value. When the cluster is not placed in a SG
            # the query can return None here and cause an error.
            if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None:
                for security_group in cluster['SecurityGroups']:
                    key = self.to_safe("security_group_" + security_group['SecurityGroupId'])
                    self.push(self.inventory, key, dest)
                    if self.nested_groups:
                        self.push_group(self.inventory, 'security_groups', key)
        # Inventory: Group by engine
        if self.group_by_elasticache_engine and not is_redis:
            self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'elasticache_engines', self.to_safe(cluster['Engine']))
        # Inventory: Group by parameter group
        if self.group_by_elasticache_parameter_group:
            self.push(self.inventory, self.to_safe("elasticache_parameter_group_" + cluster['CacheParameterGroup']['CacheParameterGroupName']), dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'elasticache_parameter_groups', self.to_safe(cluster['CacheParameterGroup']['CacheParameterGroupName']))
        # Inventory: Group by replication group
        if self.group_by_elasticache_replication_group and 'ReplicationGroupId' in cluster and cluster['ReplicationGroupId']:
            self.push(self.inventory, self.to_safe("elasticache_replication_group_" + cluster['ReplicationGroupId']), dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'elasticache_replication_groups', self.to_safe(cluster['ReplicationGroupId']))
        # Global Tag: all ElastiCache clusters
        self.push(self.inventory, 'elasticache_clusters', cluster['CacheClusterId'])
        host_info = self.get_host_info_dict_from_describe_dict(cluster)
        self.inventory["_meta"]["hostvars"][dest] = host_info
        # Add the nodes
        for node in cluster['CacheNodes']:
            self.add_elasticache_node(node, cluster, region)
    def add_elasticache_node(self, node, cluster, region):
        ''' Adds an ElastiCache node to the inventory and index, as long as
        it is addressable '''
        # Only want available nodes unless all_elasticache_nodes is True
        if not self.all_elasticache_nodes and node['CacheNodeStatus'] != 'available':
            return
        # Select the best destination address
        dest = node['Endpoint']['Address']
        if not dest:
            # Skip nodes we cannot address (e.g. private VPC subnet)
            return
        # Node id is namespaced by its owning cluster to stay unique.
        node_id = self.to_safe(cluster['CacheClusterId'] + '_' + node['CacheNodeId'])
        # Add to index
        self.index[dest] = [region, node_id]
        # Inventory: Group by node ID (always a group of 1)
        if self.group_by_instance_id:
            self.inventory[node_id] = [dest]
            if self.nested_groups:
                self.push_group(self.inventory, 'instances', node_id)
        # Inventory: Group by region
        if self.group_by_region:
            self.push(self.inventory, region, dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'regions', region)
        # Inventory: Group by availability zone
        if self.group_by_availability_zone:
            self.push(self.inventory, cluster['PreferredAvailabilityZone'], dest)
            if self.nested_groups:
                if self.group_by_region:
                    self.push_group(self.inventory, region, cluster['PreferredAvailabilityZone'])
                self.push_group(self.inventory, 'zones', cluster['PreferredAvailabilityZone'])
        # Inventory: Group by node type
        if self.group_by_instance_type:
            type_name = self.to_safe('type_' + cluster['CacheNodeType'])
            self.push(self.inventory, type_name, dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'types', type_name)
        # Inventory: Group by VPC (information not available in the current
        # AWS API version for ElastiCache)
        # Inventory: Group by security group
        if self.group_by_security_group:
            # Check for the existence of the 'SecurityGroups' key and also if
            # this key has some value. When the cluster is not placed in a SG
            # the query can return None here and cause an error.
            if 'SecurityGroups' in cluster and cluster['SecurityGroups'] is not None:
                for security_group in cluster['SecurityGroups']:
                    key = self.to_safe("security_group_" + security_group['SecurityGroupId'])
                    self.push(self.inventory, key, dest)
                    if self.nested_groups:
                        self.push_group(self.inventory, 'security_groups', key)
        # Inventory: Group by engine
        if self.group_by_elasticache_engine:
            self.push(self.inventory, self.to_safe("elasticache_" + cluster['Engine']), dest)
            if self.nested_groups:
                self.push_group(self.inventory, 'elasticache_engines', self.to_safe("elasticache_" + cluster['Engine']))
        # Inventory: Group by parameter group (done at cluster level)
        # Inventory: Group by replication group (done at cluster level)
        # Inventory: Group by ElastiCache Cluster
        if self.group_by_elasticache_cluster:
            self.push(self.inventory, self.to_safe("elasticache_cluster_" + cluster['CacheClusterId']), dest)
        # Global Tag: all ElastiCache nodes
        self.push(self.inventory, 'elasticache_nodes', dest)
        host_info = self.get_host_info_dict_from_describe_dict(node)
        # Merge into any hostvars already recorded for this address by the
        # owning cluster, rather than overwriting them.
        if dest in self.inventory["_meta"]["hostvars"]:
            self.inventory["_meta"]["hostvars"][dest].update(host_info)
        else:
            self.inventory["_meta"]["hostvars"][dest] = host_info
def add_elasticache_replication_group(self, replication_group, region):
''' Adds an ElastiCache replication group to the inventory and index '''
# Only want available clusters unless all_elasticache_replication_groups is True
if not self.all_elasticache_replication_groups and replication_group['Status'] != 'available':
return
# Select the best destination address (PrimaryEndpoint)
dest = replication_group['NodeGroups'][0]['PrimaryEndpoint']['Address']
if not dest:
# Skip clusters we cannot address (e.g. private VPC subnet)
return
# Add to index
self.index[dest] = [region, replication_group['ReplicationGroupId']]
# Inventory: Group by ID (always a group of 1)
if self.group_by_instance_id:
self.inventory[replication_group['ReplicationGroupId']] = [dest]
if self.nested_groups:
self.push_group(self.inventory, 'instances', replication_group['ReplicationGroupId'])
# Inventory: Group by region
if self.group_by_region:
self.push(self.inventory, region, dest)
if self.nested_groups:
self.push_group(self.inventory, 'regions', region)
# Inventory: Group by availability zone (doesn't apply to replication groups)
# Inventory: Group by node type (doesn't apply to replication groups)
# Inventory: Group by VPC (information not available in the current
# AWS API version for replication groups
# Inventory: Group by security group (doesn't apply to replication groups)
# Check this value in cluster level
# Inventory: Group by engine (replication groups are always Redis)
if self.group_by_elasticache_engine:
self.push(self.inventory, 'elasticache_redis', dest)
if self.nested_groups:
self.push_group(self.inventory, 'elasticache_engines', 'redis')
# Global Tag: all ElastiCache clusters
self.push(self.inventory, 'elasticache_replication_groups', replication_group['ReplicationGroupId'])
host_info = self.get_host_info_dict_from_describe_dict(replication_group)
self.inventory["_meta"]["hostvars"][dest] = host_info
def get_route53_records(self):
''' Get and store the map of resource records to domain names that
point to them. '''
r53_conn = route53.Route53Connection()
all_zones = r53_conn.get_zones()
route53_zones = [ zone for zone in all_zones if zone.name[:-1]
not in self.route53_excluded_zones ]
self.route53_records = {}
for zone in route53_zones:
rrsets = r53_conn.get_all_rrsets(zone.id)
for record_set in rrsets:
record_name = record_set.name
if record_name.endswith('.'):
record_name = record_name[:-1]
for resource in record_set.resource_records:
self.route53_records.setdefault(resource, set())
self.route53_records[resource].add(record_name)
def get_instance_route53_names(self, instance):
''' Check if an instance is referenced in the records we have from
Route53. If it is, return the list of domain names pointing to said
instance. If nothing points to it, return an empty list. '''
instance_attributes = [ 'public_dns_name', 'private_dns_name',
'ip_address', 'private_ip_address' ]
name_list = set()
for attrib in instance_attributes:
try:
value = getattr(instance, attrib)
except AttributeError:
continue
if value in self.route53_records:
name_list.update(self.route53_records[value])
return list(name_list)
    def get_host_info_dict_from_instance(self, instance):
        ''' Flatten a boto instance object into a dict of 'ec2_*' hostvars.
        Complex attribute types (state, region, placement, tags, security
        groups, block devices) get special-cased handling below. '''
        instance_vars = {}
        for key in vars(instance):
            value = getattr(instance, key)
            key = self.to_safe('ec2_' + key)
            # Handle complex types
            # state/previous_state changed to properties in boto in https://github.com/boto/boto/commit/a23c379837f698212252720d2af8dec0325c9518
            if key == 'ec2__state':
                instance_vars['ec2_state'] = instance.state or ''
                instance_vars['ec2_state_code'] = instance.state_code
            elif key == 'ec2__previous_state':
                instance_vars['ec2_previous_state'] = instance.previous_state or ''
                instance_vars['ec2_previous_state_code'] = instance.previous_state_code
            elif type(value) in [int, bool]:
                instance_vars[key] = value
            elif isinstance(value, six.string_types):
                instance_vars[key] = value.strip()
            elif type(value) == type(None):
                # None becomes an empty string so hostvars stay stringly-typed.
                instance_vars[key] = ''
            elif key == 'ec2_region':
                # region is a RegionInfo object; only its name is useful.
                instance_vars[key] = value.name
            elif key == 'ec2__placement':
                instance_vars['ec2_placement'] = value.zone
            elif key == 'ec2_tags':
                # Each tag becomes its own 'ec2_tag_<name>' hostvar; CSV
                # values are optionally expanded into lists.
                for k, v in value.items():
                    if self.expand_csv_tags and ',' in v:
                        v = list(map(lambda x: x.strip(), v.split(',')))
                    key = self.to_safe('ec2_tag_' + k)
                    instance_vars[key] = v
            elif key == 'ec2_groups':
                group_ids = []
                group_names = []
                for group in value:
                    group_ids.append(group.id)
                    group_names.append(group.name)
                instance_vars["ec2_security_group_ids"] = ','.join([str(i) for i in group_ids])
                instance_vars["ec2_security_group_names"] = ','.join([str(i) for i in group_names])
            elif key == 'ec2_block_device_mapping':
                instance_vars["ec2_block_devices"] = {}
                for k, v in value.items():
                    instance_vars["ec2_block_devices"][ os.path.basename(k) ] = v.volume_id
            else:
                # Any other complex type is deliberately dropped.
                pass
                # TODO Product codes if someone finds them useful
                #print key
                #print type(value)
                #print value
        return instance_vars
def get_host_info_dict_from_describe_dict(self, describe_dict):
    ''' Parses the dictionary returned by the API call into a flat list
    of parameters. This method should be used only when 'describe' is
    used directly because Boto doesn't provide specific classes. '''
    # I really don't agree with prefixing everything with 'ec2'
    # because EC2, RDS and ElastiCache are different services.
    # I'm just following the pattern used until now to not break any
    # compatibility.
    host_info = {}
    for key in describe_dict:
        value = describe_dict[key]
        # CamelCase API keys become snake_case with an 'ec2_' prefix.
        key = self.to_safe('ec2_' + self.uncammelize(key))
        # Handle complex types
        # NOTE: the first three checks are independent 'if's; the 'elif'
        # chain below attaches to the 'ec2_member_clusters' check, so keys
        # matched by the earlier 'if's fall through to the chain and end
        # up in its final 'pass' (they are dicts/lists, not scalars).
        # Target: Memcached Cache Clusters
        if key == 'ec2_configuration_endpoint' and value:
            host_info['ec2_configuration_endpoint_address'] = value['Address']
            host_info['ec2_configuration_endpoint_port'] = value['Port']
        # Target: Cache Nodes and Redis Cache Clusters (single node)
        if key == 'ec2_endpoint' and value:
            host_info['ec2_endpoint_address'] = value['Address']
            host_info['ec2_endpoint_port'] = value['Port']
        # Target: Redis Replication Groups
        if key == 'ec2_node_groups' and value:
            host_info['ec2_endpoint_address'] = value[0]['PrimaryEndpoint']['Address']
            host_info['ec2_endpoint_port'] = value[0]['PrimaryEndpoint']['Port']
            replica_count = 0
            for node in value[0]['NodeGroupMembers']:
                if node['CurrentRole'] == 'primary':
                    host_info['ec2_primary_cluster_address'] = node['ReadEndpoint']['Address']
                    host_info['ec2_primary_cluster_port'] = node['ReadEndpoint']['Port']
                    host_info['ec2_primary_cluster_id'] = node['CacheClusterId']
                elif node['CurrentRole'] == 'replica':
                    # Replicas get numbered variable names (…_0, …_1, …).
                    host_info['ec2_replica_cluster_address_'+ str(replica_count)] = node['ReadEndpoint']['Address']
                    host_info['ec2_replica_cluster_port_'+ str(replica_count)] = node['ReadEndpoint']['Port']
                    host_info['ec2_replica_cluster_id_'+ str(replica_count)] = node['CacheClusterId']
                    replica_count += 1
        # Target: Redis Replication Groups
        if key == 'ec2_member_clusters' and value:
            host_info['ec2_member_clusters'] = ','.join([str(i) for i in value])
        # Target: All Cache Clusters
        elif key == 'ec2_cache_parameter_group':
            host_info["ec2_cache_node_ids_to_reboot"] = ','.join([str(i) for i in value['CacheNodeIdsToReboot']])
            host_info['ec2_cache_parameter_group_name'] = value['CacheParameterGroupName']
            host_info['ec2_cache_parameter_apply_status'] = value['ParameterApplyStatus']
        # Target: Almost everything
        elif key == 'ec2_security_groups':
            # Skip if SecurityGroups is None
            # (it is possible to have the key defined but no value in it).
            if value is not None:
                sg_ids = []
                for sg in value:
                    sg_ids.append(sg['SecurityGroupId'])
                host_info["ec2_security_group_ids"] = ','.join([str(i) for i in sg_ids])
        # Target: Everything
        # Preserve booleans and integers
        elif type(value) in [int, bool]:
            host_info[key] = value
        # Target: Everything
        # Sanitize string values
        elif isinstance(value, six.string_types):
            host_info[key] = value.strip()
        # Target: Everything
        # Replace None by an empty string
        elif type(value) == type(None):
            host_info[key] = ''
        else:
            # Remove non-processed complex types
            pass
    return host_info
def get_host_info(self):
    ''' Get variables about a specific host.

    Looks up self.args.host in the cached index, refreshing the cache once
    if the host is missing; returns a JSON string of the host's variables,
    or an empty JSON object if the host cannot be found. '''
    if len(self.index) == 0:
        # Need to load index from cache
        self.load_index_from_cache()
    # PEP 8 idiom: 'x not in y' instead of 'not x in y'.
    if self.args.host not in self.index:
        # try updating the cache
        self.do_api_calls_update_cache()
        if self.args.host not in self.index:
            # host might not exist anymore
            return self.json_format_dict({}, True)
    (region, instance_id) = self.index[self.args.host]
    instance = self.get_instance(region, instance_id)
    return self.json_format_dict(self.get_host_info_dict_from_instance(instance), True)
def push(self, my_dict, key, element):
    ''' Append element under key in my_dict, creating an empty list for the
    key if it is absent. If the existing entry is a group dict, the element
    is appended to its 'hosts' list instead. '''
    entry = my_dict.setdefault(key, [])
    if not isinstance(entry, dict):
        entry.append(element)
    else:
        # Group dicts keep their hosts under the 'hosts' key.
        entry.setdefault('hosts', []).append(element)
def push_group(self, my_dict, key, element):
    ''' Push a group as a child of another group. '''
    group = my_dict.setdefault(key, {})
    if not isinstance(group, dict):
        # A plain host list was stored here; promote it to a group dict.
        group = {'hosts': group}
        my_dict[key] = group
    children = group.setdefault('children', [])
    # Avoid duplicate child entries.
    if element not in children:
        children.append(element)
def get_inventory_from_cache(self):
    ''' Reads the inventory from the cache file and returns it as a JSON
    string. '''
    # Use a context manager so the file handle is always closed
    # (the original left the handle open).
    with open(self.cache_path_cache, 'r') as cache:
        return cache.read()
def load_index_from_cache(self):
    ''' Reads the index from the cache file and sets self.index. '''
    # Use a context manager so the file handle is always closed
    # (the original left the handle open).
    with open(self.cache_path_index, 'r') as cache:
        self.index = json.loads(cache.read())
def write_to_cache(self, data, filename):
    ''' Writes data in JSON format to a file. '''
    json_data = self.json_format_dict(data, True)
    # 'with' guarantees the handle is closed even if write() raises,
    # unlike the original open/write/close sequence.
    with open(filename, 'w') as cache:
        cache.write(json_data)
def uncammelize(self, key):
    ''' Convert a CamelCase identifier to snake_case. '''
    # First insert '_' before an uppercase run followed by lowercase,
    # then before any uppercase that follows a lowercase letter or digit.
    partially_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', key)
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', partially_split)
    return fully_split.lower()
def to_safe(self, word):
    ''' Converts 'bad' characters in a string to underscores so they can be
    used as Ansible groups. Dashes are preserved unless
    self.replace_dash_in_groups is true. '''
    # Raw strings: the original used "\_" and "\-" in ordinary strings,
    # which are invalid escape sequences (SyntaxWarning on Python >= 3.12).
    regex = r"[^A-Za-z0-9\_"
    if not self.replace_dash_in_groups:
        regex += r"\-"
    return re.sub(regex + "]", "_", word)
def json_format_dict(self, data, pretty=False):
    ''' Serialize data to a JSON string; pretty-print (sorted keys,
    2-space indent) when requested. '''
    if not pretty:
        return json.dumps(data)
    return json.dumps(data, sort_keys=True, indent=2)
# Run the script
# NOTE(review): constructing Ec2Inventory appears to do all the work
# (argument parsing and printing the inventory) inside __init__, which is
# outside this view — confirm before relying on this.
Ec2Inventory()
================================================
FILE: cloud/aws-ansible/files/sources.list
================================================
deb http://mirrors.yun-idc.com/ubuntu/ trusty main restricted universe multiverse
deb http://mirrors.yun-idc.com/ubuntu/ trusty-security main restricted universe multiverse
deb http://mirrors.yun-idc.com/ubuntu/ trusty-updates main restricted universe multiverse
deb http://mirrors.yun-idc.com/ubuntu/ trusty-backports main restricted universe multiverse
================================================
FILE: cloud/aws-ansible/roles/aws/tasks/main.yml
================================================
---
# all cluster use a single vpc
- name: vpc setup
ec2_vpc:
state: present
cidr_block: 172.233.0.0/16
resource_tags:
Name: pingcap-vpc-ansible
ManagedBy: tidb-ansible
Creator: ansible-auto
subnets:
- cidr: 172.233.1.0/24
az: cn-north-1a
resource_tags:
Name: pingcap-subnet-1a
Environment: "test"
ManagedBy: tidb-ansible
Creator: ansible-auto
Tier: "db"
- cidr: 172.233.2.0/24
az: cn-north-1b
resource_tags:
Name: pingcap-subnet-1b
Environment: "test"
ManagedBy: tidb-ansible
Creator: ansible-auto
Tier: "db"
internet_gateway: yes # assign internet
route_tables:
- subnets:
- 172.233.1.0/24
- 172.233.2.0/24
routes:
- dest: 0.0.0.0/0
gw: igw
region: cn-north-1
register: vpc
# existing cluster
- name: tidb cluster group
ec2_group:
state: present
name: "ansible-sg-by-{{ managed_by }}"
description: vpc security group by {{ creator }}
vpc_id: "{{ vpc.vpc_id }}"
rules:
- proto: tcp
from_port: 0
to_port: 0
group_name: "ansible-sg-by-{{ managed_by }}"
- proto: -1
from_port: 0
to_port: 0
cidr_ip: 172.233.0.0/16
- proto: tcp
from_port: 22
to_port: 22
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 3000
to_port: 3000
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 3306
to_port: 3306
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 4000
to_port: 4000
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 8000
to_port: 8000
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 4567
to_port: 4567
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 9000
to_port: 9050
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 9090 # prometheus
to_port: 9091 # pushgateway
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 9200
to_port: 9200
cidr_ip: 0.0.0.0/0
# outbound
rules_egress:
- proto: -1
from_port: 0
to_port: 0
cidr_ip: 0.0.0.0/0
region: cn-north-1
register: security_group
- name: tikv servers
ec2:
region: cn-north-1
key_name: pingcap
group_id: "{{ security_group.group_id }}"
instance_type: "{{ tikv_instance_type }}"
image: "{{ image_ami }}"
wait: yes
wait_timeout: 500
# volumes:
# - device_name: /dev/xvdb
# volume_type: gp2
# volume_size: 80
# delete_on_termination: true
# count: 1
instance_tags:
Name: tikv-by-{{ creator }}
ManagedBy: "{{ managed_by }}"
Creator: "{{ creator }}"
Type: tikv
count_tag:
Type: tikv
ManagedBy: "{{ managed_by }}"
exact_count: "{{ tikv_count }}"
vpc_subnet_id: "{{ vpc.subnets[0].id }}"
assign_public_ip: yes
- name: pd servers
ec2:
region: cn-north-1
key_name: pingcap
group_id: "{{ security_group.group_id }}"
instance_type: "{{ pd_instance_type }}"
image: "{{ image_ami }}"
wait: yes
wait_timeout: 500
# count: 1
instance_tags:
Name: pd-by-{{ creator }}
ManagedBy: "{{ managed_by }}"
Creator: "{{ creator }}"
Type: pd
count_tag:
Type: pd
ManagedBy: "{{ managed_by }}"
exact_count: "{{ pd_count }}"
vpc_subnet_id: "{{ vpc.subnets[0].id }}"
assign_public_ip: yes
- name: tidb servers
ec2:
region: cn-north-1
key_name: pingcap
group_id: "{{ security_group.group_id }}"
instance_type: "{{ tidb_instance_type }}"
image: "{{ image_ami }}"
wait: yes
wait_timeout: 500
# count: 1
instance_tags:
Name: tidb-by-{{ creator }}
ManagedBy: "{{ managed_by }}"
Creator: "{{ creator }}"
Type: tidb
count_tag:
Type: tidb
ManagedBy: "{{ managed_by }}"
exact_count: "{{ tidb_count }}"
vpc_subnet_id: "{{ vpc.subnets[0].id }}"
assign_public_ip: yes
- name: monitoring servers
ec2:
region: cn-north-1
key_name: pingcap
group_id: "{{ security_group.group_id }}"
instance_type: "{{ monitoring_instance_type }}"
image: "{{ image_ami }}"
wait: yes
wait_timeout: 500
# volumes:
# - device_name: /dev/xvdb
# volume_type: gp2
# volume_size: 50
# delete_on_termination: false
instance_tags:
Name: mon-by-{{ creator }}
ManagedBy: "{{ managed_by }}"
Creator: "{{ creator }}"
Type: monitoring
count_tag:
Type: monitoring
ManagedBy: "{{ managed_by }}"
exact_count: "{{ monitoring_count }}"
vpc_subnet_id: "{{ vpc.subnets[0].id }}"
assign_public_ip: yes
================================================
FILE: cloud/aws-ansible/templates/aws.inventory.ini.j2
================================================
[tidb_servers]
{% if groups.tidb_servers is defined %}
{% for item in groups.tidb_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[tikv_servers]
{% if groups.tikv_servers is defined %}
{% for item in groups.tikv_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[pd_servers]
{% if groups.pd_servers is defined %}
{% for item in groups.pd_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[monitoring_servers]
{% if groups.monitoring_servers is defined %}
{% for item in groups.monitoring_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[grafana_servers]
{% if groups.monitoring_servers is defined %}
{% for item in groups.monitoring_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[monitored_servers]
{% if groups.monitored_servers is defined %}
{% for item in groups.monitored_servers -%}
{{ item }}
{% endfor %}
{% endif %}
[all:vars]
ansible_user = ubuntu
cluster_name = {{ creator }}-cluster
================================================
FILE: cloud/aws-ansible/vars.yml
================================================
---
tikv_count: 1
pd_count: 1
tidb_count: 1
# 1 or 0
monitoring_count: 1
creator: pingcap-auto
managed_by: ansible-pingcap
# CentOS 7
# image_ami: ami-c9a06aa4
# Ubuntu 14.04
# image_ami: ami-0220b23b
# CoreOS
# image_ami: ami-1ce93d71
# ubuntu 16.04, hvm-ssd
# image_ami: ami-a0e136cd
image_ami: ami-0220b23b
# m3.medium: 4core 16G
# t2.xlarge: 4 16
# t2.2xlarge: 8 32
# m3.medium: 1 3.75 1 x 4 (SSD)
# m3.large: 2 7.5 1 x 32 (SSD)
tidb_instance_type: r3.4xlarge
# 2core 8G
pd_instance_type: r3.2xlarge
tikv_instance_type: i2.4xlarge
monitoring_instance_type: t2.2xlarge
# volume must be used with EBS only
================================================
FILE: collect_diagnosis.yml
================================================
---
# Copyright 2016 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- hosts: localhost
tags:
- always
tasks:
- name: clean up fetch_tmp_dir
file: path={{ fetch_tmp_dir }} state=absent
- name: create fetch directories
file: path={{ item }} state=directory mode=0755
with_items:
- "{{ fetch_tmp_dir }}"
- "{{ fetch_dir }}"
- name: collect pd diagnosis information
hosts: pd_servers
tags:
- pd
roles:
- collector_pd
- name: collect tikv diagnosis information
hosts: tikv_servers
tags:
- tikv
roles:
- collector_tikv
- name: collect tidb diagnosis information
hosts: tidb_servers
tags:
- tidb
roles:
- collector_tidb
- name: collect pump diagnosis information
hosts: pump_servers
tags:
- pump
roles:
- collector_pump
- name: collect prometheus metric data
hosts: monitoring_servers
tags:
- prometheus
roles:
- collector_prometheus
- name: collect host infomation
hosts: monitored_servers
roles:
- collector_host
tags:
- host
- hosts: localhost
tags:
- always
tasks:
- name: collect inventory.ini
shell: "cd {{ fetch_tmp_dir }} && cp {{ playbook_dir }}/inventory.ini ."
- name: get datetime
shell: date +%Y%m%d_%H%M%S
register: datetime
changed_when: false
- set_fact:
archive_name: "collect_diagnosis_{{ datetime.stdout | trim }}.tar.gz"
- name: archive all diagnosis files
shell: "cd {{ playbook_dir }} && tar czvf {{ fetch_dir }}/{{ archive_name }} {{ fetch_tmp_dir | basename }}"
- name: clean up fetch_tmp_dir
file: path={{ fetch_tmp_dir }} state=absent
- name: display the file path of collect_diagnosis tarball
debug:
msg: "collect_diagnosis tarball: {{ fetch_dir }}/{{ archive_name }}"
================================================
FILE: common_tasks/add_evict_leader_scheduler.yml
================================================
---
- name: remove evict-leader-scheduler
uri:
url: "http://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-scheduler-{{ store_id }}"
method: DELETE
status_code: 200,500,404
return_content: yes
register: scheduler_info
until: "'scheduler not found' in scheduler_info.content"
retries: 3
delay: 5
when: not enable_tls|default(false)
- name: remove evict-leader-scheduler when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-scheduler-{{ store_id_tls }}"
validate_certs: no
client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem"
client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem"
method: DELETE
status_code: 200,500,404
return_content: yes
register: scheduler_info_tls
until: "'scheduler not found' in scheduler_info_tls.content"
retries: 3
delay: 5
when: enable_tls|default(false)
- name: add evict-leader-scheduler
uri:
url: "http://{{ pd_addr }}/pd/api/v1/schedulers"
method: POST
status_code: 200
body_format: json
body:
name: "evict-leader-scheduler"
store_id: "{{ store_id }}"
when: not enable_tls|default(false)
- name: add evict-leader-scheduler when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/schedulers"
validate_certs: no
client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem"
client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem"
method: POST
status_code: 200
body_format: json
body:
name: "evict-leader-scheduler"
store_id: "{{ store_id_tls }}"
when: enable_tls|default(false)
- name: check tikv's leader count
uri:
url: "http://{{ pd_addr }}/pd/api/v1/store/{{ store_id }}"
method: GET
return_content: yes
body_format: json
status_code: 200
register: store_info
until: (store_info.json.status.leader_count is defined and store_info.json.status.leader_count|int < 1) or store_info.json.status.leader_count is not defined
retries: 18
delay: 10
failed_when: false
when: not enable_tls|default(false)
- name: check tikv's leader count when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/store/{{ store_id_tls }}"
validate_certs: no
client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem"
client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem"
method: GET
return_content: yes
body_format: json
status_code: 200
register: store_info_tls
until: (store_info_tls.json.status.leader_count is defined and store_info_tls.json.status.leader_count|int < 1) or store_info_tls.json.status.leader_count is not defined
retries: 18
delay: 10
failed_when: false
when: enable_tls|default(false)
- name: display leader_count
debug:
msg: "leader_count: {{ store_info.json.status.leader_count|default(0) }}"
when: not enable_tls|default(false)
- name: display leader_count when enable_tls|default(false)
debug:
msg: "leader_count: {{ store_info_tls.json.status.leader_count|default(0) }}"
when: enable_tls|default(false)
================================================
FILE: common_tasks/create_grafana_api_keys.yml
================================================
---
- name: Ensure grafana API Key directory exists
file:
path: "{{ grafana_api_keys_dir }}"
state: directory
delegate_to: localhost
- name: Check grafana API Key list
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/auth/keys"
user: "{{ grafana_admin_user }}"
password: "{{ grafana_admin_password }}"
force_basic_auth: yes
return_content: yes
register: existing_api_keys
- name: Check grafana API Key file existed
stat:
path: "{{ grafana_api_keys_dir }}/grafana_apikey.key"
register: grafana_apikey_file
delegate_to: localhost
- set_fact:
apikey_id: "{{ item }}"
with_items: "{{ existing_api_keys.json|json_query(apikey_id_query) }}"
vars:
apikey_id_query: "[?name=='grafana_apikey'].id"
when:
- ((existing_api_keys['json'] | selectattr("name", "equalto", "grafana_apikey")) | list) | length == 1
- grafana_apikey_file.stat.exists == False
- debug:
var: apikey_id
when:
- ((existing_api_keys['json'] | selectattr("name", "equalto", "grafana_apikey")) | list) | length == 1
- grafana_apikey_file.stat.exists == False
- name: Delete grafana API Key when grafana API Key file is missing
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/auth/keys/{{ apikey_id }}"
user: "{{ grafana_admin_user }}"
password: "{{ grafana_admin_password }}"
force_basic_auth: yes
method: DELETE
when:
- ((existing_api_keys['json'] | selectattr("name", "equalto", "grafana_apikey")) | list) | length == 1
- grafana_apikey_file.stat.exists == False
- name: Create grafana API Key
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/auth/keys"
user: "{{ grafana_admin_user }}"
password: "{{ grafana_admin_password }}"
force_basic_auth: yes
method: POST
body_format: json
body: "{{ item | to_json }}"
with_items: "{{ grafana_api_keys }}"
when: (((existing_api_keys['json'] | selectattr("name", "equalto", item['name'])) | list) | length == 0) or (((existing_api_keys['json'] | selectattr("name", "equalto", "grafana_apikey")) | list) | length == 1 and grafana_apikey_file.stat.exists == False)
register: new_api_keys
- name: Create grafana API key file
become: no
copy:
dest: "{{ grafana_api_keys_dir }}/{{ item['item']['name'] }}.key"
content: "{{ item['json']['key'] }}"
backup: no
when: item['json'] is defined
with_items: "{{ new_api_keys['results'] }}"
delegate_to: localhost
================================================
FILE: common_tasks/get_pd_leader.yml
================================================
---
- name: get PD leader info
uri:
url: "http://{{ pd_addr }}/pd/api/v1/leader"
method: GET
return_content: yes
status_code: 200
register: pd_leader_info
================================================
FILE: common_tasks/get_pd_leader_tls.yml
================================================
---
- name: get PD leader info when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/leader"
validate_certs: no
client_cert: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}.pem"
client_key: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}-key.pem"
method: GET
return_content: yes
status_code: 200
register: pd_leader_info
================================================
FILE: common_tasks/get_pd_name.yml
================================================
---
- name: get PD name
uri:
url: "http://{{ pd_addr }}/pd/api/v1/members"
method: GET
return_content: yes
status_code: 200
register: pd_info
- set_fact:
pd_name_list: "{{ pd_info.json.members | json_query(query) }}"
vars:
query: '[?client_urls==[`http://{{ pd_addr }}`]].name'
- set_fact:
pd_name: "{{ pd_name_list[0] }}"
================================================
FILE: common_tasks/get_pd_name_tls.yml
================================================
---
- name: get PD name
uri:
url: "https://{{ pd_addr }}/pd/api/v1/members"
validate_certs: no
client_cert: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}.pem"
client_key: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}-key.pem"
method: GET
return_content: yes
status_code: 200
register: pd_info
- set_fact:
pd_name_list: "{{ pd_info.json.members | json_query(query) }}"
vars:
query: '[?client_urls==[`https://{{ pd_addr }}`]].name'
- set_fact:
pd_name: "{{ pd_name_list[0] }}"
================================================
FILE: common_tasks/get_pd_tikv_addr.yml
================================================
---
- set_fact:
pd_host: "{{ hostvars[groups.pd_servers[0]].ansible_host | default(hostvars[groups.pd_servers[0]].inventory_hostname) }}"
pd_client_port: "{{ hostvars[groups.pd_servers[0]].pd_client_port }}"
- set_fact:
pd_addr: "{{ pd_host }}:{{ pd_client_port }}"
tikv_addr: "{{ ansible_host }}:{{ tikv_port }}"
- name: display pd addr
debug:
var: pd_addr
- name: display tikv addr
debug:
var: tikv_addr
================================================
FILE: common_tasks/get_store_id.yml
================================================
---
- name: get store info from PD
uri:
url: "http://{{ pd_addr }}/pd/api/v1/stores"
method: GET
return_content: yes
status_code: 200
register: stores_info
- set_fact:
store_id: "{{ item }}"
with_items: "{{ stores_info.json|json_query(store_id_query) }}"
vars:
store_id_query: "stores[?store.address==`{{ tikv_addr }}`].store.id"
- name: display store id
debug:
var: store_id
- name: check store_id is defined
fail:
msg: "The tikv node of {{ tikv_addr }} is not registered in this cluster."
when: store_id is not defined
================================================
FILE: common_tasks/get_store_id_tls.yml
================================================
---
- name: get store info from PD when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/stores"
validate_certs: no
client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem"
client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem"
method: GET
return_content: yes
status_code: 200
register: stores_info_tls
- set_fact:
store_id_tls: "{{ item }}"
with_items: "{{ stores_info_tls.json|json_query(store_id_query) }}"
vars:
store_id_query: "stores[?store.address==`{{ tikv_addr }}`].store.id"
- name: display store id
debug:
var: store_id_tls
================================================
FILE: common_tasks/remove_evict_leader_scheduler.yml
================================================
---
- name: remove evict-leader-scheduler
uri:
url: "http://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-scheduler-{{ store_id }}"
method: DELETE
status_code: 200
when: not enable_tls|default(false)
- name: remove evict-leader-scheduler when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-scheduler-{{ store_id_tls }}"
validate_certs: no
client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem"
client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem"
method: DELETE
status_code: 200
when: enable_tls|default(false)
================================================
FILE: common_tasks/transfer_pd_leader.yml
================================================
---
- set_fact:
pd_leader_name: "{{ pd_leader_info.json.name }}"
- name: display PD leader name
debug:
var: pd_leader_name
- name: transfer PD leader to another PD server
uri:
url: "http://{{ pd_addr }}/pd/api/v1/leader/resign"
method: POST
status_code: 200
when:
- groups['pd_servers'] | length >= 3
- pd_leader_name == pd_name
- not enable_tls|default(false)
- name: transfer PD leader to another PD server when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/leader/resign"
method: POST
validate_certs: no
client_cert: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}.pem"
client_key: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}-key.pem"
status_code: 200
when:
- groups['pd_servers'] | length >= 3
- pd_leader_name == pd_name
- enable_tls|default(false)
- name: wait for transfering PD leader
pause:
seconds: 10
when:
- groups['pd_servers'] | length >= 3
- pd_leader_name == pd_name
- name: check current PD leader
uri:
url: "http://{{ pd_addr }}/pd/api/v1/leader"
method: GET
return_content: yes
body_format: json
status_code: 200
register: pd_leader_info
until: pd_leader_info.json is defined and pd_leader_info.json.name is defined and pd_leader_info.json.name != pd_name
retries: 12
delay: 10
failed_when: false
when:
- groups['pd_servers'] | length >= 3
- pd_leader_name == pd_name
- not enable_tls|default(false)
- name: check current PD leader when enable_tls|default(false)
uri:
url: "https://{{ pd_addr }}/pd/api/v1/leader"
validate_certs: no
client_cert: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}.pem"
client_key: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}-key.pem"
method: GET
return_content: yes
body_format: json
status_code: 200
register: pd_leader_info_tls
until: pd_leader_info_tls.json is defined and pd_leader_info_tls.json.name is defined and pd_leader_info_tls.json.name != pd_name
retries: 12
delay: 10
failed_when: false
when:
- groups['pd_servers'] | length >= 3
- pd_leader_name == pd_name
- enable_tls|default(false)
================================================
FILE: conf/alertmanager.yml
================================================
global:
# The smarthost and SMTP sender used for mail notifications.
smtp_smarthost: 'localhost:25'
smtp_from: 'alertmanager@example.org'
smtp_auth_username: 'alertmanager'
smtp_auth_password: 'password'
# smtp_require_tls: true
# The Slack webhook URL.
# slack_api_url: ''
route:
# A default receiver
receiver: "db-alert-email"
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
group_by: ['env','instance','alertname','type','group','job']
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This way ensures that you get multiple alerts for the same group that start
# firing shortly after another are batched together on the first
# notification.
group_wait: 30s
# When the first notification was sent, wait 'group_interval' to send a batch
# of new alerts that started firing for that group.
group_interval: 3m
# If an alert has successfully been sent, wait 'repeat_interval' to
# resend them.
repeat_interval: 3m
routes:
# - match:
# receiver: webhook-kafka-adapter
# continue: true
# - match:
# env: test-cluster
# receiver: db-alert-slack
# - match:
# env: test-cluster
# receiver: db-alert-email
receivers:
# - name: 'webhook-kafka-adapter'
# webhook_configs:
# - send_resolved: true
# url: 'http://10.0.3.6:28082/v1/alertmanager'
#- name: 'db-alert-slack'
# slack_configs:
# - channel: '#alerts'
# username: 'db-alert'
# icon_emoji: ':bell:'
# title: '{{ .CommonLabels.alertname }}'
# text: '{{ .CommonAnnotations.summary }} {{ .CommonAnnotations.description }} expr: {{ .CommonLabels.expr }} http://172.0.0.1:9093/#/alerts'
- name: 'db-alert-email'
email_configs:
- send_resolved: true
to: 'xxx@xxx.com'
================================================
FILE: conf/drainer.toml
================================================
# drainer Configuration.
# the interval time (in seconds) of detecting the pumps' status
detect-interval = 10
# Use the specified compressor algorithm to compress payload between pump and drainer
# compressor = "gzip"
# syncer Configuration.
[syncer]
# Assume the upstream sql-mode.
# If this is set, drainer will use the sql-mode to parse the DDL statement
# sql-mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"
# disable sync these schema
ignore-schemas = "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql"
# number of binlog events in a transaction batch
txn-batch = 20
# work count to execute binlogs
# if the latency between drainer and downstream(mysql or tidb) are too high, you might want to increase this
# to get higher throughput by higher concurrent write to the downstream
worker-count = 16
# whether to disable the SQL feature of splitting a single binlog event.
# If it is set to "true", binlog events are restored to a single transaction for synchronization based on the order of binlogs.
# If the downstream service is MySQL, set it to "False".
disable-dispatch = false
# safe mode will split update to delete and insert
safe-mode = false
# downstream storage, equal to --dest-db-type
# valid values are "mysql", "file", "tidb", "kafka"
db-type = "mysql"
# ignore syncing the txn with specified commit ts to downstream
ignore-txn-commit-ts = []
# replicate-do-db priority over replicate-do-table if have same db name
# and we support regex expression , start with '~' declare use regex expression.
# replicate-do-db = ["~^b.*","s1"]
# [[syncer.replicate-do-table]]
# db-name ="test"
# tbl-name = "log"
# [[syncer.replicate-do-table]]
# db-name ="test"
# tbl-name = "~^a.*"
# disable sync these table
# [[syncer.ignore-table]]
# db-name = "test"
# tbl-name = "log"
# the downstream mysql protocol database
[syncer.to]
host = "127.0.0.1"
user = "root"
password = ""
port = 3306
# Uncomment this if you want to use file as db-type.
# [syncer.to]
# dir = "data.drainer"
# when db-type is kafka, you can uncomment this to configure the downstream kafka; it will be the global kafka config by default
# [syncer.to]
# only need to configure one of zookeeper-addrs and kafka-addrs; the kafka address will be obtained from zookeeper if zookeeper-addrs is configured.
# zookeeper-addrs = "127.0.0.1:2181"
# kafka-addrs = "127.0.0.1:9092"
# kafka-version = "0.8.2.0"
# kafka-max-messages = 1024
# the topic name drainer will push msg, the default name is <cluster-id>_obinlog
# be careful not to use the same name if running multiple drainer instances
# topic-name = ""
================================================
FILE: conf/pd.yml
================================================
---
# default configuration file for pd in yaml format
global:
# lease: 3
# tso-save-interval: "3s"
security:
log:
#level: "info"
# file logging
file:
# max log file size in MB
# max-size: 300
# max log file keep days
# max-days: 28
# maximum number of old log files to retain
# max-backups: 7
# rotate log by day
# log-rotate: true
metric:
schedule:
# max-merge-region-size: 20
# max-merge-region-keys: 200000
# split-merge-interval: "1h"
# max-snapshot-count: 3
# max-pending-peer-count: 16
# max-store-down-time: "30m"
# leader-schedule-limit: 4
# region-schedule-limit: 64
# replica-schedule-limit: 64
# merge-schedule-limit: 8
# enable-one-way-merge: false
replication:
# The number of replicas for each region.
# max-replicas: 3
# The label keys specified the location of a store.
# The placement priorities is implied by the order of label keys.
# For example, ["zone", "rack"] means that we should place replicas to
# different zones first, then to different racks if we don't have enough zones.
# location-labels: []
dashboard:
## Configurations below are for the TiDB Dashboard embedded in the PD.
## The path of the CA certificate used to verify the TiDB server in TLS.
# tidb-cacert-path: ""
## The path of the certificate used to connect to TiDB server in TLS.
# tidb-cert-path: ""
## The path of the certificate private key.
# tidb-key-path: ""
## The public path prefix to serve Dashboard urls. It can be set when Dashboard
## is running behind a reverse proxy. Do not configure it if you access
## Dashboard directly.
# public-path-prefix: "/dashboard"
## When enabled, request will be proxied to the instance running Dashboard
## internally instead of result in a 307 redirection.
# internal-proxy: false
## When enabled, usage data will be sent to PingCAP for improving user experience.
# enable-telemetry: true
================================================
FILE: conf/pump.yml
================================================
---
# default configuration file for pump in yaml format
global:
# an integer value to control the expiry date of the binlog data, indicating for how long (in days) the binlog data will be stored.
# must be bigger than 0
# gc: 7
# number of seconds between heartbeat ticks (in 2 seconds)
# heartbeat-interval: 2
security:
# Path of file that contains list of trusted SSL CAs for connection with cluster components.
# ssl-ca: "/path/to/ca.pem"
# Path of file that contains X509 certificate in PEM format for connection with cluster components.
# ssl-cert: "/path/to/drainer.pem"
# Path of file that contains X509 key in PEM format for connection with cluster components.
# ssl-key: "/path/to/drainer-key.pem"
storage:
# Set to true (by default) to guarantee reliability by ensuring binlog data is flushed to the disk.
# sync-log: true
# stop write when disk available space less than the configured size
# 42 MB -> 42000000, 42 mib -> 44040192
# default: 10 gib
# stop-write-at-available-space = "10 gib"
================================================
FILE: conf/spark-defaults.yml
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.
# Example:
# spark.eventLog.dir: "hdfs://namenode:8021/directory"
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.eventLog.enabled: false
spark.driver.memory: 2g
# TiSpark configuration items
# PD Cluster Addresses, split by comma.
# Do not need to fill, it will be automatically generated through ansible
# spark.tispark.pd.addresses: 127.0.0.1:2379
# Max frame size of GRPC response
spark.tispark.grpc.framesize: 2147483647
# GRPC timeout time in seconds
spark.tispark.grpc.timeout_in_sec: 100
# Metastore reload period in seconds
spark.tispark.meta.reload_period_in_sec: 60
# If allow aggregation pushdown (in case of busy TiKV nodes)
# spark.tispark.plan.allow_agg_pushdown: true
# If allow index double read (which might cause heavy pressure on TiKV)
# spark.tispark.plan.allow_index_double_read: false
# How many row key in batch for concurrent index scan
# spark.tispark.index.scan_batch_size: 2000000
# Maximal threads for index scan retrieving row keys (shared among tasks inside each JVM)
# spark.tispark.index.scan_concurrency: 2
# Maximal threads for table scan (shared among tasks inside each JVM)
spark.tispark.table.scan_concurrency: 256
# Can be "Low", "Normal", "High", which impacts the resource to get in TiKV. Low is recommended for not disturbing OLTP workload
spark.tispark.request.command.priority: "Low"
# Whether to use streaming for response fetching
# spark.tispark.coprocess.streaming: false
# A comma separated list of expressions. In case you have very old version of TiKV, you might disable some of the expression push-down if not supported
# spark.tispark.plan.unsupported_pushdown_exprs: ""
# If index scan handles for one region exceeds this limit in original request, downgrade the request to a full table scan rather than original planned index scan
# spark.tispark.plan.downgrade.index_threshold: 100000
# An integer, represents timezone offset to UTC time(like 28800, GMT+8), this value will be added to requests issued to TiKV
# spark.tispark.request.timezone.offset: 28800
# Whether to load statistics info automatically during database mapping
# spark.tispark.statistics.auto_load: true
# spark.tispark.plan.allow_index_read: true
spark.sql.extensions: org.apache.spark.sql.TiExtensions
================================================
FILE: conf/spark-env.yml
================================================
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.
# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append
# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
# Options read in YARN client mode
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file.
# export JAVA_HOME, to set jdk home
# Maybe you can get cores and memory through the following configuration
# ansible spark_master,spark_slaves -m shell -a 'cat /proc/cpuinfo| grep "processor"| wc -l'
# ansible spark_master,spark_slaves -m shell -a "free" | grep 'Mem' | awk '{print int($2/1024/1024/1.25)}'
SPARK_EXECUTOR_CORES: 5
SPARK_EXECUTOR_MEMORY: 10g
SPARK_WORKER_CORES: 5
SPARK_WORKER_MEMORY: 10g
================================================
FILE: conf/ssl/ca-config.json
================================================
{
"signing": {
"default": {
"expiry": "876000h"
},
"profiles": {
"server": {
"expiry": "876000h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
},
"client": {
"expiry": "876000h",
"usages": [
"signing",
"key encipherment",
"client auth"
]
}
}
}
}
================================================
FILE: conf/ssl/ca-csr.json
================================================
{
"CN": "My own CA",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "Beijing",
"O": "PingCAP",
"ST": "Beijing"
}
]
}
================================================
FILE: conf/tidb-lightning.yml
================================================
---
### tidb-lightning configuration
lightning:
# check if the cluster satisfies the minimum requirement before starting
# check-requirements = true
# table-concurrency controls the maximum handled tables concurrently while reading Mydumper SQL files.
# index-concurrency controls the maximum handled index concurrently while reading Mydumper SQL files.
# They can affect the tikv-importer memory and disk usage.
# table-concurrency + index-concurrency must be <= the max-open-engines value in tikv-importer.toml
index-concurrency: 2
table-concurrency: 6
# region-concurrency changes the concurrency number of data. It is set to the number of logical CPU cores by default and needs no configuration.
# in mixed configuration, you can set it to 75% of the size of logical CPU cores.
# region-concurrency default to runtime.NumCPU()
# region-concurrency:
# io-concurrency controls the maximum IO concurrency
io-concurrency: 5
# logging
level: "info"
max-size: 128 # MB
max-days: 28
max-backups: 14
checkpoint:
# Whether to enable checkpoints.
# While importing, Lightning will record which tables have been imported, so even if Lightning or other component
# crashed, we could start from a known good state instead of redoing everything.
enable: true
# The schema name (database name) to store the checkpoints
schema: "tidb_lightning_checkpoint"
# Where to store the checkpoints.
# Set to "file" to store as a local file.
# Set to "mysql" to store into a remote MySQL-compatible database
# driver: "file"
# The data source name (DSN) indicating the location of the checkpoint storage.
# For "file" driver, the DSN is a path. If not specified, Lightning would default to "/tmp/CHKPTSCHEMA.pb".
# For "mysql" driver, the DSN is a URL in the form "USER:PASS@tcp(HOST:PORT)/".
# If not specified, the TiDB server from the [tidb] section will be used to store the checkpoints.
# dsn: "/tmp/tidb_lightning_checkpoint.pb"
# Whether to keep the checkpoints after all data are imported. If false, the checkpoints will be deleted. The schema
# needs to be dropped manually, however.
# keep-after-success: false
tikv_importer:
# delivery back end ("tidb" or "importer")
backend: "importer"
# action on duplicated entry ("error", "ignore" or "replace")
# on-duplicate: "replace"
mydumper:
# block size of file reading
read-block-size: 65536 # Byte (default = 64 KB)
# minimum size (in terms of source data file) of each batch of import.
# Lightning will split a large table into multiple engine files according to this size.
# batch-size: 107374182400 # Byte (default = 100 GiB)
# Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be
# imported nearly the same time, and this will create a queue and this wastes resources. Therefore,
# Lightning will slightly increase the size of the first few batches to properly distribute
# resources. The scale up is controlled by this parameter, which expresses the ratio of duration
# between the "import" and "write" steps with full concurrency. This can be calculated as the ratio
# (import duration / write duration) of a single table of size around 1 GB. The exact timing can be
# found in the log. If "import" is faster, the batch size anomaly is smaller, and a ratio of
# zero means uniform batch size. This value should be in the range (0 <= batch-import-ratio < 1).
# batch-import-ratio: 0.75
# the source data directory of Mydumper. tidb-lightning will automatically create the corresponding database and tables based on the schema file in the directory.
# data-source-dir: "/data/mydumper"
# If no-schema is set to true, tidb-lightning will obtain the table schema information from tidb-server,
# instead of creating the database or tables based on the schema file of data-source-dir.
# This applies to manually creating tables or the situation where the table schema already exists in TiDB.
no-schema: false
# the character set of the schema files; only supports one of:
# - utf8mb4: the schema files must be encoded as UTF-8, otherwise will emit errors
# - gb18030: the schema files must be encoded as GB-18030, otherwise will emit errors
# - auto: (default) automatically detect if the schema is UTF-8 or GB-18030, error if the encoding is neither
# - binary: do not try to decode the schema files
# note that the *data* files are always parsed as binary regardless of schema encoding.
# character-set: "auto"
# CSV files are imported according to MySQL's LOAD DATA INFILE rules.
# See https://pingcap.com/docs/tools/lightning/csv/ for details of these settings
csv:
separator: ','
delimiter: '"'
header: true
not-null: false
'null': \N
backslash-escape: true
trim-last-separator: false
# configuration for TiDB (pick one of them if it has many TiDB servers) and the PD server.
tidb:
# the target cluster information
# the listening address of tidb-server. Setting one of them is enough.
# host: "127.0.0.1"
# port: 4000
# user: "root"
# password: ""
# table schema information is fetched from TiDB via this status-port.
# status-port: 10080
# Lightning uses some code of TiDB (used as a library) and the flag controls its log level.
log-level: "error"
# Set tidb session variables to speed up checksum/analyze table.
# See https://pingcap.com/docs/sql/statistics/#control-analyze-concurrency for the meaning of each setting
build-stats-concurrency: 20
distsql-scan-concurrency: 100
index-serial-scan-concurrency: 20
checksum-table-concurrency: 16
# cron performs some periodic actions in background
cron:
# duration between which Lightning will automatically refresh the import mode status.
# should be shorter than the corresponding TiKV setting
switch-mode: '5m'
# the interval at which import progress will be printed to the log.
log-progress: '5m'
# post-restore provide some options which will be executed after all kv data has been imported into the tikv cluster.
# the execution order (if set to true) is: checksum -> compact -> analyze
post_restore:
# if it is set to true, tidb-lightning will perform the ADMIN CHECKSUM TABLE <table> operation on the tables one by one.
checksum: true
# compaction is performed automatically starting v2.1.6. These settings should be left as `false`.
# level-1-compact: false
# compact: false
# if it is set to true, tidb-lightning will perform the ANALYZE TABLE <table> operation on the tables one by one.
# If the Analyze operation fails, you can analyze data manually using the MySQL client.
analyze: true
================================================
FILE: conf/tidb.yml
================================================
---
# default configuration file for TiDB in yaml format
global:
# TiDB Configuration.
# The socket file to use for connection.
# socket: ""
# Schema lease duration, very dangerous to change only if you know what you do.
# lease: "45s"
# The limit of concurrent executed sessions.
# token-limit: 1000
# Only print a log when out of memory quota.
# Valid options: ["log", "cancel"]
# oom-action: "cancel"
# Set the memory quota for a query in bytes. Default: 32GB
# mem-quota-query: 34359738368
# Make "kill query" behavior compatible with MySQL. It's not recommended to
# turn on this option when TiDB server is behind a proxy.
# compatible-kill-query: false
# check mb4 value in utf8 is used to control whether to check the mb4 characters when the charset is utf8.
# check-mb4-value-in-utf8: true
# max-index-length is used to deal with compatibility issues from v3.0.7 and previous version upgrades. It can only be in [3072, 3072*4].
# max-index-length: 3072
# alter-primary-key is used to control alter primary key feature. Default is false, indicate the alter primary key feature is disabled.
# If it is true, we can add the primary key by "alter table". However, if a table already exists before the switch is turned true and
# the data type of its primary key column is an integer, the primary key cannot be dropped.
# alter-primary-key: false
# server-version is used to change the version string of TiDB in the following scenarios:
# 1. the server version returned by builtin-function `VERSION()`.
# 2. the server version filled in handshake packets of MySQL Connection Protocol, see https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::Handshake for more details.
# if server-version = "", the default value(original TiDB version string) is used.
server-version: ""
# Whether new collations are enabled. As indicated by its name, this configuration entry takes effect ONLY when a TiDB cluster bootstraps for the first time.
new_collations_enabled_on_first_bootstrap: false
# When enabled, usage data (for example, instance versions) will be reported to PingCAP periodically for user experience analytics.
# If this config is set to `false` on all TiDB servers, telemetry will be always disabled regardless of the value of the global variable `tidb_enable_telemetry`.
# See PingCAP privacy policy for details: https://pingcap.com/en/privacy-policy/
# enable-telemetry: true
log:
# Log level: debug, info, warn, error, fatal.
# level: "info"
# Queries with execution time greater than this value will be logged. (Milliseconds)
# slow-threshold: 300
# Queries with internal result greater than this value will be logged.
# expensive-threshold: 10000
status:
# TiDB status host.
# status-host: "0.0.0.0"
# Prometheus pushgateway address, leaves it empty will disable prometheus push.
# TiDB status port.
# status-port: 10080
# Prometheus pushgateway address, leaves it empty will disable prometheus push.
# metrics-addr: ""
# Prometheus client push interval in second, set \"0\" to disable prometheus push.
# metrics-interval: 15
performance:
# Max CPUs to use, 0 use number of CPUs in the machine.
# max-procs: 0
# Max memory size to use, 0 use the total usable memory in the machine.
# max-memory: 0
# StmtCountLimit limits the max count of statement inside a transaction.
# stmt-count-limit: 5000
# Stats lease duration, which influences the time of analyze and stats load.
# stats-lease: "3s"
proxy_protocol:
prepared_plan_cache:
# enabled: false
# capacity: 100
# memory-guard-ratio: 0.1
opentracing:
# Enable opentracing.
# enable: false
# Whether to enable the rpc metrics.
# rpc-metrics: false
sampler:
# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote
# type: "const"
# Param is a value passed to the sampler.
# Valid values for Param field are:
# - for "const" sampler, 0 or 1 for always false/true respectively
# - for "probabilistic" sampler, a probability between 0 and 1
# - for "rateLimiting" sampler, the number of spans per second
# - for "remote" sampler, param is the same as for "probabilistic"
# and indicates the initial sampling rate before the actual one
# is received from the mothership
# param: 1.0
# SamplingServerURL is the address of jaeger-agent's HTTP sampling server
# sampling-server-url: ""
# MaxOperations is the maximum number of operations that the sampler
# will keep track of. If an operation is not tracked, a default probabilistic
# sampler will be used rather than the per operation specific sampler.
# max-operations: 0
# SamplingRefreshInterval controls how often the remotely controlled sampler will poll
# jaeger-agent for the appropriate sampling strategy.
# sampling-refresh-interval: 0
reporter:
# QueueSize controls how many spans the reporter can keep in memory before it starts dropping
# new spans. The queue is continuously drained by a background go-routine, as fast as spans
# can be sent out of process.
# queue-size: 0
# BufferFlushInterval controls how often the buffer is force-flushed, even if it's not full.
# It is generally not useful, as it only matters for very low traffic services.
# buffer-flush-interval: 0
# LogSpans, when true, enables LoggingReporter that runs in parallel with the main reporter
# and logs all submitted spans. Main Configuration.Logger must be initialized in the code
# for this option to have any effect.
# log-spans: false
# LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address
# local-agent-host-port: ""
tikv_client:
# Max gRPC connections that will be established with each tikv-server.
# grpc-connection-count: 4
# After a duration of this time in seconds if the client doesn't see any activity it pings
# the server to see if the transport is still alive.
# grpc-keepalive-time: 10
# After having pinged for keepalive check, the client waits for a duration of Timeout in seconds
# and if no activity is seen even after that the connection is closed.
# grpc-keepalive-timeout: 3
# max time for commit command, must be twice bigger than raft election timeout.
# commit-timeout: "41s"
# Max batch size in gRPC.
# max-batch-size: 128
# Overload threshold of TiKV.
# overload-threshold: 200
# Max batch wait time in nanosecond to avoid waiting too long. 0 means disable this feature.
# max-batch-wait-time: 0
# Batch wait size, to avoid waiting too long.
# batch-wait-size: 8
txn_local_latches:
binlog:
# WriteTimeout specifies how long it will wait for writing binlog to pump.
# write-timeout: "15s"
# If IgnoreError is true, when writting binlog meets error, TiDB would stop writting binlog,
# but still provide service.
# ignore-error: false
pessimistic_txn:
# enable pessimistic transaction.
# enable: true
# max retry count for a statement in a pessimistic transaction.
# max-retry-count: 256
experimental:
# enable column attribute `auto_random` to be defined on the primary key column.
allow-auto-random: false
# enable creating expression index.
allow-expression-index: false
================================================
FILE: conf/tiflash-learner.yml
================================================
# TiKV config template
# Human-readable big numbers:
# File size(based on byte): KB, MB, GB, TB, PB
# e.g.: 1_048_576: "1MB"
# Time(based on ms): ms, s, m, h
# e.g.: 78_000: "1.3m"
readpool:
storage:
coprocessor:
server:
storage:
pd:
# This section will be overwritten by command line parameters
metric:
#address: "172.16.30.31:9531"
#interval: "15s"
#job: "tikv"
raftstore:
coprocessor:
rocksdb:
wal-dir: ""
defaultcf:
lockcf:
writecf:
raftdb:
defaultcf:
security:
ca-path: ""
cert-path: ""
key-path: ""
import:
================================================
FILE: conf/tiflash.yml
================================================
---
global:
display_name: "TiFlash"
default_profile: "default"
mark_cache_size: 5368709120
listen_host: "0.0.0.0"
flash:
flash_cluster:
refresh_interval: 20
update_rule_interval: 5
master_ttl: 60
proxy:
status:
logger:
count: 20
size: "1000M"
level: "debug"
application:
runAsDaemon: true
raft:
quotas:
default:
interval:
result_rows: 0
read_rows: 0
execution_time: 0
queries: 0
errors: 0
duration: 3600
users:
readonly:
quota: "default"
profile: "readonly"
password: ""
networks:
ip: "::/0"
default:
quota: "default"
profile: "default"
password: ""
networks:
ip: "::/0"
profiles:
readonly:
readonly: 1
default:
load_balancing: "random"
use_uncompressed_cache: 0
max_memory_usage: 10000000000
================================================
FILE: conf/tikv-importer.yml
================================================
---
# TiKV Importer configuration file template
global:
# log file.
# log level: trace, debug, info, warn, error, off.
log-level: "info"
server:
# size of thread pool for the gRPC server.
grpc-concurrency: 16
metric:
# the Prometheus client push job name.
job: "tikv-importer"
# the Prometheus client push interval.
interval: "15s"
# the Prometheus Pushgateway address.
# address: ""
rocksdb:
# the maximum number of concurrent background jobs.
max-background-jobs: 32
defaultcf:
# amount of data to build up in memory before flushing data to the disk.
write-buffer-size: "1GB"
# the maximum number of write buffers that are built up in memory.
max-write-buffer-number: 8
# the compression algorithms used in different levels.
# the algorithm at level-0 is used to compress KV data.
# the algorithm at level-6 is used to compress SST files.
# the algorithms at level-1 ~ level-5 are not used now.
compression-per-level: ["lz4", "no", "no", "no", "no", "no", "lz4"]
writecf:
compression-per-level: ["lz4", "no", "no", "no", "no", "no", "lz4"]
import:
# this directory is used to store the data written by `tidb-lightning`.
# import-dir: "/tmp/tikv/import"
# the number of threads to handle RPC requests.
num-threads: 16
# the number of concurrent import jobs.
num-import-jobs: 24
# the stream channel window size. Stream will be blocked when the channel is full.
stream-channel-window: 128
# maximum duration to prepare regions.
# max-prepare-duration = "5m"
# split regions into this size according to the importing data.
# region-split-size = "512MB"
# max-open-engines must be >= index-concurrency + table-concurrency value in tidb-lightning.toml
max-open-engines: 8
# speed limit of uploading SST to TiKV (unit: byte/s)
# upload-speed-limit: "512MB"
# minimum ratio of target store available space: store_available_space / store_capacity
# Importer will pause to upload SST to target store if its available ratio less than
# this value, and give the store some time window to balance regions.
min-available-ratio: 0.05
# Note: the machine's memory size should be more than
# (write-buffer-size * max-write-buffer-number * 2) + (num-import-jobs * region-split-size * 2)
================================================
FILE: conf/tikv.yml
================================================
---
## The default configuration file for TiKV in YAML format
## TiKV config template
## Human-readable big numbers:
## File size(based on byte): KB, MB, GB, TB, PB
## e.g.: 1_048_576 = "1MB"
## Time(based on ms): ms, s, m, h
## e.g.: 78_000 = "1.3m"
global:
## Log levels: trace, debug, info, warning, error, critical.
## Note that `debug` and `trace` are only available in development builds.
# log-level: "info"
## Timespan between rotating the log files.
## Once this timespan passes, log files will be rotated, i.e. existing log file will have a
## timestamp appended to its name and a new file will be created.
# log-rotation-timespan: "24h"
readpool:
## Configurations for the single thread pool serving read requests.
unified:
## The minimal working thread count of the thread pool.
# min-thread-count: 1
## The maximum working thread count of the thread pool.
## The default value is max(4, LOGICAL_CPU_NUM * 0.8).
# max-thread-count: 8
## Size of the stack for each thread in the thread pool.
# stack-size: "10MB"
## Max running tasks of each worker, reject if exceeded.
# max-tasks-per-worker: 2000
storage:
## Whether to use the unified read pool to handle storage requests.
# use-unified-pool: false
## The following configurations only take effect when `use-unified-pool` is false.
## Size of the thread pool for high-priority operations.
# high-concurrency: 4
## Size of the thread pool for normal-priority operations.
# normal-concurrency: 4
## Size of the thread pool for low-priority operations.
# low-concurrency: 4
## Max running high-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-high: 2000
## Max running normal-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-normal: 2000
## Max running low-priority operations of each worker, reject if exceeded.
# max-tasks-per-worker-low: 2000
## Size of the stack for each thread in the thread pool.
# stack-size: "10MB"
coprocessor:
## Whether to use the unified read pool to handle storage requests.
# use-unified-pool: true
## The following configurations only take effect when `use-unified-pool` is false.
## Most read requests from TiDB are sent to the coprocessor of TiKV. high/normal/low-concurrency is
## used to set the number of threads of the coprocessor.
## If there are many read requests, you can increase these config values (but keep it within the
## number of system CPU cores). For example, for a 32-core machine deployed with TiKV, you can even
## set these config to 30 in heavy read scenarios.
## If CPU_NUM > 8, the default thread pool size for coprocessors is set to CPU_NUM * 0.8.
# high-concurrency: 8
# normal-concurrency: 8
# low-concurrency: 8
# max-tasks-per-worker-high: 2000
# max-tasks-per-worker-normal: 2000
# max-tasks-per-worker-low: 2000
# stack-size: "10MB"
server:
## Advertise listening address for client communication.
## If not set, `addr` will be used.
# advertise-addr: ""
## Size of the thread pool for the gRPC server.
# grpc-concurrency: 4
## The number of max concurrent streams/requests on a client connection.
# grpc-concurrent-stream: 1024
## The number of connections with each TiKV server to send Raft messages.
# grpc-raft-conn-num: 1
## Amount to read ahead on individual gRPC streams.
# grpc-stream-initial-window-size: "2MB"
## Time to wait before sending out a ping to check if server is still alive.
## This is only for communications between TiKV instances.
# grpc-keepalive-time: "10s"
## Time to wait before closing the connection without receiving KeepAlive ping Ack.
# grpc-keepalive-timeout: "3s"
## How many snapshots can be sent concurrently.
# concurrent-send-snap-limit: 32
## How many snapshots can be received concurrently.
# concurrent-recv-snap-limit: 32
## Max allowed recursion level when decoding Coprocessor DAG expression.
# end-point-recursion-limit: 1000
## Max time to handle Coprocessor requests before timeout.
# end-point-request-max-handle-duration: "60s"
## Max bytes that snapshot can be written to disk in one second.
## It should be set based on your disk performance.
# snap-max-write-bytes-per-sec: "100MB"
## Attributes about this server, e.g. `{ zone = "us-west-1", disk = "ssd" }`.
# labels: {}
storage:
## The number of slots in Scheduler latches, which controls write concurrency.
## In most cases you can use the default value. When importing data, you can set it to a larger
## value, but no more than 2097152
# scheduler-concurrency: 524288
## Scheduler's worker pool size, i.e. the number of write threads.
## It should be less than total CPU cores. When there are frequent write operations, set it to a
## higher value. More specifically, you can run `top -H -p tikv-pid` to check whether the threads
## named `sched-worker-pool` are busy.
# scheduler-worker-pool-size: 4
## When the pending write bytes exceeds this threshold, the "scheduler too busy" error is displayed.
# scheduler-pending-write-threshold: "100MB"
## TiKV will create a temporary file in {{data-dir}} to reserve some space, which is named 'space_placeholder_file'.
## When the disk has no free space you could remove this temporary file so that TiKV can execute compaction
## job to reclaim disk space, which requires some extra temporary space.
# reserve-space: "2GB"
block-cache:
## Whether to create a shared block cache for all RocksDB column families.
##
## Block cache is used by RocksDB to cache uncompressed blocks. Big block cache can speed up
## read. It is recommended to turn on shared block cache. Since only the total cache size need
## to be set, it is easier to config. In most cases it should be able to auto-balance cache
## usage between column families with standard LRU algorithm.
##
## The rest of config in the storage.block-cache session is effective only when shared block
## cache is on.
# shared: true
## Size of the shared block cache. Normally it should be tuned to 30%-50% of system's total
## memory. When the config is not set, it is decided by the sum of the following fields or
## their default value:
## * rocksdb.defaultcf.block-cache-size or 25% of system's total memory
## * rocksdb.writecf.block-cache-size or 15% of system's total memory
## * rocksdb.lockcf.block-cache-size or 2% of system's total memory
## * raftdb.defaultcf.block-cache-size or 2% of system's total memory
##
## To deploy multiple TiKV nodes on a single physical machine, configure this parameter
## explicitly. Otherwise, the OOM problem might occur in TiKV.
# capacity: "1GB"
pd:
## PD endpoints.
# endpoints: []
metric:
## Prometheus client push interval.
## Setting the value to 0s stops Prometheus client from pushing.
# interval: "15s"
## Prometheus PushGateway address.
## Leaving it empty stops Prometheus client from pushing.
# address: ""
## Prometheus client push job name.
## Note: A node id will automatically append, e.g., "tikv_1".
# job: "tikv"
raftstore:
## Store capacity, i.e. max data size allowed.
## If it is not set, disk capacity is used.
# capacity: 0
## Internal notify capacity.
## 40960 is suitable for about 7000 Regions. It is recommended to use the default value.
# notify-capacity: 40960
## Maximum number of internal messages to process in a tick.
# messages-per-tick: 4096
## Region heartbeat tick interval for reporting to PD.
# pd-heartbeat-tick-interval: "60s"
## Store heartbeat tick interval for reporting to PD.
# pd-store-heartbeat-tick-interval: "10s"
## How long the peer will be considered down and reported to PD when it hasn't been active for this
## time.
# max-peer-down-duration: "5m"
## Interval to check whether to start manual compaction for a Region.
# region-compact-check-interval: "5m"
## Interval (s) to check Region whether the data are consistent.
# consistency-check-interval: 0
## Delay time before deleting a stale peer.
# clean-stale-peer-delay: "10m"
## Use how many threads to handle log apply
# apply-pool-size: 2
## Use how many threads to handle raft messages
# store-pool-size: 2
coprocessor:
rocksdb:
## Maximum number of threads of RocksDB background jobs.
## The background tasks include compaction and flush. For detailed information why RocksDB needs to
## do compaction, see RocksDB-related materials. When write traffic (like the importing data size)
## is big, it is recommended to enable more threads. But set the number of the enabled threads
## smaller than that of CPU cores. For example, when importing data, for a machine with a 32-core
## CPU, set the value to 28.
# max-background-jobs: 8
## Represents the maximum number of threads that will concurrently perform a sub-compaction job by
## breaking it into multiple, smaller ones running simultaneously.
# max-sub-compactions: 1
## Number of open files that can be used by the DB.
## Value -1 means files opened are always kept open and RocksDB will prefetch index and filter
## blocks into block cache at startup. So if your database has a large working set, it will take
## several minutes to open the DB. You may need to increase this if your database has a large
## working set. You can estimate the number of files based on `target-file-size-base` and
## `target_file_size_multiplier` for level-based compaction.
# max-open-files: 40960
## RocksDB Write-Ahead Logs (WAL) recovery mode.
## 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs;
## 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL;
## 2 : PointInTimeRecovery, Recover to point-in-time consistency;
## 3 : SkipAnyCorruptedRecords, Recovery after a disaster;
# wal-recovery-mode: 2
## RocksDB WAL directory.
## This config specifies the absolute directory path for WAL.
## If it is not set, the log files will be in the same directory as data. When you set the path to
## RocksDB directory in memory like in `/dev/shm`, you may want to set `wal-dir` to a directory on a
## persistent storage. See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
## If there are two disks on the machine, storing RocksDB data and WAL logs on different disks can
## improve performance.
# wal-dir: "/tmp/tikv/store"
## The following two fields affect how archived WAL will be deleted.
## 1. If both values are set to 0, logs will be deleted ASAP and will not get into the archive.
## 2. If `wal-ttl-seconds` is 0 and `wal-size-limit` is not 0, WAL files will be checked every 10
## min and if total size is greater than `wal-size-limit`, they will be deleted starting with the
## earliest until `wal-size-limit` is met. All empty files will be deleted.
## 3. If `wal-ttl-seconds` is not 0 and `wal-size-limit` is 0, then WAL files will be checked every
## `wal-ttl-seconds / 2` and those that are older than `wal-ttl-seconds` will be deleted.
## 4. If both are not 0, WAL files will be checked every 10 min and both checks will be performed
## with ttl being first.
## When you set the path to RocksDB directory in memory like in `/dev/shm`, you may want to set
## `wal-ttl-seconds` to a value greater than 0 (like 86400) and backup your DB on a regular basis.
## See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database .
# wal-ttl-seconds: 0
# wal-size-limit: 0
## Max RocksDB WAL size in total
# max-total-wal-size: "4GB"
## RocksDB Statistics provides cumulative stats over time.
## Turning statistics on will introduce about 5%-10% overhead for RocksDB, but it can help you to
## know the internal status of RocksDB.
# enable-statistics: true
## Dump statistics periodically in information logs.
## Same as RocksDB's default value (10 min).
# stats-dump-period: "10m"
## Refer to: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
## If you want to use RocksDB on multi disks or spinning disks, you should set value at least 2MB.
# compaction-readahead-size: 0
## Max buffer size that is used by WritableFileWrite.
# writable-file-max-buffer-size: "1MB"
## Use O_DIRECT for both reads and writes in background flush and compactions.
# use-direct-io-for-flush-and-compaction: false
## Allows OS to incrementally sync files to disk while they are being written, asynchronously,
## in the background.
# bytes-per-sync: "1MB"
## Allows OS to incrementally sync WAL to disk while it is being written.
# wal-bytes-per-sync: "512KB"
## Options for `Titan`.
titan:
## Enables or disables `Titan`. Note that Titan is still an experimental feature. Once
## enabled, it can't fall back. Forced fallback may result in data loss.
# enabled: false
## Maximum number of threads of `Titan` background gc jobs.
# max-background-gc: 1
## Options for "Default" Column Family, which stores actual user data.
defaultcf:
## Compression method (if any) is used to compress a block.
## no: kNoCompression
## snappy: kSnappyCompression
## zlib: kZlibCompression
## bzip2: kBZip2Compression
## lz4: kLZ4Compression
## lz4hc: kLZ4HCCompression
## zstd: kZSTD
## `lz4` is a compression algorithm with moderate speed and compression ratio. The compression
## ratio of `zlib` is high. It is friendly to the storage space, but its compression speed is
## slow. This compression occupies many CPU resources.
## Per level compression.
## This config should be chosen carefully according to CPU and I/O resources. For example, if you
## use the compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and find much I/O pressure of the
## system (run the `iostat` command to find %util lasts 100%, or run the `top` command to find many
## iowaits) when writing (importing) a lot of data while the CPU resources are adequate, you can
## compress level-0 and level-1 and exchange CPU resources for I/O resources. If you use the
## compression mode of "no:no:lz4:lz4:lz4:zstd:zstd" and you find the I/O pressure of the system is
## not big when writing a lot of data, but CPU resources are inadequate. Then run the `top` command
## and choose the `-H` option. If you find a lot of bg threads (namely the compression thread of
## RocksDB) are running, you can exchange I/O resources for CPU resources and change the compression
## mode to "no:no:no:lz4:lz4:zstd:zstd". In a word, it aims at making full use of the existing
## resources of the system and improving TiKV performance in terms of the current resources.
# compression-per-level: ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
## The data block size. RocksDB compresses data based on the unit of block.
## Similar to page in other databases, block is the smallest unit cached in block-cache. Note that
## the block size specified here corresponds to uncompressed data.
# block-size: "64KB"
## If you're doing point lookups you definitely want to turn bloom filters on. We use bloom filters
## to avoid unnecessary disk reads. Default bits_per_key is 10, which yields ~1% false positive
## rate. Larger `bloom-filter-bits-per-key` values will reduce false positive rate, but increase
## memory usage and space amplification.
# bloom-filter-bits-per-key: 10
# level0-file-num-compaction-trigger: 4
## Soft limit on number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-slowdown-writes-trigger`,
## RocksDB tries to slow down the write operation, because too many SST files of level-0 can cause
## higher read pressure of RocksDB.
# level0-slowdown-writes-trigger: 20
## Maximum number of level-0 files.
## When the number of SST files of level-0 reaches the limit of `level0-stop-writes-trigger`,
## RocksDB stalls the new write operation.
# level0-stop-writes-trigger: 36
## Amount of data to build up in memory (backed by an unsorted log on disk) before converting to a
## sorted on-disk file. It is the RocksDB MemTable size.
# write-buffer-size: "128MB"
## The maximum number of the MemTables. The data written into RocksDB is first recorded in the WAL
## log, and then inserted into MemTables. When the MemTable reaches the size limit of
## `write-buffer-size`, it turns into read only and generates a new MemTable receiving new write
## operations. The flush threads of RocksDB will flush the read only MemTable to the disks to become
## an SST file of level0. `max-background-flushes` controls the maximum number of flush threads.
## When the flush threads are busy, resulting in the number of the MemTables waiting to be flushed
## to the disks reaching the limit of `max-write-buffer-number`, RocksDB stalls the new operation.
## "Stall" is a flow control mechanism of RocksDB. When importing data, you can set the
## `max-write-buffer-number` value higher, like 10.
# max-write-buffer-number: 5
## The minimum number of write buffers that will be merged together before writing to storage.
# min-write-buffer-number-to-merge: 1
## Control maximum total data size for base level (level 1).
## When the level-1 data size reaches the limit value of `max-bytes-for-level-base`, the SST files
## of level-1 and their overlap SST files of level-2 will be compacted. The golden rule: the first
## reference principle of setting `max-bytes-for-level-base` is guaranteeing that the
## `max-bytes-for-level-base` value is roughly equal to the data volume of level-0. Thus
## unnecessary compaction is reduced. For example, if the compression mode is
## "no:no:lz4:lz4:lz4:lz4:lz4", the `max-bytes-for-level-base` value can be `write-buffer-size * 4`,
## because there is no compression of level-0 and level-1 and the trigger condition of compaction
## for level-0 is that the number of the SST files reaches 4 (the default value). When both level-0
## and level-1 adopt compaction, it is necessary to analyze RocksDB logs to know the size of an SST
## file compressed from a MemTable. For example, if the file size is 32MB, the proposed value of
## `max-bytes-for-level-base` is 32MB * 4 = 128MB.
# max-bytes-for-level-base: "512MB"
## Target file size for compaction.
## The SST file size of level-0 is influenced by the compaction algorithm of `write-buffer-size`
## and level0. `target-file-size-base` is used to control the size of a single SST file of level1 to
## level6.
# target-file-size-base: "8MB"
## Max bytes for `compaction.max_compaction_bytes`.
# max-compaction-bytes: "2GB"
## There are four different compaction priorities.
## 0 : ByCompensatedSize
## 1 : OldestLargestSeqFirst
## 2 : OldestSmallestSeqFirst
## 3 : MinOverlappingRatio
# compaction-pri: 3
## Enable read amplification statistics.
## value => memory usage (percentage of loaded blocks memory)
## 1 => 12.50 %
## 2 => 06.25 %
## 4 => 03.12 %
## 8 => 01.56 %
## 16 => 00.78 %
# read-amp-bytes-per-bit: 0
## Options for "Titan" for "Default" Column Family
titan:
## The smallest value to store in blob files. Value smaller than
## this threshold will be inlined in base DB.
# min-blob-size: "1KB"
## The compression algorithm used to compress data in blob files.
## Compression method.
## no: kNoCompression
## snappy: kSnappyCompression
## zlib: kZlibCompression
## bzip2: kBZip2Compression
## lz4: kLZ4Compression
## lz4hc: kLZ4HCCompression
## zstd: kZSTD
# blob-file-compression: "lz4"
## Specifics cache size for blob records
# blob-cache-size: "0GB"
## If the ratio of discardable size of a blob file is larger than
## this threshold, the blob file will be GCed out.
# discardable-ratio: 0.5
## The mode used to process blob files. In read-only mode Titan
## stops writing value into blob log. In fallback mode Titan
## converts blob index into real value on flush and compaction.
## This option is especially useful for downgrading Titan.
## default: kNormal
## read-only: kReadOnly
## fallback: kFallback
# blob-run-mode: "normal"
## Options for "Write" Column Family, which stores MVCC commit information
writecf:
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level: ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size: "64KB"
## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size: "128MB"
# max-write-buffer-number: 5
# min-write-buffer-number-to-merge: 1
## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base: "512MB"
# target-file-size-base: "8MB"
# level0-file-num-compaction-trigger: 4
# level0-slowdown-writes-trigger: 20
# level0-stop-writes-trigger: 36
# cache-index-and-filter-blocks: true
# pin-l0-filter-and-index-blocks: true
# compaction-pri: 3
# read-amp-bytes-per-bit: 0
# dynamic-level-bytes: true
lockcf:
# compression-per-level: ["no", "no", "no", "no", "no", "no", "no"]
# block-size: "16KB"
# write-buffer-size: "128MB"
# max-write-buffer-number: 5
# min-write-buffer-number-to-merge: 1
# max-bytes-for-level-base: "128MB"
# target-file-size-base: "8MB"
# level0-slowdown-writes-trigger: 20
# level0-stop-writes-trigger: 36
# cache-index-and-filter-blocks: true
# pin-l0-filter-and-index-blocks: true
# compaction-pri: 0
# read-amp-bytes-per-bit: 0
# dynamic-level-bytes: true
raftdb:
# max-background-jobs: 4
# max-sub-compactions: 2
# max-open-files: 40960
# max-manifest-file-size: "20MB"
# create-if-missing: true
# enable-statistics: true
# stats-dump-period: "10m"
# compaction-readahead-size: 0
# writable-file-max-buffer-size: "1MB"
# use-direct-io-for-flush-and-compaction: false
# enable-pipelined-write: true
# allow-concurrent-memtable-write: false
# bytes-per-sync: "1MB"
# wal-bytes-per-sync: "512KB"
# info-log-max-size: "1GB"
# info-log-roll-time: "0"
# info-log-keep-log-file-num: 10
# info-log-dir: ""
# optimize-filters-for-hits: true
defaultcf:
## Recommend to set it the same as `rocksdb.defaultcf.compression-per-level`.
# compression-per-level: ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"]
# block-size: "64KB"
## Recommend to set it the same as `rocksdb.defaultcf.write-buffer-size`.
# write-buffer-size: "128MB"
# max-write-buffer-number: 5
# min-write-buffer-number-to-merge: 1
## Recommend to set it the same as `rocksdb.defaultcf.max-bytes-for-level-base`.
# max-bytes-for-level-base: "512MB"
# target-file-size-base: "8MB"
# level0-file-num-compaction-trigger: 4
# level0-slowdown-writes-trigger: 20
# level0-stop-writes-trigger: 36
# cache-index-and-filter-blocks: true
# pin-l0-filter-and-index-blocks: true
# compaction-pri: 0
# read-amp-bytes-per-bit: 0
# dynamic-level-bytes: true
# optimize-filters-for-hits: true
security:
## The path for TLS certificates. Empty string means disabling secure connections.
# ca-path: ""
# cert-path: ""
# key-path: ""
# cert-allowed-cn: []
## Configurations for encryption at rest. Experimental.
encryption:
## Encryption method to use for data files.
## Possible values are "plaintext", "aes128-ctr", "aes192-ctr" and "aes256-ctr". Value other than
## "plaintext" means encryption is enabled, in which case master key must be specified.
# data-encryption-method: "plaintext"
## Specifies how often TiKV rotates data encryption key.
# data-key-rotation-period: "7d"
## Specifies master key if encryption is enabled. There are three types of master key:
##
## * "plaintext":
##
## Plaintext as master key means no master key is given and only applicable when
## encryption is not enabled, i.e. data-encryption-method: "plaintext". This type doesn't
## have sub-config items. Example:
##
## master-key:
## type: "plaintext"
##
## * "kms":
##
## Use a KMS service to supply master key. Currently only AWS KMS is supported. This type of
## master key is recommended for production use. Example:
##
## master-key:
## type: "kms"
## ## KMS CMK key id. Must be a valid KMS CMK where the TiKV process has access to.
## ## In production is recommended to grant access of the CMK to TiKV using IAM.
## key-id: "1234abcd-12ab-34cd-56ef-1234567890ab"
## ## AWS region of the KMS CMK.
## region: "us-west-2"
## ## (Optional) AWS KMS service endpoint. Only required when non-default KMS endpoint is
## ## desired.
## endpoint: "https://kms.us-west-2.amazonaws.com"
##
## * "file":
##
## Supply a custom encryption key stored in a file. It is recommended NOT to use in production,
## as it breaks the purpose of encryption at rest, unless the file is stored in tempfs.
## The file must contain a 256-bits (32 bytes, regardless of key length implied by
## data-encryption-method) key encoded as hex string and end with newline ("\n"). Example:
##
## master-key:
## type: "file"
## path: "/path/to/master/key/file"
##
# master-key:
# type: "plaintext"
## Specifies the old master key when rotating master key. Same config format as master-key.
## The key is only accessed once during TiKV startup; after that TiKV does not need access to the key.
## And it is okay to leave the stale previous-master-key config after master key rotation.
# previous-master-key:
# type: "plaintext"
import:
pessimistic_txn:
## Enable pessimistic transaction
# enabled: true
## Time to wait in milliseconds before responding to TiDB when pessimistic
## transactions encounter locks
# wait-for-lock-timeout: "1s"
## If more than one transaction is waiting for the same lock, only the one with the smallest
## start timestamp will be woken up immediately when the lock is released. Others will
## be woken up after `wake_up_delay_duration(ms)` to reduce contention and make the oldest
## one more likely to acquire the lock.
# wake-up-delay-duration: "20ms"
gc:
## The number of keys to GC in one batch.
# batch-keys: 512
## Max bytes that GC worker can write to rocksdb in one second.
## If it is set to 0, there is no limit.
# max-write-bytes-per-sec: "0"
================================================
FILE: create_users.yml
================================================
---
# Create a deployment user on every host, install the controller's local SSH
# public key for it, and grant passwordless sudo. The account name is supplied
# via the `username` variable (e.g. `-e username=tidb`).
- hosts: all
  tasks:
    # Create the account with a bash login shell and a home directory.
    - name: create user
      user:
        name: "{{ username }}"
        shell: /bin/bash
        createhome: yes
    # Authorize the controller's ~/.ssh/id_rsa.pub for the new account.
    - name: set authorized key
      authorized_key:
        state: present
        user: "{{ username }}"
        key: "{{ lookup('file', lookup('env','HOME')+ '/.ssh/id_rsa.pub') }}"
    # Append (or update in place, via regexp) a passwordless-sudo rule.
    - name: update sudoers file
      lineinfile:
        state: present
        dest: /etc/sudoers
        regexp: '^{{ username }} .*'
        insertafter: EOF
        line: '{{ username }} ALL=(ALL) NOPASSWD: ALL'
================================================
FILE: deploy.yml
================================================
---
# Copyright 2016 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
# The Playbook of TiDB
# Phase 1: pre-flight checks. Tagged `always` so they run for any tag selection.
- name: check config locally
  hosts: localhost
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_config_static
- name: check system environment
  hosts: monitored_servers
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_system_dynamic
- name: initializing deployment target
  hosts: all
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_config_dynamic
# Phase 2: per-component configuration pre-checks, run on the first host of
# each group only (the config is shared across the group).
- name: Pre-check PD configuration
  hosts: pd_servers[0]
  tags:
    - pd
  roles:
    - check_config_pd
- name: Pre-check TiKV configuration
  hosts: tikv_servers[0]
  tags:
    - tikv
  roles:
    - check_config_tikv
- name: Pre-check TiDB configuration
  hosts: tidb_servers[0]
  tags:
    - tidb
  roles:
    - check_config_tidb
# Phase 3: monitoring stack (exporters on every monitored host, then the
# central alertmanager/pushgateway/prometheus/grafana services).
- name: deploying node_exporter
  hosts: monitored_servers
  tags:
    - node_exporter
  roles:
    - node_exporter
- name: deploying blackbox_exporter
  hosts: monitored_servers
  tags:
    - blackbox_exporter
  roles:
    - blackbox_exporter
- name: deploying diagnostic tools
  hosts: monitored_servers
  tags:
    - collect_diagnosis
  roles:
    - collect_diagnosis
- name: deploying alertmanager
  hosts: alertmanager_servers
  tags:
    - alertmanager
  roles:
    - alertmanager
- name: deploying pushgateway
  hosts: monitoring_servers
  tags:
    - pushgateway
  roles:
    - pushgateway
- name: deploying prometheus
  hosts: monitoring_servers
  tags:
    - prometheus
  roles:
    - prometheus
- name: deploying grafana
  hosts: grafana_servers
  tags:
    - grafana
  roles:
    - grafana
# kafka_exporter only applies when binlog is enabled AND kafka addresses are set.
- name: deploying kafka_exporter
  hosts: kafka_exporter_servers
  tags:
    - kafka_exporter
  roles:
    - { role: kafka_exporter, when: 'enable_binlog|default(false) and kafka_addrs|default("") != ""' }
# deploying TiDB cluster
# Phase 4: the TiDB cluster itself, in dependency order: PD -> TiKV/TiFlash
# -> pump -> TiDB -> tispark -> lightning/importer.
- name: deploying PD cluster
  hosts: pd_servers
  tags:
    - pd
  roles:
    - pd
- name: deploying TiKV cluster
  hosts: tikv_servers
  tags:
    - tikv
  roles:
    - tikv
# TiFlash binaries are only available for amd64.
- name: deploying TiFlash cluster
  hosts: tiflash_servers
  tags:
    - tiflash
  roles:
    - { role: tiflash, when: cpu_architecture == 'amd64' }
- name: deploying pump cluster
  hosts: pump_servers
  tags:
    - pump
  roles:
    - { role: pump, when: enable_binlog|default(false) }
# If no dedicated spark_master/spark_slaves groups exist, tispark is co-located
# on the TiDB hosts; otherwise it is deployed in the dedicated play below.
- name: deploying TiDB cluster
  hosts: tidb_servers
  tags:
    - tidb
  roles:
    - tidb
    - { role: tispark, when: "(groups.get('spark_master', []) | length == 0 or groups.get('spark_slaves', []) | length == 0) and (deployment_method == 'binary')" }
- name: deploying tispark cluster
  hosts: spark_master,spark_slaves
  tags:
    - tispark
  roles:
    - { role: tispark, when: "groups.get('spark_master', []) | length != 0 and groups.get('spark_slaves', []) | length != 0 and deployment_method == 'binary'" }
- name: deploying tidb-lightning
  hosts: lightning_server
  tags:
    - lightning
  roles:
    - tidb_lightning
- name: deploying tikv-importer
  hosts: importer_server
  tags:
    - lightning
  roles:
    - tikv_importer
# Phase 5: optional firewalld configuration (root required) and host tooling.
- name: finalizing deployment target
  hosts: all
  become: true
  roles:
    - { role: firewalld, when: enable_firewalld is defined and enable_firewalld }
- name: deploying perf-tools
  hosts: monitored_servers
  tags:
    - always
  roles:
    - perf_tools
================================================
FILE: deploy_drainer.yml
================================================
---
# Copyright 2018 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
# The Playbook of TiDB
# Pre-flight checks — same sequence as deploy.yml, tagged `always` so they
# run regardless of the tag selection.
- name: check config locally
  hosts: localhost
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_config_static
- name: check system environment
  hosts: monitored_servers
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_system_dynamic
- name: initializing deployment target
  hosts: all
  any_errors_fatal: true
  tags:
    - always
  roles:
    - check_config_dynamic
# Drainer is only deployed when binlog support is enabled in the inventory.
- name: deploying drainer(binlog cluster)
  hosts: drainer_servers
  tags:
    - drainer
  roles:
    - { role: drainer, when: enable_binlog|default(false) }
# Optional firewalld configuration; requires root, hence `become: true`.
- name: finalizing deployment target
  hosts: all
  become: true
  roles:
    - { role: firewalld, when: enable_firewalld is defined and enable_firewalld }
================================================
FILE: deploy_ntp.yml
================================================
---
- hosts: all
tasks:
- name: get facts
setup:
    # Install the NTP daemon on RedHat-family hosts. NOTE(review): the task
    # name mentions ntpstat but only `ntp` is listed here (the Debian task
    # below installs both) — presumably ntpstat ships inside RedHat's `ntp`
    # package; confirm before adding it as a separate item.
    - name: RedHat family Linux distribution - make sure ntp, ntpstat have been installed
      yum:
        name: "{{ item }}"
        state: present
      with_items:
        - ntp
      when:
        - ansible_os_family == "RedHat"
- name: RedHat family Linux distribution - make sure ntpdate have been installed
yum:
name: "{{ item }}"
state: present
with_items:
- ntpdate
when:
- ansible_os_family == "RedHat"
- ntp_server is defined
- name: Debian family Linux distribution - make sure ntp, ntpstat have been installed
apt:
name: "{{ item }}"
state: present
with_items:
- ntp
- ntpstat
when:
- ansible_os_family == "Debian"
- name: Debian family Linux distribution - make sure ntpdate have been installed
apt:
name: "{{ item }}"
state: present
with_items:
- ntpdate
when:
- ansible_os_family == "Debian"
- ntp_server is defined
- name: RedHat family Linux distribution - make sure ntpd service has been stopped
service:
name: ntpd
state: stopped
when:
- ansible_os_family == "RedHat"
- ntp_server is defined
- name: Debian family Linux distribution - make sure ntp service has been stopped
service:
name: ntp
state: stopped
when:
- ansible_os_family == "Debian"
- ntp_server is defined
- name: Adjust Time | start to adjust time with {{ ntp_server }}
shell: ntpdate {{ ntp_server }}
when: ntp_server is defined
- name: RedHat family Linux distribution - make sure ntpd service has been started
service:
name: ntpd
state: started
when:
- ansible_os_family == "RedHat"
- name: Debian family Linux distribution - Make sure ntp service has been started
service:
name: ntp
state: started
when:
- ansible_os_family
gitextract_3y37qitl/ ├── .gitignore ├── LICENSE ├── README.md ├── ansible.cfg ├── bootstrap.yml ├── callback_plugins/ │ ├── help.py │ └── yaml.py ├── clean_log_cron.yml ├── cloud/ │ └── aws-ansible/ │ ├── aws_bootstrap.yml │ ├── aws_inventory_file_generate.yml │ ├── aws_prepare.yml │ ├── aws_teardown.yml │ ├── ec2.ini │ ├── ec2.py │ ├── files/ │ │ └── sources.list │ ├── roles/ │ │ └── aws/ │ │ └── tasks/ │ │ └── main.yml │ ├── templates/ │ │ └── aws.inventory.ini.j2 │ └── vars.yml ├── collect_diagnosis.yml ├── common_tasks/ │ ├── add_evict_leader_scheduler.yml │ ├── create_grafana_api_keys.yml │ ├── get_pd_leader.yml │ ├── get_pd_leader_tls.yml │ ├── get_pd_name.yml │ ├── get_pd_name_tls.yml │ ├── get_pd_tikv_addr.yml │ ├── get_store_id.yml │ ├── get_store_id_tls.yml │ ├── remove_evict_leader_scheduler.yml │ └── transfer_pd_leader.yml ├── conf/ │ ├── alertmanager.yml │ ├── drainer.toml │ ├── pd.yml │ ├── pump.yml │ ├── spark-defaults.yml │ ├── spark-env.yml │ ├── ssl/ │ │ ├── ca-config.json │ │ └── ca-csr.json │ ├── tidb-lightning.yml │ ├── tidb.yml │ ├── tiflash-learner.yml │ ├── tiflash.yml │ ├── tikv-importer.yml │ └── tikv.yml ├── create_users.yml ├── deploy.yml ├── deploy_drainer.yml ├── deploy_ntp.yml ├── excessive_rolling_update.yml ├── filter_plugins/ │ └── tags.py ├── graceful_stop.yml ├── group_vars/ │ ├── alertmanager_servers.yml │ ├── all.yml │ ├── drainer_servers.yml │ ├── grafana_servers.yml │ ├── importer_server.yml │ ├── lightning_server.yml │ ├── monitored_servers.yml │ ├── monitoring_servers.yml │ ├── pd_servers.yml │ ├── pump_servers.yml │ ├── tidb_servers.yml │ ├── tiflash_servers.yml │ └── tikv_servers.yml ├── hosts.ini ├── inventory.ini ├── library/ │ ├── coreos_facts │ ├── docker_facts │ └── wait_for_pid.py ├── local_prepare.yml ├── log/ │ └── .gitignore ├── migrate_monitor.yml ├── requirements.txt ├── roles/ │ ├── alertmanager/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── 
binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ ├── run_alertmanager_binary.sh.j2 │ │ └── run_alertmanager_docker.sh.j2 │ ├── blackbox_exporter/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ ├── blackbox.yml.j2 │ │ ├── run_blackbox_exporter_binary.sh.j2 │ │ └── run_blackbox_exporter_docker.sh.j2 │ ├── bootstrap/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ └── tasks/ │ │ ├── main.yml │ │ └── root_tasks.yml │ ├── check_config_dynamic/ │ │ └── tasks/ │ │ └── main.yml │ ├── check_config_pd/ │ │ └── tasks/ │ │ └── main.yml │ ├── check_config_static/ │ │ └── tasks/ │ │ └── main.yml │ ├── check_config_tidb/ │ │ └── tasks/ │ │ └── main.yml │ ├── check_config_tikv/ │ │ └── tasks/ │ │ └── main.yml │ ├── check_system_dynamic/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ └── tasks/ │ │ └── main.yml │ ├── check_system_optional/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ └── tasks/ │ │ └── main.yml │ ├── check_system_static/ │ │ └── tasks/ │ │ └── main.yml │ ├── clean_log_pd/ │ │ └── tasks/ │ │ ├── add_cron.yml │ │ ├── del_cron.yml │ │ └── main.yml │ ├── clean_log_tidb/ │ │ └── tasks/ │ │ ├── add_cron.yml │ │ ├── del_cron.yml │ │ └── main.yml │ ├── clean_log_tikv/ │ │ └── tasks/ │ │ ├── add_cron.yml │ │ ├── del_cron.yml │ │ └── main.yml │ ├── collect_diagnosis/ │ │ ├── meta/ │ │ │ └── main.yml │ │ └── tasks/ │ │ └── main.yml │ ├── collector_host/ │ │ └── tasks/ │ │ ├── collect_log.yml │ │ └── main.yml │ ├── collector_pd/ │ │ └── tasks/ │ │ ├── collect_config.yml │ │ ├── collect_log.yml │ │ └── main.yml │ ├── collector_prometheus/ │ │ └── tasks/ │ │ └── main.yml │ ├── collector_pump/ │ │ └── tasks/ │ │ ├── collect_log.yml │ │ └── main.yml │ ├── collector_tidb/ │ │ 
└── tasks/ │ │ ├── collect_config.yml │ │ ├── collect_log.yml │ │ └── main.yml │ ├── collector_tikv/ │ │ └── tasks/ │ │ ├── collect_config.yml │ │ ├── collect_log.yml │ │ └── main.yml │ ├── common_dir/ │ │ └── tasks/ │ │ └── main.yml │ ├── dashboard_topo/ │ │ ├── tasks/ │ │ │ └── main.yml │ │ └── templates/ │ │ └── init_dashboard_topo.sh.j2 │ ├── drainer/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ └── make-ssl.sh │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── check_certs.yml │ │ │ ├── gen_certs.yml │ │ │ ├── install_certs.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ └── run_drainer_binary.sh.j2 │ │ └── vars/ │ │ └── default.yml │ ├── firewalld/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── handlers/ │ │ │ └── main.yml │ │ └── tasks/ │ │ └── main.yml │ ├── grafana/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ ├── systemd_deployment.yml │ │ │ └── tasks.yml │ │ └── templates/ │ │ ├── data_source.json.j2 │ │ ├── grafana.ini.j2 │ │ ├── run_grafana_binary.sh.j2 │ │ └── run_grafana_docker.sh.j2 │ ├── kafka_exporter/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ └── run_kafka_exporter_binary.sh.j2 │ ├── local/ │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ └── main.yml │ │ └── templates/ │ │ ├── binary_packages.yml.j2 │ │ ├── common_packages.yml.j2 │ │ └── docker_packages.yml.j2 │ ├── machine_benchmark/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ └── tasks/ │ │ ├── fio_randread.yml │ │ ├── fio_randread_write.yml │ │ ├── fio_randread_write_latency.yml │ │ └── main.yml │ ├── 
node_exporter/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ ├── run_node_exporter_binary.sh.j2 │ │ └── run_node_exporter_docker.sh.j2 │ ├── ops/ │ │ ├── tasks/ │ │ │ └── main.yml │ │ └── templates/ │ │ ├── check_tikv.sh.j2 │ │ └── pd-ctl.sh.j2 │ ├── pd/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ └── make-ssl.sh │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── check_certs.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── gen_certs.yml │ │ │ ├── install_certs.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ ├── pd.toml.j2 │ │ │ ├── run_pd_binary.sh.j2 │ │ │ └── run_pd_docker.sh.j2 │ │ └── vars/ │ │ └── default.yml │ ├── perf_tools/ │ │ └── tasks/ │ │ └── main.yml │ ├── pre-ansible/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ └── tasks/ │ │ ├── coreos.yml │ │ ├── main.yml │ │ └── root_tasks.yml │ ├── prometheus/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ ├── binlog.rules.yml │ │ │ ├── blacker.rules.yml │ │ │ ├── bypass.rules.yml │ │ │ ├── kafka.rules.yml │ │ │ ├── lightning.rules.yml │ │ │ ├── node.rules.yml │ │ │ ├── pd.rules.yml │ │ │ ├── tidb.rules.yml │ │ │ ├── tiflash.rules.yml │ │ │ ├── tikv.accelerate.rules.yml │ │ │ └── tikv.rules.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ ├── prometheus.yml.j2 │ │ ├── run_prometheus_binary.sh.j2 │ │ └── run_prometheus_docker.sh.j2 │ ├── pump/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ └── make-ssl.sh │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── 
check_certs.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── gen_certs.yml │ │ │ ├── install_certs.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ ├── pump.toml.j2 │ │ │ ├── run_pump_binary.sh.j2 │ │ │ └── run_pump_docker.sh.j2 │ │ └── vars/ │ │ └── default.yml │ ├── pushgateway/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ └── templates/ │ │ ├── run_pushgateway_binary.sh.j2 │ │ └── run_pushgateway_docker.sh.j2 │ ├── supervise/ │ │ ├── tasks/ │ │ │ └── main.yml │ │ └── templates/ │ │ ├── start_role.sh.j2 │ │ └── stop_role.sh.j2 │ ├── systemd/ │ │ ├── tasks/ │ │ │ └── main.yml │ │ └── templates/ │ │ ├── start_role.sh.j2 │ │ ├── stop_role.sh.j2 │ │ ├── systemd_binary.service.j2 │ │ └── systemd_docker.service.j2 │ ├── tidb/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ └── make-ssl.sh │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── check_certs.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── gen_certs.yml │ │ │ ├── install_certs.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ ├── run_tidb_binary.sh.j2 │ │ │ ├── run_tidb_docker.sh.j2 │ │ │ └── tidb.toml.j2 │ │ └── vars/ │ │ └── default.yml │ ├── tidb_lightning/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ └── main.yml │ │ ├── templates/ │ │ │ ├── start_lightning_binary.sh.j2 │ │ │ ├── stop_lightning_binary.sh.j2 │ │ │ ├── tidb-lightning.toml.j2 │ │ │ └── tidb_lightning_ctl_binary.sh.j2 │ │ └── vars/ │ │ └── tidb-lightning.yml │ ├── tiflash/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── 
main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ ├── run_tiflash_binary.sh.j2 │ │ │ ├── tiflash.toml.j2 │ │ │ └── tiflash_learner.toml.j2 │ │ └── vars/ │ │ ├── tiflash-learner.yml │ │ └── tiflash.yml │ ├── tikv/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── files/ │ │ │ └── make-ssl.sh │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ ├── check_certs.yml │ │ │ ├── check_filesystem.yml │ │ │ ├── docker_deployment.yml │ │ │ ├── gen_certs.yml │ │ │ ├── install_certs.yml │ │ │ ├── main.yml │ │ │ ├── supervise_deployment.yml │ │ │ └── systemd_deployment.yml │ │ ├── templates/ │ │ │ ├── run_tikv_binary.sh.j2 │ │ │ ├── run_tikv_docker.sh.j2 │ │ │ └── tikv.toml.j2 │ │ └── vars/ │ │ └── default.yml │ ├── tikv_importer/ │ │ ├── defaults/ │ │ │ └── main.yml │ │ ├── meta/ │ │ │ └── main.yml │ │ ├── tasks/ │ │ │ ├── binary_deployment.yml │ │ │ └── main.yml │ │ ├── templates/ │ │ │ ├── start_importer_binary.sh.j2 │ │ │ ├── stop_importer_binary.sh.j2 │ │ │ └── tikv-importer.toml.j2 │ │ └── vars/ │ │ └── tikv-importer.yml │ └── tispark/ │ ├── tasks/ │ │ └── main.yml │ └── templates/ │ ├── log4j.properties.j2 │ ├── spark-defaults.conf.j2 │ ├── spark-env.sh.j2 │ └── start-slave.sh.j2 ├── rolling_update.yml ├── rolling_update_monitor.yml ├── scripts/ │ ├── binlog.json │ ├── blackbox_exporter.json │ ├── br.json │ ├── check/ │ │ ├── check_cpufreq.py │ │ ├── epoll_chk.cc │ │ ├── epollexclusive-amd64 │ │ ├── epollexclusive-arm64 │ │ └── parse_fio_output.py │ ├── clsrun.sh │ ├── dashboard_topo.py │ ├── disk_performance.json │ ├── funcslower │ ├── grafana-config-copy.py │ ├── grafana_pdf.py │ ├── inventory_check.py │ ├── iosnoop │ ├── kafka.json │ ├── lightning.json │ ├── loader.json │ ├── metrics-delete.py │ ├── montidb.sh │ ├── node.json │ ├── overview.json │ ├── pd.json │ ├── pdn.json │ ├── performance_read.json │ ├── performance_write.json │ ├── reparo.json │ ├── syncer.json │ ├── 
table-regions-statistic.py │ ├── table-regions.py │ ├── tidb.json │ ├── tidb_summary.json │ ├── tiflash_proxy_details.json │ ├── tiflash_proxy_summary.json │ ├── tiflash_summary.json │ ├── tikv_details.json │ ├── tikv_raw.json │ ├── tikv_summary.json │ └── tikv_trouble_shooting.json ├── start.yml ├── start_drainer.yml ├── start_spark.yml ├── stop.yml ├── stop_drainer.yml ├── stop_spark.yml ├── templates/ │ └── grafana.dest.json.j2 ├── unsafe_cleanup.yml ├── unsafe_cleanup_container.yml └── unsafe_cleanup_data.yml
SYMBOL INDEX (112 symbols across 15 files)
FILE: callback_plugins/help.py
class CallbackModule (line 28) | class CallbackModule(CallbackBase):
method __init__ (line 34) | def __init__(self):
method _format_results (line 50) | def _format_results(self, result, indent=None, sort_keys=True, keep_in...
method print_help_message (line 102) | def print_help_message(self):
method v2_runner_on_failed (line 108) | def v2_runner_on_failed(self, result, ignore_errors=False):
method v2_runner_on_unreachable (line 113) | def v2_runner_on_unreachable(self, result):
method v2_playbook_on_start (line 119) | def v2_playbook_on_start(self, playbook):
method v2_playbook_on_stats (line 123) | def v2_playbook_on_stats(self, stats):
FILE: callback_plugins/yaml.py
function should_use_block (line 34) | def should_use_block(value):
function my_represent_scalar (line 42) | def my_represent_scalar(self, tag, value, style=None):
class CallbackModule (line 66) | class CallbackModule(Default):
method __init__ (line 77) | def __init__(self):
method _dump_results (line 81) | def _dump_results(self, result, indent=None, sort_keys=True, keep_invo...
method v2_runner_on_skipped (line 123) | def v2_runner_on_skipped(self, result):
method v2_runner_item_on_skipped (line 126) | def v2_runner_item_on_skipped(self, result):
FILE: cloud/aws-ansible/ec2.py
class Ec2Inventory (line 153) | class Ec2Inventory(object):
method _empty_inventory (line 155) | def _empty_inventory(self):
method __init__ (line 158) | def __init__(self):
method is_cache_valid (line 204) | def is_cache_valid(self):
method read_settings (line 217) | def read_settings(self):
method parse_cli_args (line 465) | def parse_cli_args(self):
method do_api_calls_update_cache (line 480) | def do_api_calls_update_cache(self):
method connect (line 499) | def connect(self, region):
method boto_fix_security_token_in_profile (line 508) | def boto_fix_security_token_in_profile(self, connect_args):
method connect_to_aws (line 515) | def connect_to_aws(self, module, region):
method get_instances_by_region (line 529) | def get_instances_by_region(self, region):
method get_rds_instances_by_region (line 571) | def get_rds_instances_by_region(self, region):
method include_rds_clusters_by_region (line 595) | def include_rds_clusters_by_region(self, region):
method get_elasticache_clusters_by_region (line 654) | def get_elasticache_clusters_by_region(self, region):
method get_elasticache_replication_groups_by_region (line 690) | def get_elasticache_replication_groups_by_region(self, region):
method get_auth_error_message (line 724) | def get_auth_error_message(self):
method fail_with_error (line 741) | def fail_with_error(self, err_msg, err_operation=None):
method get_instance (line 749) | def get_instance(self, region, instance_id):
method add_instance (line 757) | def add_instance(self, instance, region):
method add_rds_instance (line 906) | def add_rds_instance(self, instance, region):
method add_elasticache_cluster (line 1004) | def add_elasticache_cluster(self, cluster, region):
method add_elasticache_node (line 1103) | def add_elasticache_node(self, node, cluster, region):
method add_elasticache_replication_group (line 1190) | def add_elasticache_replication_group(self, replication_group, region):
method get_route53_records (line 1242) | def get_route53_records(self):
method get_instance_route53_names (line 1268) | def get_instance_route53_names(self, instance):
method get_host_info_dict_from_instance (line 1289) | def get_host_info_dict_from_instance(self, instance):
method get_host_info_dict_from_describe_dict (line 1340) | def get_host_info_dict_from_describe_dict(self, describe_dict):
method get_host_info (line 1425) | def get_host_info(self):
method push (line 1444) | def push(self, my_dict, key, element):
method push_group (line 1454) | def push_group(self, my_dict, key, element):
method get_inventory_from_cache (line 1463) | def get_inventory_from_cache(self):
method load_index_from_cache (line 1472) | def load_index_from_cache(self):
method write_to_cache (line 1480) | def write_to_cache(self, data, filename):
method uncammelize (line 1488) | def uncammelize(self, key):
method to_safe (line 1492) | def to_safe(self, word):
method json_format_dict (line 1499) | def json_format_dict(self, data, pretty=False):
FILE: filter_plugins/tags.py
function epoch_time_diff (line 8) | def epoch_time_diff(t):
function with_default_dicts (line 11) | def with_default_dicts(d, *args):
function split_string (line 20) | def split_string(d, seperator=None, maxsplit=-1):
function split_regex (line 26) | def split_regex(d, seperator_pattern):
function update_default_dicts (line 32) | def update_default_dicts(d):
function dictsort_by_value_type (line 38) | def dictsort_by_value_type(d):
function tikv_server_labels_format (line 42) | def tikv_server_labels_format(label_str):
function get_element_by_index (line 54) | def get_element_by_index(d, index):
class FilterModule (line 57) | class FilterModule(object):
method filters (line 58) | def filters(self):
FILE: library/wait_for_pid.py
function main (line 16) | def main():
FILE: scripts/check/check_cpufreq.py
function get_file_content (line 10) | def get_file_content(path, default=None, strip=True):
function parse_opts (line 28) | def parse_opts():
FILE: scripts/check/epoll_chk.cc
function grpc_is_epollexclusive_available (line 30) | bool grpc_is_epollexclusive_available(void) {
function main (line 92) | int main() {
FILE: scripts/check/parse_fio_output.py
function parse_opts (line 8) | def parse_opts():
FILE: scripts/dashboard_topo.py
function parse_opts (line 15) | def parse_opts():
function etcd_write (line 28) | def etcd_write(etcd_url, key, value):
function parse_address (line 46) | def parse_address(con):
function request_topo (line 55) | def request_topo(comp, topo, etcd_target):
function concat_to_address (line 81) | def concat_to_address(ip, port):
FILE: scripts/grafana-config-copy.py
function export_dashboard (line 55) | def export_dashboard(api_url, api_key, dashboard_name):
function fill_dashboard_with_dest_config (line 64) | def fill_dashboard_with_dest_config(dashboard, dest, type_='node'):
function import_dashboard (line 102) | def import_dashboard(api_url, api_key, dashboard):
function import_dashboard_via_user_pass (line 118) | def import_dashboard_via_user_pass(api_url, user, password, dashboard):
FILE: scripts/grafana_pdf.py
function make_tarfile (line 30) | def make_tarfile(output_filename, source_dir):
function read_url (line 35) | def read_url(url):
function parse_opts (line 47) | def parse_opts():
function parse_timestamp (line 59) | def parse_timestamp(time_string):
FILE: scripts/inventory_check.py
function parse_inventory (line 10) | def parse_inventory(inventory):
function check_conflict (line 46) | def check_conflict(server_list):
FILE: scripts/metrics-delete.py
function query_metadata (line 33) | def query_metadata():
function query_all_series (line 42) | def query_all_series():
function delete_series_by_job_instance (line 57) | def delete_series_by_job_instance(job, instance):
function query_out_dated_job_from_pushgateway (line 68) | def query_out_dated_job_from_pushgateway(timeout):
function delete_job_from_pushgateway (line 100) | def delete_job_from_pushgateway(job, instance):
FILE: scripts/table-regions-statistic.py
class Resource (line 11) | class Resource(Enum):
function count (line 16) | def count(table_region_set, all_regions, resource, group, to_draw):
function main (line 31) | def main():
function generate_steps (line 40) | def generate_steps(resource, group, max_value):
function format_steps (line 53) | def format_steps(steps):
function get_resource_key (line 63) | def get_resource_key(resource):
function parse_args (line 70) | def parse_args():
function draw (line 85) | def draw(table_regions, resource):
function get_json (line 101) | def get_json(url):
function output (line 107) | def output(table_regions, steps, resource):
FILE: scripts/table-regions.py
function main (line 11) | def main():
function parse_args (line 55) | def parse_args():
function merge (line 65) | def merge(dist1, dist2):
function parse_regions (line 70) | def parse_regions(regions):
class StoreRegionPeers (line 78) | class StoreRegionPeers:
method __init__ (line 79) | def __init__(self):
method add (line 85) | def add(self, peer):
method merge (line 91) | def merge(self, rhs):
method num (line 96) | def num(self):
method __str__ (line 99) | def __str__(self):
method __repr__ (line 101) | def __repr__(self):
function merge_peers (line 104) | def merge_peers(dist1, dist2):
function parse_region_peers (line 112) | def parse_region_peers(regions):
function print_leaders (line 123) | def print_leaders(info, indent = " "):
function print_peers (line 131) | def print_peers(info, indent = " "):
Condensed preview — 379 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,272K chars).
[
{
"path": ".gitignore",
"chars": 119,
"preview": "retry_files/\ndownloads/\nresources/\nfact_files/\nconf/keys\nscripts/dests.json\n\n.vagrant/\n*.retry\n*.pyc\n.vscode\n.DS_Store\n"
},
{
"path": "LICENSE",
"chars": 11294,
"preview": "Apache License\n Version 2.0, January 2004\n http://www.apache.org/licens"
},
{
"path": "README.md",
"chars": 1899,
"preview": "## :warning: End of project :warning:\n\n[](https:/"
},
{
"path": "ansible.cfg",
"chars": 1140,
"preview": "[defaults]\n## Customize this!\n\ninventory = inventory.ini\n\ntransport = ssh\n\n# disable SSH key host checking\nhost_key_chec"
},
{
"path": "bootstrap.yml",
"chars": 1231,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "callback_plugins/help.py",
"chars": 5434,
"preview": "# Make coding more python3-ish\nfrom __future__ import (absolute_import, division, print_function)\n\n__metaclass__ = type\n"
},
{
"path": "callback_plugins/yaml.py",
"chars": 4443,
"preview": "# (c) 2017 Ansible Project\n# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)\n"
},
{
"path": "clean_log_cron.yml",
"chars": 214,
"preview": "---\n\n- hosts: pd_servers\n tags:\n - pd\n roles:\n - clean_log_pd\n\n- hosts: tikv_servers\n tags:\n - tikv\n roles:"
},
{
"path": "cloud/aws-ansible/aws_bootstrap.yml",
"chars": 3291,
"preview": "---\n\n# This play book is intend for one pass execution\n\n- name: \"Group nodes by OS distribution\"\n hosts: all\n gather_f"
},
{
"path": "cloud/aws-ansible/aws_inventory_file_generate.yml",
"chars": 1761,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n# The Playbook of TiDB\n\n# Generates\n\n- name: prepare inventory config\n hosts: localh"
},
{
"path": "cloud/aws-ansible/aws_prepare.yml",
"chars": 304,
"preview": "---\n\n- name: do AWS preparation\n hosts: localhost\n gather_facts: false\n pre_tasks:\n - include_vars:\n file: "
},
{
"path": "cloud/aws-ansible/aws_teardown.yml",
"chars": 1436,
"preview": "---\n\n- name: test\n hosts: localhost\n connection: local\n gather_facts: false\n pre_tasks:\n - include_vars:\n "
},
{
"path": "cloud/aws-ansible/ec2.ini",
"chars": 8151,
"preview": "# Ansible EC2 external inventory script settings\n#\n\n[ec2]\n\n# to talk to a private eucalyptus instance uncomment these li"
},
{
"path": "cloud/aws-ansible/ec2.py",
"chars": 63782,
"preview": "#!/usr/bin/env python\n\n'''\nEC2 external inventory script\n=================================\n\nGenerates inventory that Ans"
},
{
"path": "cloud/aws-ansible/files/sources.list",
"chars": 356,
"preview": "\ndeb http://mirrors.yun-idc.com/ubuntu/ trusty main restricted universe multiverse\ndeb http://mirrors.yun-idc.com/ubuntu"
},
{
"path": "cloud/aws-ansible/roles/aws/tasks/main.yml",
"chars": 4870,
"preview": "---\n\n# all cluster use a single vpc\n- name: vpc setup\n ec2_vpc:\n state: present\n cidr_block: 172.233.0.0/16\n r"
},
{
"path": "cloud/aws-ansible/templates/aws.inventory.ini.j2",
"chars": 899,
"preview": "[tidb_servers]\n{% if groups.tidb_servers is defined %}\n{% for item in groups.tidb_servers -%}\n{{ item }}\n{% endfor %}\n{%"
},
{
"path": "cloud/aws-ansible/vars.yml",
"chars": 619,
"preview": "---\n\ntikv_count: 1\npd_count: 1\ntidb_count: 1\n\n# 1 or 0\nmonitoring_count: 1\n\ncreator: pingcap-auto\nmanaged_by: ansible-pi"
},
{
"path": "collect_diagnosis.yml",
"chars": 2265,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "common_tasks/add_evict_leader_scheduler.yml",
"chars": 3161,
"preview": "---\n\n- name: remove evict-leader-scheduler\n uri:\n url: \"http://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-sched"
},
{
"path": "common_tasks/create_grafana_api_keys.yml",
"chars": 2465,
"preview": "---\n\n- name: Ensure grafana API Key directory exists\n file:\n path: \"{{ grafana_api_keys_dir }}\"\n state: directory"
},
{
"path": "common_tasks/get_pd_leader.yml",
"chars": 176,
"preview": "---\n\n- name: get PD leader info\n uri:\n url: \"http://{{ pd_addr }}/pd/api/v1/leader\"\n method: GET\n return_conte"
},
{
"path": "common_tasks/get_pd_leader_tls.yml",
"chars": 374,
"preview": "---\n\n- name: get PD leader info when enable_tls|default(false)\n uri:\n url: \"https://{{ pd_addr }}/pd/api/v1/leader\"\n"
},
{
"path": "common_tasks/get_pd_name.yml",
"chars": 360,
"preview": "---\n\n- name: get PD name\n uri:\n url: \"http://{{ pd_addr }}/pd/api/v1/members\"\n method: GET\n return_content: ye"
},
{
"path": "common_tasks/get_pd_name_tls.yml",
"chars": 528,
"preview": "---\n\n- name: get PD name\n uri:\n url: \"https://{{ pd_addr }}/pd/api/v1/members\"\n validate_certs: no\n client_cer"
},
{
"path": "common_tasks/get_pd_tikv_addr.yml",
"chars": 438,
"preview": "---\n\n- set_fact:\n pd_host: \"{{ hostvars[groups.pd_servers[0]].ansible_host | default(hostvars[groups.pd_servers[0]].i"
},
{
"path": "common_tasks/get_store_id.yml",
"chars": 571,
"preview": "---\n\n- name: get store info from PD\n uri:\n url: \"http://{{ pd_addr }}/pd/api/v1/stores\"\n method: GET\n return_c"
},
{
"path": "common_tasks/get_store_id_tls.yml",
"chars": 639,
"preview": "---\n\n- name: get store info from PD when enable_tls|default(false)\n uri:\n url: \"https://{{ pd_addr }}/pd/api/v1/stor"
},
{
"path": "common_tasks/remove_evict_leader_scheduler.yml",
"chars": 640,
"preview": "---\n\n- name: remove evict-leader-scheduler\n uri:\n url: \"http://{{ pd_addr }}/pd/api/v1/schedulers/evict-leader-sched"
},
{
"path": "common_tasks/transfer_pd_leader.yml",
"chars": 2187,
"preview": "---\n\n- set_fact:\n pd_leader_name: \"{{ pd_leader_info.json.name }}\"\n\n- name: display PD leader name\n debug:\n var: "
},
{
"path": "conf/alertmanager.yml",
"chars": 1963,
"preview": "global:\n # The smarthost and SMTP sender used for mail notifications.\n smtp_smarthost: 'localhost:25'\n smtp_from: 'al"
},
{
"path": "conf/drainer.toml",
"chars": 2534,
"preview": "# drainer Configuration.\n\n# the interval time (in seconds) of detect pumps' status\ndetect-interval = 10\n\n# Use the speci"
},
{
"path": "conf/pd.yml",
"chars": 1952,
"preview": "---\n# default configuration file for pd in yaml format\n\nglobal:\n # lease: 3\n # tso-save-interval: \"3s\"\n\nsecurity:\n\nlog"
},
{
"path": "conf/pump.yml",
"chars": 1038,
"preview": "---\n# default configuration file for pump in yaml format\n\nglobal:\n # a integer value to control expiry date of the binl"
},
{
"path": "conf/spark-defaults.yml",
"chars": 3220,
"preview": "#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreements. See the NOTICE"
},
{
"path": "conf/spark-env.yml",
"chars": 4370,
"preview": "#!/usr/bin/env bash\n\n#\n# Licensed to the Apache Software Foundation (ASF) under one or more\n# contributor license agreem"
},
{
"path": "conf/ssl/ca-config.json",
"chars": 611,
"preview": "{\n \"signing\": {\n \"default\": {\n \"expiry\": \"876000h\"\n },\n \"profiles\": {\n \"se"
},
{
"path": "conf/ssl/ca-csr.json",
"chars": 239,
"preview": "{\n \"CN\": \"My own CA\",\n \"key\": {\n \"algo\": \"rsa\",\n \"size\": 2048\n },\n \"names\": [\n {\n "
},
{
"path": "conf/tidb-lightning.yml",
"chars": 6672,
"preview": "---\n### tidb-lightning configuration\nlightning:\n # check if the cluster satisfies the minimum requirement before starti"
},
{
"path": "conf/tidb.yml",
"chars": 7361,
"preview": "---\n# default configuration file for TiDB in yaml format\n\nglobal:\n # TiDB Configuration.\n\n # The socket file to use fo"
},
{
"path": "conf/tiflash-learner.yml",
"chars": 559,
"preview": "# TiKV config template\n# Human-readable big numbers:\n# File size(based on byte): KB, MB, GB, TB, PB\n# e.g.: 1_048_"
},
{
"path": "conf/tiflash.yml",
"chars": 840,
"preview": "---\n\nglobal:\n display_name: \"TiFlash\"\n default_profile: \"default\"\n mark_cache_size: 5368709120\n listen_host: \"0.0.0."
},
{
"path": "conf/tikv-importer.yml",
"chars": 2295,
"preview": "---\n\n# TiKV Importer configuration file template\n\nglobal:\n # log file.\n # log level: trace, debug, info, warn, error, "
},
{
"path": "conf/tikv.yml",
"chars": 27352,
"preview": "---\n## The default configuration file for TiKV in YAML format\n\n## TiKV config template\n## Human-readable big numbers:\n#"
},
{
"path": "create_users.yml",
"chars": 518,
"preview": "---\n\n- hosts: all\n tasks:\n - name: create user\n user: name={{ username }} shell=/bin/bash createhome=yes\n\n -"
},
{
"path": "deploy.yml",
"chars": 3790,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "deploy_drainer.yml",
"chars": 1254,
"preview": "---\n# Copyright 2018 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "deploy_ntp.yml",
"chars": 2076,
"preview": "---\n\n- hosts: all\n tasks:\n - name: get facts\n setup:\n\n - name: RedHat family Linux distribution - make sure "
},
{
"path": "excessive_rolling_update.yml",
"chars": 16932,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "filter_plugins/tags.py",
"chars": 2110,
"preview": "#!/usr/bin/env python\n\nimport re\nimport time\nimport copy\nimport json\n\ndef epoch_time_diff(t):\n return int(int(t) - ti"
},
{
"path": "graceful_stop.yml",
"chars": 9699,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "group_vars/alertmanager_servers.yml",
"chars": 61,
"preview": "---\n\nalertmanager_port: 9093\nalertmanager_cluster_port: 9094\n"
},
{
"path": "group_vars/all.yml",
"chars": 1289,
"preview": "---\n\n# Variables here are applicable to all host groups\ndeploy_user: \"{{ ansible_user }}\"\nstatus_dir: \"{{ deploy_dir }}/"
},
{
"path": "group_vars/drainer_servers.yml",
"chars": 5,
"preview": "---\n\n"
},
{
"path": "group_vars/grafana_servers.yml",
"chars": 78,
"preview": "---\n\ngrafana_port: 3000\n\ngrafana_api_keys_dir: \"{{ playbook_dir }}/conf/keys\"\n"
},
{
"path": "group_vars/importer_server.yml",
"chars": 290,
"preview": "---\ndummy:\n\n# this directory is used to store the data written by `tidb-lightning`\nimport_dir: \"{{ deploy_dir }}/data.im"
},
{
"path": "group_vars/lightning_server.yml",
"chars": 304,
"preview": "---\ndummy:\n\n# background profile for debugging\ntidb_lightning_pprof_port: 8289\n\n# the source data directory of Mydumper\n"
},
{
"path": "group_vars/monitored_servers.yml",
"chars": 52,
"preview": "---\n\nnode_exporter_log_dir: \"{{ deploy_dir }}/log\"\n\n"
},
{
"path": "group_vars/monitoring_servers.yml",
"chars": 131,
"preview": "---\n\nprometheus_port: 9090\npushgateway_port: 9091\n\n# How long to retain samples in the storage\nprometheus_storage_retent"
},
{
"path": "group_vars/pd_servers.yml",
"chars": 170,
"preview": "---\ndummy:\n\npd_client_port: 2379\npd_peer_port: 2380\n\npd_data_dir: \"{{ deploy_dir }}/data.pd\"\npd_log_dir: \"{{ deploy_dir "
},
{
"path": "group_vars/pump_servers.yml",
"chars": 5,
"preview": "---\n\n"
},
{
"path": "group_vars/tidb_servers.yml",
"chars": 133,
"preview": "---\ndummy:\n\ntidb_port: 4000\ntidb_status_port: 10080\n\ntidb_log_dir: \"{{ deploy_dir }}/log\"\ntidb_cert_dir: \"{{ deploy_dir "
},
{
"path": "group_vars/tiflash_servers.yml",
"chars": 135,
"preview": "---\n\ntcp_port: 9000\nhttp_port: 8123\nflash_service_port: 3930\nflash_proxy_port: 20170\nflash_proxy_status_port: 20292\nmetr"
},
{
"path": "group_vars/tikv_servers.yml",
"chars": 173,
"preview": "---\ndummy:\n\ntikv_port: 20160\ntikv_status_port: 20180\n\ntikv_data_dir: \"{{ deploy_dir }}/data\"\ntikv_log_dir: \"{{ deploy_di"
},
{
"path": "hosts.ini",
"chars": 161,
"preview": "[servers]\n192.168.0.2\n192.168.0.3\n192.168.0.4\n192.168.0.5\n192.168.0.6\n192.168.0.7\n192.168.0.8\n192.168.0.10\n\n[all:vars]\nu"
},
{
"path": "inventory.ini",
"chars": 2091,
"preview": "## TiDB Cluster Part\n[tidb_servers]\n192.168.0.2\n\n[tikv_servers]\n192.168.0.3\n192.168.0.4\n192.168.0.5\n\n[pd_servers]\n192.16"
},
{
"path": "library/coreos_facts",
"chars": 3629,
"preview": "#!/bin/bash\n\nset -e\n\n_default_gw=$(ip route list match 0.0.0.0 | cut '-d ' -f3)\n_default_if=$(ip route list match 0.0.0."
},
{
"path": "library/docker_facts",
"chars": 631,
"preview": "#!/bin/bash\n\nset -e\n\ncat <<EOF\n{\n \"ansible_facts\": {\n \"docker_server_version\": \"$(docker version -f '{{.Server"
},
{
"path": "library/wait_for_pid.py",
"chars": 4770,
"preview": "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n\nimport binascii\nimport datetime\nimport math\nimport re\nimport select\nimport so"
},
{
"path": "local_prepare.yml",
"chars": 116,
"preview": "---\n\n- name: do local preparation\n hosts: localhost\n connection: local\n gather_facts: false\n roles:\n - local\n"
},
{
"path": "log/.gitignore",
"chars": 6,
"preview": "*.log\n"
},
{
"path": "migrate_monitor.yml",
"chars": 10795,
"preview": "---\n# Copyright 2016 PingCAP, Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use"
},
{
"path": "requirements.txt",
"chars": 46,
"preview": "ansible==2.7.11\njinja2>=2.9.6\njmespath>=0.9.0\n"
},
{
"path": "roles/alertmanager/defaults/main.yml",
"chars": 255,
"preview": "---\n\n# default configuration for alertmanager\n\nalertmanager_data_dir: \"{{ deploy_dir }}/data.alertmanager\"\n\nalertmanager"
},
{
"path": "roles/alertmanager/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/alertmanager/tasks/binary_deployment.yml",
"chars": 625,
"preview": "---\n\n- name: create deploy directories\n file: path=\"{{ item }}\" state=directory mode=0755\n with_items:\n - \"{{ alertma"
},
{
"path": "roles/alertmanager/tasks/docker_deployment.yml",
"chars": 609,
"preview": "---\n\n- name: deploy alertmanager image\n copy: src=\"{{ downloads_dir }}/alertmanager.tar\" dest=\"{{ deploy_dir }}/images\""
},
{
"path": "roles/alertmanager/tasks/main.yml",
"chars": 564,
"preview": "---\n\n- include_tasks: \"{{ deployment_method }}_deployment.yml\"\n\n- name: create configuration file\n copy: src=\"{{ playbo"
},
{
"path": "roles/alertmanager/tasks/supervise_deployment.yml",
"chars": 162,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: alertmanager\n service_n"
},
{
"path": "roles/alertmanager/tasks/systemd_deployment.yml",
"chars": 158,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: alertmanager\n service_name:"
},
{
"path": "roles/alertmanager/templates/run_alertmanager_binary.sh.j2",
"chars": 603,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\nDEPLOY_DIR={{ deploy_dir }}\ncd \"${DEPLOY_DIR}\" || exit 1\n\n# WARNING: This file was"
},
{
"path": "roles/alertmanager/templates/run_alertmanager_docker.sh.j2",
"chars": 740,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/blackbox_exporter/defaults/main.yml",
"chars": 271,
"preview": "---\n\n# default configuration for blackbox_exporter\n\nblackbox_exporter_log_level: info\nblackbox_exporter_log_dir: \"{{ dep"
},
{
"path": "roles/blackbox_exporter/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/blackbox_exporter/tasks/binary_deployment.yml",
"chars": 753,
"preview": "---\n\n- name: create deploy directories\n file: path={{ item }} state=directory mode=0755\n with_items:\n - \"{{ blackbox_"
},
{
"path": "roles/blackbox_exporter/tasks/docker_deployment.yml",
"chars": 634,
"preview": "---\n\n- name: deploy blackbox_exporter image\n copy: src=\"{{ downloads_dir }}/blackbox-exporter.tar\" dest=\"{{ deploy_dir "
},
{
"path": "roles/blackbox_exporter/tasks/main.yml",
"chars": 519,
"preview": "---\n\n- include_tasks: \"{{ deployment_method }}_deployment.yml\"\n\n- name: create config file\n template: src=blackbox.yml."
},
{
"path": "roles/blackbox_exporter/tasks/supervise_deployment.yml",
"chars": 177,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: blackbox_exporter\n serv"
},
{
"path": "roles/blackbox_exporter/tasks/systemd_deployment.yml",
"chars": 173,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: blackbox_exporter\n service_"
},
{
"path": "roles/blackbox_exporter/templates/blackbox.yml.j2",
"chars": 718,
"preview": "modules:\n http_2xx:\n prober: http\n http:\n method: GET\n http_post_2xx:\n prober: http\n http:\n meth"
},
{
"path": "roles/blackbox_exporter/templates/run_blackbox_exporter_binary.sh.j2",
"chars": 473,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/blackbox_exporter/templates/run_blackbox_exporter_docker.sh.j2",
"chars": 471,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/bootstrap/defaults/main.yml",
"chars": 67,
"preview": "---\n\ntuning_kernel_parameters: true\n\ntuning_irqbalance_value: true\n"
},
{
"path": "roles/bootstrap/tasks/main.yml",
"chars": 404,
"preview": "---\n# bootstrap a machine and begin deployment\n\n- name: gather facts\n setup:\n gather_timeout: 30\n\n- name: group host"
},
{
"path": "roles/bootstrap/tasks/root_tasks.yml",
"chars": 3817,
"preview": "---\n\n- name: setting absent kernel params\n sysctl:\n name: \"{{ item.name }}\"\n value: \"{{ item.value }}\"\n sysctl"
},
{
"path": "roles/check_config_dynamic/tasks/main.yml",
"chars": 1656,
"preview": "---\n\n- name: Set enable_binlog variable\n set_fact:\n enable_binlog: true\n when: \"enable_binlog in ['true', 'True', '"
},
{
"path": "roles/check_config_pd/tasks/main.yml",
"chars": 2009,
"preview": "---\n\n- set_fact:\n tidb_check_dir: \"/tmp/tidb_check_config\"\n pd_host: \"{{ hostvars[groups.pd_servers[0]].ansible_ho"
},
{
"path": "roles/check_config_static/tasks/main.yml",
"chars": 3253,
"preview": "---\n# Common Tasks\n\n- name: Ensure monitoring_servers exists\n fail: msg=\"monitoring_servers should be specified.\"\n whe"
},
{
"path": "roles/check_config_tidb/tasks/main.yml",
"chars": 2139,
"preview": "---\n\n- set_fact:\n tidb_check_dir: \"/tmp/tidb_check_config\"\n tidb_host: \"{{ hostvars[groups.tidb_servers[0]].ansibl"
},
{
"path": "roles/check_config_tikv/tasks/main.yml",
"chars": 2269,
"preview": "---\n\n- set_fact:\n tidb_check_dir: \"/tmp/tidb_check_config\"\n tikv_host: \"{{ hostvars[groups.tikv_servers[0]].ansibl"
},
{
"path": "roles/check_system_dynamic/defaults/main.yml",
"chars": 69,
"preview": "---\n\n# ulimit -n, hard-coded in startup scrips\nmin_open_fds: 1000000\n"
},
{
"path": "roles/check_system_dynamic/tasks/main.yml",
"chars": 2343,
"preview": "---\n\n- name: Disk space check - Fail task when disk is full\n shell: df -h . | tail -n1\n register: disk_space_st\n fail"
},
{
"path": "roles/check_system_optional/defaults/main.yml",
"chars": 283,
"preview": "---\n\n# CPU\ntidb_min_cpu: 8\ntikv_min_cpu: 8\npd_min_cpu: 4\nmonitor_min_cpu: 4\n\n# Mem\ntidb_min_ram: 16000\ntikv_min_ram: 160"
},
{
"path": "roles/check_system_optional/tasks/main.yml",
"chars": 4507,
"preview": "---\n\n- name: Preflight check - Check TiDB server's CPU\n fail:\n msg: \"This machine does not have sufficient CPU to ru"
},
{
"path": "roles/check_system_static/tasks/main.yml",
"chars": 3889,
"preview": "---\n\n- name: Disk space check - Fail task when disk is full\n shell: df -h . | tail -n1\n register: disk_space_st\n fail"
},
{
"path": "roles/clean_log_pd/tasks/add_cron.yml",
"chars": 238,
"preview": "---\n\n- name: add crontab\n cron:\n name: \"pd-{{ pd_client_port }}\"\n user: \"{{ ansible_user }}\"\n minute: 0\n st"
},
{
"path": "roles/clean_log_pd/tasks/del_cron.yml",
"chars": 130,
"preview": "---\n\n- name: delete crontab if exist\n cron:\n name: 'pd-{{ pd_client_port }}'\n user: \"{{ ansible_user }}\"\n stat"
},
{
"path": "roles/clean_log_pd/tasks/main.yml",
"chars": 271,
"preview": "---\n\n- include_tasks: add_cron.yml\n when:\n - enable_log_clean|default(false)\n\n- include_tasks: del_cron.yml\n when:\n"
},
{
"path": "roles/clean_log_tidb/tasks/add_cron.yml",
"chars": 239,
"preview": "---\n\n- name: add crontab\n cron:\n name: \"tidb-{{ tidb_port }}\"\n user: \"{{ ansible_user }}\"\n minute: 0\n state"
},
{
"path": "roles/clean_log_tidb/tasks/del_cron.yml",
"chars": 127,
"preview": "---\n\n- name: delete crontab if exist\n cron:\n name: \"tidb-{{ tidb_port }}\"\n user: \"{{ ansible_user }}\"\n state: "
},
{
"path": "roles/clean_log_tidb/tasks/main.yml",
"chars": 271,
"preview": "---\n\n- include_tasks: add_cron.yml\n when:\n - enable_log_clean|default(false)\n\n- include_tasks: del_cron.yml\n when:\n"
},
{
"path": "roles/clean_log_tikv/tasks/add_cron.yml",
"chars": 239,
"preview": "---\n\n- name: add crontab\n cron:\n name: \"tikv-{{ tikv_port }}\"\n user: \"{{ ansible_user }}\"\n minute: 0\n state"
},
{
"path": "roles/clean_log_tikv/tasks/del_cron.yml",
"chars": 127,
"preview": "---\n\n- name: delete crontab if exist\n cron:\n name: \"tikv-{{ tikv_port }}\"\n user: \"{{ ansible_user }}\"\n state: "
},
{
"path": "roles/clean_log_tikv/tasks/main.yml",
"chars": 271,
"preview": "---\n\n- include_tasks: add_cron.yml\n when:\n - enable_log_clean|default(false)\n\n- include_tasks: del_cron.yml\n when:\n"
},
{
"path": "roles/collect_diagnosis/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/collect_diagnosis/tasks/main.yml",
"chars": 158,
"preview": "---\n\n- name: uncompress tidb-insight scripts\n unarchive: >\n mode=0755\n dest={{ deploy_dir }}/scripts/\n src={{ "
},
{
"path": "roles/collector_host/tasks/collect_log.yml",
"chars": 555,
"preview": "---\n\n- name: check node_exporter log directory\n stat: path={{ node_exporter_log_dir }} get_md5=false get_checksum=false"
},
{
"path": "roles/collector_host/tasks/main.yml",
"chars": 1642,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/collector_pd/tasks/collect_config.yml",
"chars": 593,
"preview": "---\n- name: check pd config path\n set_fact:\n pd_conf_dir: \"{{ deploy_dir }}/conf\"\n when: pd_conf_dir is undefined\n\n"
},
{
"path": "roles/collector_pd/tasks/collect_log.yml",
"chars": 521,
"preview": "---\n\n- name: check pd log directory\n stat: path={{ pd_log_dir }} get_md5=false get_checksum=false\n register: log_dir_s"
},
{
"path": "roles/collector_pd/tasks/main.yml",
"chars": 1859,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/collector_prometheus/tasks/main.yml",
"chars": 1672,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/collector_pump/tasks/collect_log.yml",
"chars": 537,
"preview": "---\n\n- name: check pump log directory\n stat: path={{ pump_log_dir }} get_md5=false get_checksum=false\n register: log_d"
},
{
"path": "roles/collector_pump/tasks/main.yml",
"chars": 1388,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/collector_tidb/tasks/collect_config.yml",
"chars": 616,
"preview": "---\n\n- name: check tidb config path\n set_fact:\n tidb_conf_dir: \"{{ deploy_dir }}/conf\"\n when: tidb_conf_dir is unde"
},
{
"path": "roles/collector_tidb/tasks/collect_log.yml",
"chars": 537,
"preview": "---\n\n- name: check tidb log directory\n stat: path={{ tidb_log_dir }} get_md5=false get_checksum=false\n register: log_d"
},
{
"path": "roles/collector_tidb/tasks/main.yml",
"chars": 1850,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/collector_tikv/tasks/collect_config.yml",
"chars": 616,
"preview": "---\n\n- name: check tikv config path\n set_fact:\n tikv_conf_dir: \"{{ deploy_dir }}/conf\"\n when: tikv_conf_dir is unde"
},
{
"path": "roles/collector_tikv/tasks/collect_log.yml",
"chars": 537,
"preview": "---\n\n- name: check tikv log directory\n stat: path={{ tikv_log_dir }} get_md5=false get_checksum=false\n register: log_d"
},
{
"path": "roles/collector_tikv/tasks/main.yml",
"chars": 1659,
"preview": "---\n\n- set_fact:\n collector_dir: \"{{ hostvars[groups.monitored_servers[0]].deploy_dir }}\"\n service_host: \"{{ ansib"
},
{
"path": "roles/common_dir/tasks/main.yml",
"chars": 665,
"preview": "---\n# Common Tasks\n\n- name: create deploy directories\n file: path={{ item }} state=directory mode=0755\n with_items:\n "
},
{
"path": "roles/dashboard_topo/tasks/main.yml",
"chars": 273,
"preview": "---\n\n- name: generate init_dashboard_topo script\n template:\n src: \"init_dashboard_topo.sh.j2\"\n dest: \"{{ playbook"
},
{
"path": "roles/dashboard_topo/templates/init_dashboard_topo.sh.j2",
"chars": 2166,
"preview": "#!/bin/bash\nset -e\n\n{% set all_pd = [] -%}\n{% set pd_hosts = groups.pd_servers %}\n{% for host in pd_hosts -%}\n {% set p"
},
{
"path": "roles/drainer/defaults/main.yml",
"chars": 266,
"preview": "---\n\ndrainer_log_dir: \"{{ deploy_dir }}/log\"\ndrainer_log_filename: \"drainer.log\"\ndrainer_stderr_filename: \"drainer_stder"
},
{
"path": "roles/drainer/files/make-ssl.sh",
"chars": 2613,
"preview": "#!/bin/bash\n\n# Author: Smana smainklh@gmail.com\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# yo"
},
{
"path": "roles/drainer/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/drainer/tasks/binary_deployment.yml",
"chars": 423,
"preview": "---\n\n- name: deploy drainer binary\n copy: src=\"{{ resources_dir }}/bin/drainer\" dest=\"{{ deploy_dir }}/bin/\" mode=0755\n"
},
{
"path": "roles/drainer/tasks/check_certs.yml",
"chars": 2906,
"preview": "---\n\n- name: \"Check_certs | check if the certs have already been generated on control machine\"\n find:\n paths: \"{{ ce"
},
{
"path": "roles/drainer/tasks/gen_certs.yml",
"chars": 754,
"preview": "---\n\n- name: Gen_certs | copy certs generation script\n copy:\n src: \"make-ssl.sh\"\n dest: \"{{ script_dir }}/make-ss"
},
{
"path": "roles/drainer/tasks/install_certs.yml",
"chars": 472,
"preview": "---\n\n- name: \"Deploy_certs | Make sure the certificate directory exits\"\n file:\n path: \"{{ drainer_cert_dir }}\"\n s"
},
{
"path": "roles/drainer/tasks/main.yml",
"chars": 1834,
"preview": "---\n# tasks file for drainer\n\n- name: Preflight check - ensure initial_commit_ts variable is set\n fail: msg=\"Please set"
},
{
"path": "roles/drainer/tasks/supervise_deployment.yml",
"chars": 147,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: drainer\n service_name: "
},
{
"path": "roles/drainer/tasks/systemd_deployment.yml",
"chars": 174,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: drainer\n service_name: drai"
},
{
"path": "roles/drainer/templates/run_drainer_binary.sh.j2",
"chars": 1086,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\nDEPLOY_DIR={{ deploy_dir }}\n\ncd \"${DEPLOY_DIR}\" || exit 1\n\n# WARNING: This file wa"
},
{
"path": "roles/drainer/vars/default.yml",
"chars": 406,
"preview": "---\n# default configuration file for drainer in yaml format\n\nsecurity:\n # Path of file that contains list of trusted SS"
},
{
"path": "roles/firewalld/defaults/main.yml",
"chars": 25,
"preview": "---\n\nfirewalld_ports: []\n"
},
{
"path": "roles/firewalld/handlers/main.yml",
"chars": 96,
"preview": "---\n# Handlers for firewalld\n\n- name: reload firewalld\n service: name=firewalld state=reloaded\n"
},
{
"path": "roles/firewalld/tasks/main.yml",
"chars": 729,
"preview": "---\n# Tasks to configure firewalld rules\n\n- name: All enabled ports\n debug: var=firewalld_ports\n\n# need root\n- name: de"
},
{
"path": "roles/grafana/defaults/main.yml",
"chars": 476,
"preview": "---\n\ngrafana_log_dir: \"{{ deploy_dir }}/log\"\ngrafana_log_filename: \"grafana.log\"\n\ngrafana_data_dir: \"{{ deploy_dir }}/da"
},
{
"path": "roles/grafana/meta/main.yml",
"chars": 88,
"preview": "---\n\ndependencies:\n - { role: 'common_dir', when: 'grafana_exec_vars_only == false' }\n\n"
},
{
"path": "roles/grafana/tasks/binary_deployment.yml",
"chars": 1377,
"preview": "---\n\n- name: create binary deploy directories (1/2)\n file: path=\"{{ item }}\" state=directory mode=0755\n with_items:\n "
},
{
"path": "roles/grafana/tasks/docker_deployment.yml",
"chars": 587,
"preview": "---\n\n- name: deploy grafana image\n copy: src=\"{{ downloads_dir }}/grafana.tar\" dest=\"{{ deploy_dir }}/images\" mode=0755"
},
{
"path": "roles/grafana/tasks/main.yml",
"chars": 74,
"preview": "---\n\n- include_tasks: tasks.yml\n when: \"grafana_exec_vars_only == false\"\n"
},
{
"path": "roles/grafana/tasks/supervise_deployment.yml",
"chars": 147,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: grafana\n service_name: "
},
{
"path": "roles/grafana/tasks/systemd_deployment.yml",
"chars": 143,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: grafana\n service_name: graf"
},
{
"path": "roles/grafana/tasks/tasks.yml",
"chars": 471,
"preview": "---\n\n- name: create common deploy directories\n file: path=\"{{ item }}\" state=directory mode=0755\n with_items:\n - \"{{ "
},
{
"path": "roles/grafana/templates/data_source.json.j2",
"chars": 964,
"preview": "{% if groups.monitoring_servers | length == groups.grafana_servers | length -%}\n {% set index = [] -%}\n {% for host in"
},
{
"path": "roles/grafana/templates/grafana.ini.j2",
"chars": 8129,
"preview": "##################### Grafana Configuration Example #####################\n#\n# Everything has defaults so you only need t"
},
{
"path": "roles/grafana/templates/run_grafana_binary.sh.j2",
"chars": 371,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/grafana/templates/run_grafana_docker.sh.j2",
"chars": 358,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/kafka_exporter/defaults/main.yml",
"chars": 202,
"preview": "---\n\n# default configuration for kafka_exporter\n\nkafka_exporter_log_level: \"info\"\nkafka_exporter_log_dir: \"{{ deploy_dir"
},
{
"path": "roles/kafka_exporter/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/kafka_exporter/tasks/binary_deployment.yml",
"chars": 597,
"preview": "---\n\n- name: create deploy directories\n file: path=\"{{ item }}\" state=directory mode=0755\n with_items:\n - \"{{ kafka_e"
},
{
"path": "roles/kafka_exporter/tasks/main.yml",
"chars": 172,
"preview": "---\n\n- include_tasks: binary_deployment.yml\n\n- name: prepare firewalld white list\n set_fact:\n firewalld_ports: \"{{ ["
},
{
"path": "roles/kafka_exporter/tasks/supervise_deployment.yml",
"chars": 168,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: kafka_exporter\n service"
},
{
"path": "roles/kafka_exporter/tasks/systemd_deployment.yml",
"chars": 164,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: kafka_exporter\n service_nam"
},
{
"path": "roles/kafka_exporter/templates/run_kafka_exporter_binary.sh.j2",
"chars": 567,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\nDEPLOY_DIR={{ deploy_dir }}\ncd \"${DEPLOY_DIR}\" || exit 1\n\n# WARNING: This file was"
},
{
"path": "roles/local/tasks/binary_deployment.yml",
"chars": 3725,
"preview": "---\n\n- name: download other binary\n get_url:\n url: \"{{ item.url }}\"\n dest: \"{{ downloads_dir }}/{{ item.name }}-{"
},
{
"path": "roles/local/tasks/docker_deployment.yml",
"chars": 484,
"preview": "---\n\n- name: download tidb docker images\n docker_image:\n name: \"{{ item.name }}\"\n tag: \"{{ item.tag }}\"\n archi"
},
{
"path": "roles/local/tasks/main.yml",
"chars": 6410,
"preview": "---\n\n- name: Stop if ansible version is too low, make sure that the Ansible version is Ansible 2.4.2 or later, otherwise"
},
{
"path": "roles/local/templates/binary_packages.yml.j2",
"chars": 5902,
"preview": "---\n\n{% if cpu_architecture == 'amd64' -%}\nthird_party_packages:\n - name: prometheus\n version: 2.8.1\n url: \"https"
},
{
"path": "roles/local/templates/common_packages.yml.j2",
"chars": 2432,
"preview": "---\n\n{% if cpu_architecture == 'amd64' -%}\ntidb_packages:\n - name: tidb\n version: {{ tidb_version }}\n url: http:/"
},
{
"path": "roles/local/templates/docker_packages.yml.j2",
"chars": 875,
"preview": "---\n\ntidb_images:\n - name: pingcap/tidb\n tag: {{ tidb_version }}\n service: tidb\n - name: pingcap/tikv\n tag: {"
},
{
"path": "roles/machine_benchmark/defaults/main.yml",
"chars": 354,
"preview": "---\n\nfio_deploy_dir: \"{{ tikv_data_dir }}/fio\"\n\n# fio randread iops\nmin_ssd_randread_iops: 40000\n\n# fio mixed randread a"
},
{
"path": "roles/machine_benchmark/tasks/fio_randread.yml",
"chars": 1544,
"preview": "---\n\n- name: fio randread benchmark on tikv_data_dir disk\n shell: \"cd {{ fio_deploy_dir }} && ./fio -ioengine=psync -bs"
},
{
"path": "roles/machine_benchmark/tasks/fio_randread_write.yml",
"chars": 2643,
"preview": "---\n\n- name: fio mixed randread and sequential write benchmark on tikv_data_dir disk\n shell: \"cd {{ fio_deploy_dir }} &"
},
{
"path": "roles/machine_benchmark/tasks/fio_randread_write_latency.yml",
"chars": 2813,
"preview": "---\n\n- name: fio mixed randread and sequential write benchmark for latency on tikv_data_dir disk\n shell: \"cd {{ fio_dep"
},
{
"path": "roles/machine_benchmark/tasks/main.yml",
"chars": 686,
"preview": "---\n\n- name: create fio and tikv data directories\n file:\n path: '{{ item }}'\n state: directory\n mode: 0755\n "
},
{
"path": "roles/node_exporter/defaults/main.yml",
"chars": 154,
"preview": "---\n\n# default configuration for node_exporter\n\nnode_exporter_log_level: info\nnode_exporter_log_filename: \"node_exporter"
},
{
"path": "roles/node_exporter/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/node_exporter/tasks/binary_deployment.yml",
"chars": 591,
"preview": "---\n\n- name: create deploy directories\n file: path={{ item }} state=directory mode=0755\n with_items:\n - \"{{ node_expo"
},
{
"path": "roles/node_exporter/tasks/docker_deployment.yml",
"chars": 614,
"preview": "---\n\n- name: deploy node-exporter image\n copy: src=\"{{ downloads_dir }}/node-exporter.tar\" dest=\"{{ deploy_dir }}/image"
},
{
"path": "roles/node_exporter/tasks/main.yml",
"chars": 190,
"preview": "---\n\n- include_tasks: \"{{ deployment_method }}_deployment.yml\"\n\n- name: prepare firewalld white list\n set_fact:\n fir"
},
{
"path": "roles/node_exporter/tasks/supervise_deployment.yml",
"chars": 165,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: node_exporter\n service_"
},
{
"path": "roles/node_exporter/tasks/systemd_deployment.yml",
"chars": 161,
"preview": "---\n\n- name: deploy systemd\n include_role:\n name: systemd\n vars:\n this_role_name: node_exporter\n service_name"
},
{
"path": "roles/node_exporter/templates/run_node_exporter_binary.sh.j2",
"chars": 620,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/node_exporter/templates/run_node_exporter_docker.sh.j2",
"chars": 346,
"preview": "#!/bin/bash\nset -e\nulimit -n 1000000\n\n# WARNING: This file was auto-generated. Do not edit!\n# All your edit mig"
},
{
"path": "roles/ops/tasks/main.yml",
"chars": 285,
"preview": "---\n- name: create check_tikv.sh script\n template:\n src: \"check_tikv.sh.j2\"\n dest: \"{{ playbook_dir }}/scripts/ch"
},
{
"path": "roles/ops/templates/check_tikv.sh.j2",
"chars": 631,
"preview": "#!/bin/bash\n{% if enable_tls|default(false) %}\n{{ resources_dir }}/bin/pd-ctl store -d -u https://{{ groups.pd_servers[0"
},
{
"path": "roles/ops/templates/pd-ctl.sh.j2",
"chars": 416,
"preview": "#!/bin/bash\n{% if enable_tls|default(false) %}\n{{ resources_dir }}/bin/pd-ctl -u https://{{ groups.pd_servers[0] }}:{{ h"
},
{
"path": "roles/pd/defaults/main.yml",
"chars": 339,
"preview": "---\n\npd_client_port: 2379\npd_peer_port: 2380\n\npd_name_prefix: pd\npd_scheme: http\n\npd_data_dir: \"{{ deploy_dir }}/data.pd"
},
{
"path": "roles/pd/files/make-ssl.sh",
"chars": 2612,
"preview": "#!/bin/bash\n\n# Author: Smana smainklh@gmail.com\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# yo"
},
{
"path": "roles/pd/meta/main.yml",
"chars": 40,
"preview": "---\n\ndependencies:\n - role: common_dir\n"
},
{
"path": "roles/pd/tasks/binary_deployment.yml",
"chars": 632,
"preview": "---\n\n- name: deploy binary\n copy: src=\"{{ resources_dir }}/bin/pd-server\" dest=\"{{ deploy_dir }}/bin/\" mode=0755 backup"
},
{
"path": "roles/pd/tasks/check_certs.yml",
"chars": 2761,
"preview": "---\n\n- name: \"Check_certs | check if the certs have already been generated on control machine\"\n find:\n paths: \"{{ ce"
},
{
"path": "roles/pd/tasks/docker_deployment.yml",
"chars": 698,
"preview": "---\n\n- name: create log directory\n file: path=\"{{ item }}\" state=directory mode=0755\n with_items:\n - \"{{ pd_docker_lo"
},
{
"path": "roles/pd/tasks/gen_certs.yml",
"chars": 744,
"preview": "---\n\n- name: Gen_certs | copy certs generation script\n copy:\n src: \"make-ssl.sh\"\n dest: \"{{ script_dir }}/make-ss"
},
{
"path": "roles/pd/tasks/install_certs.yml",
"chars": 442,
"preview": "---\n\n- name: \"Deploy_certs | Make sure the certificate directory exits\"\n file:\n path: \"{{ pd_cert_dir }}\"\n state:"
},
{
"path": "roles/pd/tasks/main.yml",
"chars": 1848,
"preview": "---\n# tasks file for pd\n\n- name: create deploy directories\n file: path={{ item }} state=directory mode=0755\n with_item"
},
{
"path": "roles/pd/tasks/supervise_deployment.yml",
"chars": 139,
"preview": "---\n\n- name: deploy supervise\n include_role:\n name: supervise\n vars:\n this_role_name: pd\n service_name: pd-{{"
}
]
// ... and 179 more files (download for full content)
About this extraction
This page contains the full source code of the pingcap/tidb-ansible GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 379 files (3.7 MB), approximately 984.2k tokens, and a symbol index with 112 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.