Repository: credativ/elephant-shed Branch: rel1_3 Commit: 0e1afe36aa1e Files: 168 Total size: 899.6 KB Directory structure: gitextract_fgr39gxo/ ├── .gitignore ├── .gitlab-ci.yml ├── .vimrc ├── LICENSE ├── Makefile ├── README.md ├── ci/ │ ├── .gitignore │ ├── ansible.cfg │ ├── build-grafana.sh │ ├── credativDeutschlandServerCA-chain.pem │ ├── inventory │ ├── openstack_remove_instance_if_exists.sh │ ├── openstackrc.sh │ ├── rebuild.sh │ └── setup-gitlab-runner ├── cockpit/ │ ├── cockpit.conf │ └── port.conf ├── debian/ │ ├── .gitignore │ ├── changelog │ ├── clean │ ├── compat │ ├── control │ ├── copyright │ ├── elephant-shed-cockpit.install │ ├── elephant-shed-cockpit.postinst │ ├── elephant-shed-grafana.install │ ├── elephant-shed-grafana.postinst │ ├── elephant-shed-omnidb.install │ ├── elephant-shed-omnidb.postinst │ ├── elephant-shed-pgbackrest.install │ ├── elephant-shed-pgbackrest.postinst │ ├── elephant-shed-pgbadger.dirs │ ├── elephant-shed-pgbadger.install │ ├── elephant-shed-pgbadger.postinst │ ├── elephant-shed-portal.install │ ├── elephant-shed-portal.lintian-overrides │ ├── elephant-shed-portal.postinst │ ├── elephant-shed-postgresql.install │ ├── elephant-shed-postgresql.postinst │ ├── elephant-shed-powa.install │ ├── elephant-shed-powa.postinst │ ├── elephant-shed-prometheus-node-exporter.install │ ├── elephant-shed-prometheus-node-exporter.mainscript │ ├── elephant-shed-prometheus-node-exporter.postinst │ ├── elephant-shed-prometheus-sql-exporter.install │ ├── elephant-shed-prometheus-sql-exporter.lintian-overrides │ ├── elephant-shed-prometheus-sql-exporter.maintscript │ ├── elephant-shed-prometheus-sql-exporter.postinst │ ├── elephant-shed-prometheus-sql-exporter.postrm │ ├── elephant-shed-prometheus.install │ ├── elephant-shed-prometheus.postinst │ ├── elephant-shed-shellinabox.postinst │ ├── elephant-shed-tmate.install │ ├── elephant-shed-tmate.lintian-overrides │ ├── elephant-shed-tmate.postinst │ ├── rules │ └── source/ │ └── format ├── doc/ │ 
├── .gitignore │ ├── Makefile │ ├── chapter/ │ │ ├── components.md │ │ ├── first-steps.md │ │ ├── installation.md │ │ ├── intro.md │ │ ├── issues.md │ │ ├── license.md │ │ ├── support.md │ │ └── users.md │ ├── conf.py │ └── index.rst ├── grafana/ │ ├── README │ ├── dashboard-sed │ ├── datasource_prometheus.yml │ ├── elephant-shed-grafana │ ├── elephant-shed-grafana.conf │ ├── elephant-shed-grafana.ini │ ├── node_overview.json │ ├── node_overview.yml │ ├── postgresql_server_overview.json │ └── postgresql_server_overview.yml ├── image-sources/ │ └── Unbenannt-1.psd ├── node-exporter/ │ ├── elephant-shed-prometheus-node-exporter │ └── elephant-shed-prometheus-node-exporter.conf ├── omnidb/ │ └── wsgi.py ├── pgbackrest/ │ ├── pgbackrest-archivecommand │ ├── pgbackrest-incr@.service │ ├── pgbackrest-incr@.timer │ ├── pgbackrest-run │ ├── pgbackrest-toggle-archiving │ ├── pgbackrest-toggle-archiving@.service │ ├── pgbackrest@.service │ └── pgbackrest@.timer ├── pgbadger/ │ ├── pgbadger-generator │ ├── pgbadger-run │ ├── pgbadger.service │ ├── pgbadger.timer │ └── pgbadger@.service ├── portal/ │ ├── cgi-bin/ │ │ ├── backrest.pl │ │ ├── error.pl │ │ ├── index_footer.pl │ │ ├── index_header.pl │ │ ├── notloggedin.pl │ │ ├── portalmain.pl │ │ └── support.pl │ ├── elephant-shed.conf │ ├── static/ │ │ └── css/ │ │ ├── elephant-shed.css │ │ └── esmenu.css │ └── template/ │ ├── backrest.html │ ├── error.html │ ├── footer.html │ ├── header.html │ ├── notloggedin.html │ ├── portalmain.html │ └── support.html ├── postgresql/ │ ├── elephant-shed.conf │ ├── es_ctlcluster │ └── rsyslog-postgresql-discard.conf ├── powa/ │ └── update-powa-web-config ├── prometheus/ │ ├── elephant-shed-prometheus │ ├── elephant-shed-prometheus.conf │ └── elephant-shed-prometheus.yml ├── rpm/ │ ├── 56-authnz_external.conf │ ├── README │ ├── elephant-shed.spec │ └── tmate.spec ├── sql-exporter/ │ ├── SAMPLE.yml │ ├── activity.yml │ ├── elephant-shed.conf │ ├── io.yml │ ├── 
prometheus-sql-exporter-restart.service │ ├── prometheus-sql-exporter-restart.timer │ ├── queries.yml │ ├── replication.yml │ ├── update-prometheus-sql-exporter-config │ └── wal.yml ├── tmate/ │ └── tmate.conf └── vagrant/ ├── .gitignore ├── Vagrantfile ├── Vagrantfile.template ├── ansible.cfg ├── elephant-shed.yml ├── files/ │ ├── git/ │ │ └── gitconfig │ ├── grafana-key.10458545.asc │ └── postgresql/ │ ├── ACCC4CF8.asc │ └── pg_hba.conf ├── group_vars/ │ ├── all │ ├── dev │ ├── openpower │ ├── test │ ├── test-with-disks │ └── x86-big ├── handlers.yml ├── inventory ├── openstack/ │ └── Vagrantfile ├── tasks/ │ ├── apache2.yml │ ├── config-backup.yml │ ├── config-system.yml │ ├── ferm.yml │ ├── grafana.yml │ ├── packages.yml │ ├── partitioning.yml │ ├── postgresql.yml │ └── repository-local.yml ├── templates/ │ ├── apache2/ │ │ └── mpm_event.conf.j2 │ ├── ferm/ │ │ ├── elephant-shed.conf.j2 │ │ └── ferm.conf.j2 │ ├── postgres/ │ │ └── postgres-limits.conf.j2 │ └── postgresql-common/ │ └── performance-tuning.conf.j2 └── vagrantpackage.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ CONCEPT.html CONCEPT.pdf README.html README.pdf lintian.log *.swp rpm/SOURCES rpm/SPECS rpm/RPMS rpm/SRPMS rpm/BUILD rpm/BUILDROOT ================================================ FILE: .gitlab-ci.yml ================================================ image: "debian:stretch" stages: - docbuild - build - publish # - test - deploy ### build ### docbuild:docs: stage: docbuild image: "tianon/latex" script: - apt-get update -qq - apt-get install -y --no-install-recommends make sphinx-common python3-sphinx python3-recommonmark python3-sphinx-rtd-theme - make docs artifacts: paths: - doc/_build/html build:stretch: &build stage: build script: - apt-get update -qq - apt-get -y --no-install-recommends build-dep ./ - 
apt-get -y --no-install-recommends install devscripts - ci/rebuild.sh $dist $is_release - make deb BUILD_ARGS=-Pstretch - mkdir -p debs - mv ../*.* debs variables: dist: deb9 artifacts: expire_in: 2 weeks paths: - debs/* - lintian.log build:stretch-release: <<: *build variables: dist: deb9 is_release: release when: manual build:buster: &build_buster <<: *build image: "debian:buster" script: - apt-get update -qq - apt-get -y --no-install-recommends build-dep ./ - apt-get -y --no-install-recommends install devscripts - ci/rebuild.sh $dist $is_release - make deb - mkdir -p debs - mv ../*.* debs variables: dist: deb10 build:buster-release: <<: *build_buster variables: dist: deb10 is_release: release when: manual build:bionic: &build_bionic <<: *build_buster variables: dist: ubuntu18.04 build:bionic-release: <<: *build_bionic variables: dist: ubuntu18.04 is_release: release when: manual build:centos7: &build_centos7 image: "centos:centos7" stage: build dependencies: - docbuild:docs script: - yum install -y rpm-build yum-utils git make - yum-builddep -y rpm/elephant-shed.spec - make rpmbuild PACKAGE_RELEASE=1~$(date -u +%Y%m%d.%H%M%S) artifacts: expire_in: 2 weeks paths: - rpm/RPMS/*/* - rpm/SRPMS/* build:centos7-release: <<: *build_centos7 script: - yum install -y rpm-build yum-utils git make - yum-builddep -y rpm/elephant-shed.spec - make rpmbuild when: manual ### publish ### publish:stretch: &publish tags: - aptly stage: publish dependencies: - build:stretch script: - aptly repo add $repo debs/*.dsc debs/*.deb - aptly publish update $repo - sync_packages.sh variables: repo: stretch-test only: - master - rel1_3 publish:stretch-release: <<: *publish dependencies: - build:stretch-release variables: repo: stretch-stable when: manual publish:buster: &publish_buster <<: *publish dependencies: - build:buster variables: repo: buster-test publish:buster-release: <<: *publish_buster dependencies: - build:buster-release variables: repo: buster-stable when: manual publish:bionic: 
&publish_bionic <<: *publish dependencies: - build:bionic variables: repo: bionic-test publish:bionic-release: <<: *publish_bionic dependencies: - build:bionic-release variables: repo: bionic-stable when: manual publish:centos7: &publish_centos <<: *publish dependencies: - build:centos7 script: - rpm --addsign rpm/RPMS/noarch/*.rpm rpm/SRPMS/*.rpm - cp rpm/RPMS/noarch/*.rpm /home/debian/postgresql/yum/$repo/noarch - cp rpm/SRPMS/*.rpm /home/debian/postgresql/yum/$repo/src - cd /home/debian; make $repo variables: repo: el7-test publish:centos7-release: <<: *publish_centos dependencies: - build:centos7-release variables: repo: el7 when: manual ### deploy ### # requires gitlab-runner to be configured to execute shell commands as root: # rm /home/gitlab-runner/.bash_logout # systemctl edit gitlab-runner # [Service] # ExecStart= # ExecStart=/usr/bin/gitlab-runner "run" "--working-directory" "/home/gitlab-runner" "--config" "/etc/gitlab-runner/config.toml" "--service" --user root deploy:stretch-x86: &deploy stage: deploy dependencies: - publish:stretch tags: - stretch-amd64 script: - 'test -f /etc/apt/sources.list.d/credativ_postgresql.list || echo "deb https://packages.credativ.com/public/postgresql/ $(lsb_release -cs)-test main" > /etc/apt/sources.list.d/credativ_postgresql.list' - apt-get update -q #- apt-get install -y ansible - apt-get install -y 'elephant-shed.*' only: - master - rel1_3 environment: name: elephant-shed url: https://elephant-shed.dev.credativ.de deploy:buster-x86: <<: *deploy dependencies: - publish:buster tags: - buster-amd64 environment: name: elephant-shed-buster url: https://elephant-shed-buster.dev.credativ.de deploy:bionic-x86: <<: *deploy dependencies: - publish:bionic tags: - bionic-amd64 environment: name: elephant-shed-bionic url: https://elephant-shed-bionic.dev.credativ.de deploy:centos7-x86: <<: *deploy stage: deploy dependencies: - build:centos7 tags: - centos7-x86 script: - 'test -f /etc/yum.repos.d/pgdg-redhat-all.repo || yum install 
-y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm' - 'test -f /etc/yum.repos.d/credativ-test.repo || yum install -y https://packages.credativ.com/public/postgresql/yum/credativ-test-repo.rpm' - yum clean expire-cache - yum update -y - yum install -y elephant-shed environment: name: elephant-shed-centos7 url: https://elephant-shed-centos7.dev.credativ.de ================================================ FILE: .vimrc ================================================ set sw=2 ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. 
States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. 
An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . ================================================ FILE: Makefile ================================================ # Default git branch to build in RPM ifndef GITBRANCH GITBRANCH=HEAD endif all: docs doc docs: $(MAKE) -C doc html deb: dpkg-buildpackage -us -uc $(BUILD_ARGS) lintian | tee -a lintian.log upload_packages: deb aptly/upload_packages.sh include_packages: aptly/include_packages.sh publish_packages: aptly/publish_packages.sh vagrant: deb cd vagrant && vagrant up --provision ansible: deb cd vagrant && ./elephant-shed.yml $(ANSIBLE_ARGS) deploy_openpower: vagrant/inventory.openpower cd vagrant && ./elephant-shed.yml $(ANSIBLE_ARGS) \ -i inventory.openpower \ -e "repo=http" clean: $(MAKE) -C doc clean $(MAKE) -C grafana clean rm -rf rpm/SOURCES/ rpm/SPECS rpm/BUILD rpm/BUILDROOT rpm/RPMS rpm/SRPMS # rpm DPKG_VERSION=$(shell sed -ne '1s/.*(//; 1s/).*//p' debian/changelog) PACKAGE_RELEASE=1 RPMDIR=$(CURDIR)/rpm TARBALL=$(RPMDIR)/SOURCES/elephant-shed_$(DPKG_VERSION).tar rpmbuild: $(TMATESOURCE) $(TARBALL).xz rpmbuild -D"%_topdir $(RPMDIR)" --define='package_version $(DPKG_VERSION)' --define='package_release $(PACKAGE_RELEASE)' -ba rpm/elephant-shed.spec tarball $(TARBALL).xz: 
mkdir -p $(dir $(TARBALL)) rm -f $(TARBALL).xz git archive --prefix=elephant-shed-$(DPKG_VERSION)/ $(GITBRANCH) > $(TARBALL) # include pre-built documentation in tarball tar --append --transform "s!^!elephant-shed-$(DPKG_VERSION)/!" -f $(TARBALL) doc/_build/html xz $(TARBALL) # tmate rpm TMATESOURCE=$(CURDIR)/rpm/SOURCES/$(shell rpmspec --srpm --query --queryformat '%{Source}' rpm/tmate.spec) rpmbuild-tmate: $(TMATESOURCE) rpmbuild -D"%_topdir $(RPMDIR)" --define='package_version $(DPKG_VERSION)' -ba rpm/tmate.spec $(TMATESOURCE): mkdir -p rpm/SOURCES spectool -S -g -C rpm/SOURCES rpm/tmate.spec ================================================ FILE: README.md ================================================ # Intro Elephant Shed is a web-based PostgreSQL management front-end that bundles several utilities and applications for use with PostgreSQL. It currently manages single-node Linux PostgreSQL servers and appliances. The main components are: * PostgreSQL - * OmniDB - * postgresql-common - * pgBadger - * pgBackRest - * Grafana - * Prometheus - * Cockpit - * Shell In A Box - * tmate - All PostgreSQL versions are supported via . Supported distributions: * Debian Trixie (13) * Debian Bookworm (12) * Ubuntu Noble (24.04) * Ubuntu Jammy (22.04) # Download Packages, Vagrant boxes, images: # Documentation The documentation is located at # Screenshots ## Portal The main portal webpage has links to all bundled applications. It also shows the PostgreSQL status, and allows starting/stopping the clusters, backup, and log reports. ![Figure: Elephant Shed portal](images/el-portal.png) ## Cockpit For managing clusters and services, Cockpit is used. ![Figure: Cockpit Cluster Service](images/cockpit_postgresql_service.png) ## Monitoring - Prometheus Prometheus is a metric based monitoring system for servers and services. 
It collects metrics from configured targets at given intervals, evaluates rule expressions, displays the results, and can trigger alerts if some condition is observed to be true. ![Figure: Prometheus graphing the load](images/prometheus-load.png) ## Monitoring - Grafana Grafana bundles metrics collected by Prometheus and presents them in a dashboard. ![Figure: Grafana - PostgreSQL Server Overview](images/grafana-overview.png) ## DBA Tool - OmniDB OmniDB is a management tool for PostgreSQL to help DBAs execute many different tasks. It provides user management, DDL functionality, an interactive SQL shell, and more. ![Figure: OmniDB](images/omnidb.png) ## Backup - pgBackRest The Elephant Shed comes with a preinstalled backup solution, *pgBackRest*. ![Figure: pgBackRest Backup via Cockpit](images/el-backrest-start.png) ## Reporting - pgBadger A pgBadger report is created for each PostgreSQL instance. ![Figure: pgBadger overview](images/pgbadger-overview.png) ## Web Terminal - Shell In A Box Shell In A Box is a convenient web based terminal. ## Remote Control - tmate tmate is a fork of the popular terminal multiplexer tmux, tweaked for instant terminal sharing. It is preconfigured to connect to a relay server and enables the user to share the current terminal with a third party using a secret token. ![Figure: tmate](images/tmate.png) # License The Elephant Shed itself is licensed under the GPLv3 (). All bundled components are Free/Open-Source software with a known and approved open source license. # Support * Documentation: * Web-Chat: [#elephant-shed](https://webchat.oftc.net/?nick=web-user-.&channels=elephant-shed&uio=MT11bmRlZmluZWQmMj10cnVlJjk9dHJ1ZSYxMT0yMzY31) * IRC [#elephant-shed](https://webchat.oftc.net/?channels=elephant-shed&uio=MT11bmRlZmluZWQmMj10cnVlJjk9dHJ1ZSYxMT0yMzY31 ) on [irc.oftc.net](https://www.oftc.net/) Elephant Shed is an open source project, developed and maintained by credativ. 
For the Elephant Shed PostgreSQL appliance, credativ offers comprehensive technical support with service level agreements, optionally available 24 hours a day, 365 days a year. Installation and integration support, as well as an introduction to the Elephant Shed PostgreSQL appliance, are of course also part of credativ's services. If you are interested, please feel free to contact us. ![](images/logo_credativ_96.png) * **Web:** [credativ.de](https://credativ.de) * **E-Mail:** [info@credativ.de](mailto:info@credativ.de) * **Phone:** [+49 2161 9174200](tel:+4921619174200) ================================================ FILE: ci/.gitignore ================================================ fact_cache/ *.retry ================================================ FILE: ci/ansible.cfg ================================================ [defaults] hostfile = inventory nocows = True gathering = smart fact_caching = jsonfile fact_caching_connection = fact_cache [ssh_connection] pipelining = True ================================================ FILE: ci/build-grafana.sh ================================================ #!/bin/bash # This script should be initiated inside the go src path of grafana! # For more information on building grafana "the grafana-way" see # https://github.com/grafana/grafana-packer. sudo apt install golang npm gem ruby ruby-dev sudo npm install -g yarn # This fails on ppc64el, since fpm isn't available on that platform. sudo gem install fpm -v 1.4 go run build.go build yarn install # Because we don't have fpm on ppc64el the following step fails # horribly on this architecture. 
go run build.go pkg-deb latest ================================================ FILE: ci/credativDeutschlandServerCA-chain.pem ================================================ Subject: CN=credativ Root CA,O=credativ,C=DE Issuer: CN=credativ Root CA,O=credativ,C=DE -----BEGIN CERTIFICATE----- MIIDWzCCAkOgAwIBAgIISCM+EhxtDiMwDQYJKoZIhvcNAQELBQAwOzEZMBcGA1UE AwwQY3JlZGF0aXYgUm9vdCBDQTERMA8GA1UECgwIY3JlZGF0aXYxCzAJBgNVBAYT AkRFMB4XDTE0MDgxODEwMDkyNVoXDTI0MDgxNTEwMDkyNVowOzEZMBcGA1UEAwwQ Y3JlZGF0aXYgUm9vdCBDQTERMA8GA1UECgwIY3JlZGF0aXYxCzAJBgNVBAYTAkRF MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAjdd8jPo/DZYagM0gCbjD Qq6EBkHbeGbQPDQLoSt/M6xTP6CnHG3tBqG3gFRpjFAogn9eTGXFLvT+tGAvWHeY GQqT7Jwn/Utezuskl9M87O+QVPwwQfDus2+L98b4cBzs24t+jPqMe8Ucs7AsFDFj fHWRGh1a7lRvXwlcJzGT063MyC4awu8Ak7zxRI6h0/Six+h5v73OwOnW8jkkGNvS RNxgy3nkGdsUyculMhdNHx8CZm5SGmjOlNX9jolvfyRfCUGSOs4/MwKlB/kTFZsy YbEXhepa/58jf5XoU5vZoDUW7S3AcyW9luyH99vXokqDuUyjyHcBdCEawP+XNpBo XwIDAQABo2MwYTAdBgNVHQ4EFgQUukB/2X0jBvlK6IqQNEc1dSOZsA4wDwYDVR0T AQH/BAUwAwEB/zAfBgNVHSMEGDAWgBS6QH/ZfSMG+UroipA0RzV1I5mwDjAOBgNV HQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggEBAAEdlbkVPMRyGDSJqGphq3aM G9ksmuYUr64uoNydrnD64TT5mCmfNFYGXLYBIe0V5ZzgAUigoSipU0WUQFz0EKWX o5WYa1RcnzGKsphieafekGBA4GnSrMIuxN29ZckOvT2gZMTpM9X381aY9Slm6TOP a7XCobZDEZwBR4bmZZeJ8GOseWUHWK+Ddr+m38iUh0QqSMgfv5Iyzo0YrrFXl9FF ORYDcdKSpMQcvKM+XVLNgLqGG+1DZEwDH2Lo87OHaQzmZfkfS2wFcTzVCLJW2z0T cwgzRQQdEocD72c9QHm3+VYU56Kh5OtZWVVTVofssCBgwJlE0K9EdpKaJe3BEwE= -----END CERTIFICATE----- Subject: CN=credativ Deutschland Server CA,O=credativ,C=DE Issuer: CN=credativ Root CA,O=credativ,C=DE -----BEGIN CERTIFICATE----- MIIDbDCCAlSgAwIBAgIIFgGTWLPv+rkwDQYJKoZIhvcNAQELBQAwOzEZMBcGA1UE AwwQY3JlZGF0aXYgUm9vdCBDQTERMA8GA1UECgwIY3JlZGF0aXYxCzAJBgNVBAYT AkRFMB4XDTE0MTEyNDE0NTEwM1oXDTI0MDgxNTEwMDkyNVowSTEnMCUGA1UEAwwe Y3JlZGF0aXYgRGV1dHNjaGxhbmQgU2VydmVyIENBMREwDwYDVQQKDAhjcmVkYXRp djELMAkGA1UEBhMCREUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCR 
J/A2KvEBeoBR0cbb2sZadbzfg9+f/h7OozuW4/X6g1ciBKzC1eaU+eJnHjS3uZ2j oVd2Q94icuRV+LesA1vlVNU6e/ZsOQg2HJIxEDBNK1lbsIVKm0hdMZsE/jD6Jmcj v712331zu/33+PUXtbEgLBZS8rwQROgGwMV2Jmf/qamxiyhjqU805ONdafgypHKt zYkdJ1yUnvBnWpYYZ4DKmQSwSFTgxtH3xctvD3v8uTmR/KUmYlQsRvt4wXSUY3dE U+WiwBl1kepzvmh/fCGoVmrpCSJjp5+FD82Q6XbNzHHOJZ9P5+z34BUmvirDBmHP 5wZOkOYZKS2PCVjG9rv7AgMBAAGjZjBkMB0GA1UdDgQWBBSB3bLMhsu4ZBqT2r0w p3oMcyuaGzASBgNVHRMBAf8ECDAGAQH/AgEAMB8GA1UdIwQYMBaAFLpAf9l9Iwb5 SuiKkDRHNXUjmbAOMA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEA c+AW1b36jXmCZu3npwxTACfY8M04Mg1NjI5pEK3nI9Tj74rkCOYY0HJ8I1X4f3SR d0mrc6RXlU6jpFriBiGduU9GTMdeNkDNCHqbiuv56Saa8wosQmVTuXQk7XP7w2i3 p0nt7u1yJBIzny6tSQdSRgq/ryWlye1byX9SAiMEZGrasz/CnumZbuCP8rWN+PGw zWZxd6ZVgZJoR0ZPGqhuT4aRLkSbyEUMbP9xL8wJ/gDhZTvamgGdNkLSAYpNzio+ DKntJkpqXncMMSBi0pwnhK8JbXFXTvU0Qf/WEAEE1bAWqlxPku4XezFrYbAFhm0D f9YYoPUb3I1/K5+erEBADw== -----END CERTIFICATE----- ================================================ FILE: ci/inventory ================================================ elephant-shed.dev.credativ.lan ansible_host=172.26.251.138 ansible_user=debian ================================================ FILE: ci/openstack_remove_instance_if_exists.sh ================================================ #!/usr/bin/env bash # This script assumes that all OS_* variables are set and valid. set -u if [ $# -lt 1 ]; then cat <&2 Usage: $0 Where could be either a machine name or UUID. EOF exit 1 fi INSTANCE=$1 # Better save than sorry. test -n $INSTANCE if [ $? -ne 0 ]; then echo "Instance variable is empty, not going any further." 1>&2 exit 2 fi which openstack > /dev/null if [ $? -ne 0 ]; then echo "Could not find openstack client utilities." 1>&2 exit 3 fi openstack server list | grep -q " $INSTANCE " if [ $? -ne 0 ]; then echo "Instance \"$INSTANCE\" does not exist, exiting." exit 0 fi # If we get here a instance with name / UUID $INSTANCE does exist. openstack server delete "$INSTANCE" if [ $? 
-ne 0 ]; then echo "Something went wrong removing the instance \"$INSTANCE\"." 1>&2 exit 4 fi echo "Instance \"$INSTANCE\" removed successfully." ================================================ FILE: ci/openstackrc.sh ================================================ #!/usr/bin/env bash # NOTE: We get the password from gitlab-ci. export OS_AUTH_URL=https://nova.credativ.com:5000/v2.0 export OS_TENANT_ID=27498e6ffaca49f7b57a14ef505e3098 export OS_TENANT_NAME="database" unset OS_PROJECT_ID unset OS_PROJECT_NAME unset OS_USER_DOMAIN_NAME export OS_USERNAME="app_dbteam" export OS_REGION_NAME="regionOne" if [ -z "$OS_REGION_NAME" ]; then unset OS_REGION_NAME; fi # Use our own certificate export OS_CACERT="$(dirname $0)/credativDeutschlandServerCA-chain.pem" ================================================ FILE: ci/rebuild.sh ================================================ #!/bin/bash # This script prepares a rebuild of our Debian package for testing purposes. set -eu DISTTAG="$1" IS_RELEASE_BUILD="${2:-}" # leave empty for CI build export DEBFULLNAME="credativ GmbH" export DEBEMAIL="dbteam@credativ.com" orig_version=$(dpkg-parsechangelog -S version) if [ "$IS_RELEASE_BUILD" ]; then new_version="${orig_version}~$DISTTAG+1" else date="$(date -u +%Y%m%d.%H%M%S)" new_version="${orig_version}~$DISTTAG~${date}" fi dch --force-bad-version -v ${new_version} "Automatic CI rebuild" dch -r "Build for $DISTTAG" ================================================ FILE: ci/setup-gitlab-runner ================================================ #!/usr/bin/ansible-playbook --- - name: setup-gitlab-runner hosts: all become: true tasks: - name: Install apt-transport-https apt: name: apt-transport-https install_recommends: false - name: gitlab-runner repository key apt_key: url: https://packages.gitlab.com/runner/gitlab-ci-multi-runner/gpgkey id: 14219A96E15E78F4 - name: gitlab-runner repository apt_repository: repo: "deb https://packages.gitlab.com/runner/gitlab-ci-multi-runner/debian/ 
stretch main" filename: 'gitlab-runner' - name: Install gitlab-ci-multi-runner apt: name: gitlab-ci-multi-runner install_recommends: false - name: Add gitlab-runner to sudo group user: name: gitlab-runner groups: sudo - name: allow sudo without password lineinfile: dest: /etc/sudoers line: '%sudo ALL=(ALL:ALL) NOPASSWD: ALL' regexp: '^%sudo' ================================================ FILE: cockpit/cockpit.conf ================================================ [WebService] # Origin is hardcoded to localhost in apache config Origins = https://localhost https://localhost:10090 ================================================ FILE: cockpit/port.conf ================================================ [Socket] ListenStream= ListenStream=10090 ================================================ FILE: debian/.gitignore ================================================ *.debhelper debhelper-build-stamp elephant-shed*.debhelper.log elephant-shed*.substvars elephant-shed*/ files ================================================ FILE: debian/changelog ================================================ elephant-shed (1.3.12) unstable; urgency=medium * sql-exporter: add io/wal and primary/replica metrics. * Grafana: add io/wal and role (primary/replica) panels. * ansible: bump default Postgres version to 17. -- Michael Banck Wed, 21 Jan 2026 16:27:00 +0100 elephant-shed (1.3.11) unstable; urgency=medium * node-exporter: listen on all interfaces. * prometheus: make configuration extendable. * {node,sql}-exporter: add host-specific scrape-targets. * Dashboard: enable multi-host monitoring, overhaul system panels and ship separate node overview dashboard. -- Michael Banck Wed, 29 Oct 2025 14:16:25 +0100 elephant-shed (1.3.10) unstable; urgency=medium * Grafana: small improvements to dashboard. * ansible: bump default Postgres version to 16. -- Michael Banck Wed, 25 Jun 2025 08:49:24 +0200 elephant-shed (1.3.9) unstable; urgency=medium [ Michael Banck ] * Overhaul Grafana dashboard. 
* Add work-around to sql-exporter instant queries no longer being supported upstream. [ Nicolas Dandrimont ] * Make sql-exporter only generate instant job if there are instant queries. -- Michael Banck Mon, 18 Nov 2024 17:06:10 +0100 elephant-shed (1.3.8) unstable; urgency=medium * Fix sql-exporter queries on Debian 12 / Ubuntu 22.04. * Fix elephant-shed-cockpit postinst. * Add enable-backup and enable-powa-addons features to es_ctlcluster. * Replaced shellinabox with Cockpit's terminal. * Remove vendorized javascript libraries from portal. -- Michael Banck Wed, 28 Feb 2024 09:41:48 +0100 elephant-shed (1.3.7) unstable; urgency=medium * Finish PoWA integration. * Add es_ctlcluster script to enable PoWA in PG clusters. -- Christoph Berg Thu, 30 Jun 2022 16:58:35 +0200 elephant-shed (1.3.6) unstable; urgency=medium [ Christoph Berg ] * Add PoWA integration. [ Michael Banck ] * node-exporter: Remove obsolete --collector.netdev.ignored-devices option. * Fix pg_stat_statements sql-exporter query. -- Christoph Berg Tue, 31 May 2022 12:29:23 +0200 elephant-shed (1.3.5) unstable; urgency=medium * Generate dependency on tmate >= << 2.4 automatically. * debian/clean: Remove build artifacts. * Revert path activation for sql-exporter, doesn't work reliably. * Restart sql-exporter when postgresql.service is started. * Start omnidb-server once to initialize /var/lib/omnidb/.omnidb/. -- Christoph Berg Thu, 29 Jul 2021 15:10:20 +0200 elephant-shed (1.3.4) unstable; urgency=medium * Move from pgadmin4 to OmniDB; pgadmin4 is no longer provided on apt.postgresql.org. * Grafana: Fix syntax in datasource provisioning yaml. * Grafana: Display only the currently selected database(s) in the Freeze Age panel. * ansible: restart prometheus-sql-exporter after postgres cluster creation. * ansible: skip sysctl, io scheduler an grub tasks in LXC containers. * sql-exporter: Add final newline to queries.yml. * Require tmate >= 2.4 since the hostkey syntax changed. 
[ Nicolas Dandrimont ] * Update pg_stat_statements SQL exporter query for Postgres 13 -- Christoph Berg Mon, 01 Mar 2021 11:44:22 +0100 elephant-shed (1.3.3) unstable; urgency=medium * Added limits to monitoring-queries. * Fixed "Column 'usename' must be type text (string)". -- Christoph Berg Thu, 09 Jan 2020 14:23:31 +0100 elephant-shed (1.3.2) unstable; urgency=medium * Grafana: Fix "WAL Senders"-panel. * Build infrastructure updates. -- Christoph Berg Tue, 06 Aug 2019 15:38:38 +0200 elephant-shed (1.3.1) unstable; urgency=medium * update-prometheus-sql-exporter-config: Support setting interval. Patch by Nicolas Dandrimont, thanks! * e-s-postgresql.postinst: Don't fail if logrotate file was removed. * When building for packages.credativ.com, add dependency on credativ-postgresql-keyring package. -- Christoph Berg Thu, 02 May 2019 16:00:09 +0200 elephant-shed (1.3) unstable; urgency=medium * Start prometheus-sql-exporter-restart.timer on package installation. * Node exporter 0.16 renames many variables, update dashboard. * elephant-shed-postgresql: Stop installing custom logrotate file. * postgresql: Don't set waldir and data checksums by default. * portal: Use AuthExternal pwauth instead of the buggy authnz-pam module. * portal: Use HTTP_HOST instead of SSL_TLS_SNI in cockpit's CSP header. -- Christoph Berg Fri, 07 Dec 2018 16:39:19 +0100 elephant-shed (1.2) unstable; urgency=medium * This release bundles a series of breaking changes: + Upgrade to grafana 5 and prometheus 2. (Existing monitoring data is not migrated.) + Upgrade to prometheus-node-exporter 0.15. + Rewrite update-prometheus-sql-exporter-config using PgCommon.pm, and refactor its config file handling. * prometheus: Use 'localhost' as job_name to bundle this machine's node and sql exporter results. 'sql_job' contains the postgresql-common cluster name now. * The grafana dashboard has been updated to use rate() instead of increase() in most panels to show values per second or minute. 
-- Christoph Berg Fri, 18 May 2018 17:06:21 +0200 elephant-shed (1.1.1) unstable; urgency=medium [ Adrian Vondendriesch ] * Split prometheus exporters into own packages. [ Alexander Sosna ] * Change docs build process to sphinx * grafana: Fix "Dead Tuples" * grafana: Fix "Locks" * grafana: Change limits for "Next Freeze" * grafana: Add tool tips -- Adrian Vondendriesch Thu, 25 Jan 2018 17:00:25 +0100 elephant-shed (1.0) unstable; urgency=medium [ Christoph Berg ] * cockpit: Add cockpit-packagekit to Recommends. [ Adrian Vondendriesch ] * prometheus: Add sudo to dependencies. * portal: replace the way we show backups. * portal: rework UI, introduce bootstrap. [ Alexander Sosna ] * grafana: Fix for queries for Prometheus 2.0 * portal: Use new Icons [ Caique de Castro ] * portal: Fix link to Grafana -- Adrian Vondendriesch Mon, 16 Oct 2017 18:54:10 +0200 elephant-shed (0.9) unstable; urgency=medium * Team upload. * Initial release. -- Adrian Vondendriesch Mon, 16 Oct 2017 17:38:41 +0200 ================================================ FILE: debian/clean ================================================ ci/fact_cache/ ci/*.retry doc/_build/ rpm/BUILD/ rpm/BUILDROOT/ rpm/RPMS/ rpm/SOURCES/ rpm/SPECS/ rpm/SRPMS/ vagrant/fact_cache/ vagrant/*.retry vagrant/openstack/fact_cache/ vagrant/.vagrant/ ================================================ FILE: debian/compat ================================================ 10 ================================================ FILE: debian/control ================================================ Source: elephant-shed Section: database Priority: optional Maintainer: credativ GmbH Uploaders: Michael Banck , Adrian Vondendriesch , Standards-Version: 4.1.1 Build-Depends: debhelper (>= 10~), python3-recommonmark, python3-sphinx-rtd-theme, python3-sphinx, sphinx-common, tmate, Package: elephant-shed Architecture: all Depends: elephant-shed-cockpit, elephant-shed-grafana, elephant-shed-omnidb , elephant-shed-pgbackrest, elephant-shed-pgbadger, 
elephant-shed-portal, elephant-shed-postgresql, elephant-shed-powa, elephant-shed-prometheus, elephant-shed-prometheus-node-exporter, elephant-shed-prometheus-sql-exporter, elephant-shed-tmate, ${keyring:Depends}, ${misc:Depends}, Description: PostgreSQL dashboard The Elephant Shed is a web-based PostgreSQL management front-end. . This meta package depends on all Elephant Shed components. Package: elephant-shed-portal Architecture: all Depends: adduser, apache2, libapache2-mod-authnz-external, libapache2-mod-authz-unixgroup, libapache2-mod-wsgi-py3 , libjs-jquery, libjs-bootstrap, libjs-popper.js, libtemplate-perl, libcgi-pm-perl, pwauth, ${misc:Depends}, Description: PostgreSQL dashboard -- web interface The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the web interface. Package: elephant-shed-postgresql Architecture: all Depends: adduser, postgresql-common (>> 183~), ${misc:Depends}, Description: PostgreSQL dashboard -- PostgreSQL integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with PostgreSQL. Package: elephant-shed-omnidb Architecture: all Build-Profiles: Depends: omnidb-server, ${misc:Depends}, Description: PostgreSQL dashboard -- OmniDB integration The Elephant Shed is a web-based PostgreSQL management front-end. . This meta package provides the integration with OmniDB. Package: elephant-shed-pgbadger Architecture: all Depends: pgbadger (>> 9), postgresql-common, ${misc:Depends}, Description: PostgreSQL dashboard -- pgBadger integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with pgBadger. Package: elephant-shed-pgbackrest Architecture: all Depends: pgbackrest, postgresql-common, ${misc:Depends}, Description: PostgreSQL dashboard -- pgBackRest integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with pgBackRest. 
Package: elephant-shed-grafana Architecture: all Depends: curl, grafana (>> 5), ${misc:Depends}, Description: PostgreSQL dashboard -- Grafana integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with Grafana. Package: elephant-shed-powa Architecture: all Depends: powa-web, postgresql-common, ${misc:Depends}, Description: PostgreSQL dashboard -- PoWA integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with PoWA. Package: elephant-shed-prometheus Architecture: all Depends: prometheus (>= 2.0), ${misc:Depends}, Description: PostgreSQL dashboard -- Prometheus integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with Prometheus. Package: elephant-shed-prometheus-node-exporter Architecture: all Depends: prometheus-node-exporter (>= 0.16.0), ${misc:Depends}, Breaks: elephant-shed-prometheus (<< 1.1) Replaces: elephant-shed-prometheus (<< 1.1) Description: PostgreSQL dashboard -- Node exporter integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with the Prometheus node exporter. Package: elephant-shed-prometheus-sql-exporter Architecture: all Depends: libyaml-perl, postgresql-common, prometheus-sql-exporter, ${misc:Depends}, Breaks: elephant-shed-prometheus (<< 1.1) Replaces: elephant-shed-prometheus (<< 1.1) Description: PostgreSQL dashboard -- SQL exporter integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with the Prometheus SQL exporter. Package: elephant-shed-cockpit Architecture: all Depends: cockpit, ${misc:Depends}, Recommends: cockpit-packagekit, Description: PostgreSQL dashboard -- cockpit integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with cockpit. 
Package: elephant-shed-shellinabox Architecture: all Depends: shellinabox, ${misc:Depends}, Description: PostgreSQL dashboard -- shellinabox integration The Elephant Shed is a web-based PostgreSQL management front-end. . This meta package provides the integration with shellinabox. Package: elephant-shed-tmate Architecture: all Depends: tmate ${tmate:Depends}, ${misc:Depends}, Description: PostgreSQL dashboard -- tmate integration The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with tmate. ================================================ FILE: debian/copyright ================================================ Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Files: * Copyright: (c) 2017-2019 credativ GmbH License: GPL-3 This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. . This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. . On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL-3'. 
================================================ FILE: debian/elephant-shed-cockpit.install ================================================ cockpit/cockpit.conf etc/cockpit cockpit/port.conf etc/systemd/system/cockpit.socket.d ================================================ FILE: debian/elephant-shed-cockpit.postinst ================================================ #!/bin/sh set -e case $1 in configure) systemctl daemon-reload # not restarting cockpit here, it is socket-activated if [ -z "$2" ]; then /usr/lib/cockpit/cockpit-certificate-ensure fi systemctl restart cockpit.socket ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-grafana.install ================================================ grafana/datasource_prometheus.yml etc/grafana/provisioning/datasources grafana/elephant-shed-grafana etc/default grafana/elephant-shed-grafana.conf etc/systemd/system/grafana-server.service.d grafana/elephant-shed-grafana.ini etc/grafana grafana/postgresql_server_overview.json usr/share/elephant-shed/grafana grafana/postgresql_server_overview.yml etc/grafana/provisioning/dashboards grafana/node_overview.json usr/share/elephant-shed/grafana grafana/node_overview.yml etc/grafana/provisioning/dashboards ================================================ FILE: debian/elephant-shed-grafana.postinst ================================================ #!/bin/sh set -eu case $1 in configure) # we depend on grafana and it has to be enabled # lintian wants us to use "deb-systemd-helper enable" instead, but that # doesn't work with grafana-server.service being located in /usr/systemd/system systemctl enable grafana-server invoke-rc.d grafana-server restart ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-omnidb.install ================================================ omnidb/wsgi.py /usr/share/elephant-shed/omnidb ================================================ FILE: 
debian/elephant-shed-omnidb.postinst ================================================ #!/bin/sh set -eu case $1 in configure) echo Starting omnidb-server once to initialize /var/lib/omnidb/.omnidb/omnidb-server/ ... systemctl start omnidb-server sleep 5 systemctl stop omnidb-server systemctl disable omnidb-server ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-pgbackrest.install ================================================ pgbackrest/pgbackrest-run usr/share/elephant-shed pgbackrest/pgbackrest-toggle-archiving usr/share/elephant-shed pgbackrest/pgbackrest-archivecommand usr/share/elephant-shed pgbackrest/pgbackrest@.service lib/systemd/system pgbackrest/pgbackrest-incr@.service lib/systemd/system pgbackrest/pgbackrest-toggle-archiving@.service lib/systemd/system pgbackrest/pgbackrest@.timer lib/systemd/system pgbackrest/pgbackrest-incr@.timer lib/systemd/system ================================================ FILE: debian/elephant-shed-pgbackrest.postinst ================================================ #!/bin/sh set -eu BACKUPDIR="/var/lib/pgbackrest" case $1 in configure) if ! test -e /etc/pgbackrest.conf; then cat > /etc/pgbackrest.conf <<-EOF [global] repo-path=$BACKUPDIR retention-full=3 EOF fi if ! test -d /var/www/html/pgbackrest; then mkdir -p /var/www/html/pgbackrest chown postgres:postgres /var/www/html/pgbackrest fi if ! 
test -d $BACKUPDIR; then mkdir -p $BACKUPDIR chown postgres:postgres $BACKUPDIR fi ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-pgbadger.dirs ================================================ var/lib/pgbadger ================================================ FILE: debian/elephant-shed-pgbadger.install ================================================ pgbadger/pgbadger-generator lib/systemd/system-generators pgbadger/pgbadger-run usr/share/elephant-shed pgbadger/pgbadger.service lib/systemd/system pgbadger/pgbadger.timer lib/systemd/system pgbadger/pgbadger@.service lib/systemd/system ================================================ FILE: debian/elephant-shed-pgbadger.postinst ================================================ #!/bin/sh set -eu case $1 in configure) if test -d /var/lib/pgbadger; then chown postgres:postgres /var/lib/pgbadger fi ;; purge) rm -rf /var/lib/pgbadger ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-portal.install ================================================ doc/_build/html usr/share/elephant-shed/doc portal/cgi-bin usr/lib portal/elephant-shed.conf etc/apache2/sites-available portal/image usr/share/elephant-shed portal/template usr/share/elephant-shed portal/static usr/share/elephant-shed ================================================ FILE: debian/elephant-shed-portal.lintian-overrides ================================================ elephant-shed-portal: extra-license-file usr/share/doc/elephant-shed-portal/html/_sources/chapter/license.md.txt ================================================ FILE: debian/elephant-shed-portal.postinst ================================================ #!/bin/sh set -eu case $1 in configure) # create elephant-shed group addgroup --system elephant-shed # enable apache modules for module in xml2enc proxy proxy_http proxy_html proxy_connect proxy_wstunnel rewrite deflate headers ssl cgid substitute; do a2enmod 
--maintmode $module done # enable site a2dissite 000-default a2ensite elephant-shed # restart apache invoke-rc.d apache2 restart ;; remove) # disable site a2dissite elephant-shed ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-postgresql.install ================================================ postgresql/elephant-shed.conf etc/postgresql-common/createcluster.d postgresql/es_ctlcluster usr/bin postgresql/rsyslog-postgresql-discard.conf etc/rsyslog.d/postgresql-discard.conf ================================================ FILE: debian/elephant-shed-postgresql.postinst ================================================ #!/bin/sh set -e case $1 in configure) # make sure postgres is in the shadow group if ! id -Gn postgres | grep -qw shadow; then adduser $quiet postgres shadow invoke-rc.d postgresql restart fi # We deploy a rsyslog rule. invoke-rc.d rsyslog restart # Remove old config file diversion if dpkg --compare-versions "$2" lt-nl "1.2.1"; then SHA1SUM="$(sha1sum /etc/logrotate.d/postgresql-common 2> /dev/null || :)" if [ "${SHA1SUM%% *}" = "6c4c0af32cf873a7050650d0ac85403410714ae7" ]; then rm -fv /etc/logrotate.d/postgresql-common fi dpkg-divert --package elephant-shed-postgresql --rename \ --divert /etc/logrotate.d/postgresql-common.real.disabled \ --remove /etc/logrotate.d/postgresql-common fi ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-powa.install ================================================ powa/update-powa-web-config usr/bin ================================================ FILE: debian/elephant-shed-powa.postinst ================================================ #!/bin/sh set -eu case $1 in configure) update-powa-web-config systemctl restart powa-web ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-prometheus-node-exporter.install ================================================ 
node-exporter/elephant-shed-prometheus-node-exporter etc/default node-exporter/elephant-shed-prometheus-node-exporter.conf etc/systemd/system/prometheus-node-exporter.service.d ================================================ FILE: debian/elephant-shed-prometheus-node-exporter.mainscript ================================================ mv_conffile /etc/default/elephant-shed-prometheus-node-exporter /etc/default/elephant-shed-prometheus-node-exporter 1.1~ elephant-shed-prometheus mv_conffile /etc/systemd/system/prometheus-node-exporter.service.d/elephant-shed-prometheus-node-exporter.conf /etc/systemd/system/prometheus-node-exporter.service.d/elephant-shed-prometheus-node-exporter.conf 1.1~ elephant-shed-prometheus ================================================ FILE: debian/elephant-shed-prometheus-node-exporter.postinst ================================================ #!/bin/sh set -e HOSTNAME=`hostname -f` case $1 in configure) systemctl daemon-reload deb-systemd-helper enable prometheus-node-exporter.service deb-systemd-invoke restart prometheus-node-exporter if [ ! 
-e /etc/prometheus/node_exporter/$HOSTNAME.yml ]; then mkdir -p /etc/prometheus/node_exporter echo "- targets:" > /etc/prometheus/node_exporter/$HOSTNAME.yml echo " - $HOSTNAME:9100" >> /etc/prometheus/node_exporter/$HOSTNAME.yml fi ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-prometheus-sql-exporter.install ================================================ sql-exporter/elephant-shed.conf lib/systemd/system/prometheus-sql-exporter.service.d sql-exporter/prometheus-sql-exporter-restart.service lib/systemd/system sql-exporter/prometheus-sql-exporter-restart.timer lib/systemd/system sql-exporter/update-prometheus-sql-exporter-config usr/bin sql-exporter/*.yml etc/prometheus-sql-exporter ================================================ FILE: debian/elephant-shed-prometheus-sql-exporter.lintian-overrides ================================================ # use systemctl directly because deb-systemd-invoke doesn't like static units elephant-shed-prometheus-sql-exporter: maintainer-script-calls-systemctl ================================================ FILE: debian/elephant-shed-prometheus-sql-exporter.maintscript ================================================ mv_conffile /etc/prometheus-sql-exporter.yml.in /etc/prometheus-sql-exporter.yml.in 1.1~ elephant-shed-prometheus ================================================ FILE: debian/elephant-shed-prometheus-sql-exporter.postinst ================================================ #!/bin/sh set -e HOSTNAME=$(hostname -f) case $1 in configure) systemctl daemon-reload systemctl enable prometheus-sql-exporter.service systemctl restart prometheus-sql-exporter.service systemctl enable prometheus-sql-exporter-restart.timer systemctl start prometheus-sql-exporter-restart.timer if [ ! 
-e /etc/prometheus/sql_exporter/$HOSTNAME.yml ]; then mkdir -p /etc/prometheus/sql_exporter echo "- targets:" > /etc/prometheus/sql_exporter/$HOSTNAME.yml echo " - $HOSTNAME:9237" >> /etc/prometheus/sql_exporter/$HOSTNAME.yml fi ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-prometheus-sql-exporter.postrm ================================================ #!/bin/sh set -e case $1 in purge) rm -f /var/run/postgresql/prometheus-sql-exporter.yml # old file location rm -f /etc/prometheus-sql-exporter.yml ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-prometheus.install ================================================ prometheus/elephant-shed-prometheus etc/default prometheus/elephant-shed-prometheus.conf etc/systemd/system/prometheus.service.d prometheus/elephant-shed-prometheus.yml etc/prometheus ================================================ FILE: debian/elephant-shed-prometheus.postinst ================================================ #!/bin/sh set -e case $1 in configure) systemctl daemon-reload deb-systemd-helper enable prometheus.service deb-systemd-invoke restart prometheus ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-shellinabox.postinst ================================================ #!/bin/sh set -eu case $1 in configure) if ! 
grep -q '^SHELLINABOX_ARGS.*--localhost-only' /etc/default/shellinabox; then sed -i -e 's/^SHELLINABOX_ARGS.*/SHELLINABOX_ARGS="--no-beep --localhost-only"/' /etc/default/shellinabox invoke-rc.d shellinabox restart fi ;; esac #DEBHELPER# ================================================ FILE: debian/elephant-shed-tmate.install ================================================ tmate/tmate.conf etc ================================================ FILE: debian/elephant-shed-tmate.lintian-overrides ================================================ # symlinks in /etc/skel are copied over to home directories, so must be absolute elephant-shed-tmate: symlink-should-be-relative etc/skel/.tmate.conf /etc/tmate.conf ================================================ FILE: debian/elephant-shed-tmate.postinst ================================================ #!/bin/sh set -eu case $1 in configure) if ! test -e /root/.tmate.conf; then ln -s /etc/tmate.conf /root/.tmate.conf fi ;; esac #DEBHELPER# ================================================ FILE: debian/rules ================================================ #!/usr/bin/make -f TMATE_VERSION = $(shell dpkg-query --showformat '$${Version}' --show tmate) TMATE_24 = $(shell dpkg --compare-versions $(TMATE_VERSION) ge 2.4 && echo yes) ifeq ($(TMATE_24),yes) TMATE_DEPENDS = (>= 2.4) else TMATE_DEPENDS = (<< 2.4) endif # when building for packages.credativ.com (from ci/rebuild.sh), add dependency on credativ-postgresql-keyring ifneq ($(findstring deb, $(shell dpkg-parsechangelog -SVersion))$(findstring ubuntu, $(shell dpkg-parsechangelog -SVersion)),) CREDATIV_KEYRING=-Vkeyring:Depends=credativ-postgresql-keyring endif %: dh $@ --with=systemd override_dh_install: dh_install ifneq ($(TMATE_24),yes) sed -i -e 's/^\(set.*fingerprint.*\)/##\1/' debian/elephant-shed-tmate/etc/tmate.conf sed -i -e 's/^#\(set.*fingerprint.*\)/\1/' debian/elephant-shed-tmate/etc/tmate.conf endif ifneq ($(filter stretch,$(DEB_BUILD_PROFILES)),) sed -i -e 
'/WSGI/d' debian/elephant-shed-portal/etc/apache2/sites-available/elephant-shed.conf endif override_dh_link: mkdir -p debian/elephant-shed-tmate/etc/skel ln -s /etc/tmate.conf debian/elephant-shed-tmate/etc/skel/.tmate.conf dh_link -X'tmate.conf' # don't fix /etc/skel link override_dh_gencontrol: dh_gencontrol -- -Vtmate:Depends='$(TMATE_DEPENDS)' $(CREDATIV_KEYRING) ================================================ FILE: debian/source/format ================================================ 3.0 (native) ================================================ FILE: doc/.gitignore ================================================ _build ================================================ FILE: doc/Makefile ================================================ # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
# Print the list of supported documentation targets.
# The banner names the <target> placeholder explicitly; without it the
# message reads "Please use `make ' where is one of".
.PHONY: help
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " applehelp to make an Apple Help Book"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " epub3 to make an epub3"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
	@echo " coverage to run coverage check of the documentation (if enabled)"
	@echo " dummy to check syntax errors of document sources"

# Remove everything below the build directory.
.PHONY: clean
clean:
	rm -rf $(BUILDDIR)/*

# Each target below runs the Sphinx builder of the same name and writes its
# output to $(BUILDDIR)/<builder>.
.PHONY: html
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

.PHONY: dirhtml
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

.PHONY: singlehtml
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

.PHONY: pickle
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

.PHONY: json
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

.PHONY: htmlhelp
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

.PHONY: qthelp
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/elephant-shed.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/elephant-shed.qhc"

.PHONY: applehelp
applehelp:
	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
	@echo "N.B. You won't be able to view it unless you put it in" \
	      "~/Library/Documentation/Help or install it in your application" \
	      "bundle."

# "$$HOME" is escaped so that make passes a literal $HOME through to echo.
.PHONY: devhelp
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/elephant-shed"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/elephant-shed"
	@echo "# devhelp"

.PHONY: epub
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

.PHONY: epub3
epub3:
	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
	@echo
	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."

# Generate LaTeX sources only.
.PHONY: latex
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# Generate LaTeX sources, then build the PDFs with the generated Makefile.
.PHONY: latexpdf
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Same as latexpdf, but through the platex/dvipdfmx tool chain.
.PHONY: latexpdfja
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

.PHONY: text
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

.PHONY: man
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

.PHONY: texinfo
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Generate Texinfo sources, then build the Info files with the generated
# Makefile. The sub-make is invoked through $(MAKE), as in latexpdf and
# latexpdfja, so the make program and flags in use (-j, -n, -k, ...) are
# passed down instead of starting an unrelated "make".
.PHONY: info
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# gettext uses I18NSPHINXOPTS (no shared doctree directory).
.PHONY: gettext
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

.PHONY: changes
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

.PHONY: linkcheck linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy dummy: $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." ================================================ FILE: doc/chapter/components.md ================================================ # Components ## PostgreSQL The Elephant Shed is based on `postgresql-common`, the default PostgreSQL management system for Debian based installations. Tasks like creating, dropping or renaming a PostgreSQL instance ("cluster" in PostgreSQL terms) should be done through `postgresql-common`'s command line utilities `pg_createcluster`, `pg_dropcluster` and `pg_renamecluster`. ### Default Configuration Beside the postgresql-common default configuration the `elephant-shed-postgresql` package adds additional parameters for the cluster creation process. Some of these parameters are required by the Elephant Shed components. E.g. pgBadger requires some special `log_line_prefix`. You can find this configuration under `/etc/postgresql-common/createcluster.d/elephant-shed.conf`. 
Be careful when changing any of these values. ### Cluster Administration To create a new cluster issue the command `pg_createcluster <version> <name>`. Installed clusters and their status can be listed via `pg_lsclusters`. ``` postgres@stretch:~$ pg_lsclusters Ver Cluster Port Status Owner Data directory Log file 9.6 main 5432 online postgres /9.6/main /log/9.6-main.log 9.6 standby1 5433 online,recovery postgres /9.6/standby1 /log/9.6-standby1.log 9.6 standby2 5434 online,recovery postgres /9.6/standby2 /log/9.6-standby2.log 9.6 test 5435 online postgres /9.6/test /log/9.6-test.log ``` To delete a cluster use `pg_dropcluster <version> <name>` (be careful, this removes all data in the cluster as well!). To start, stop, restart or reload a cluster use `pg_ctlcluster <version> <name> <action>` with one of the following commands as action: - `start` - `stop` - `restart` - `reload` Alternatively, you can use systemctl (`systemctl <action> postgresql@<version>-<name>`) or the Cockpit web interface. ![Figure: Cockpit Cluster Service](images/cockpit_postgresql_service.png) *Note:* Future versions of The Elephant Shed will include cluster management in the [portal](#portal). ## Portal The portal serves as an entry point to all other web-based interfaces. It uses HTTPS and basic authentication. Each user within the Unix group `elephant-shed` has access to it (see [Users](#users)). The portal also shows the status of all PostgreSQL clusters including links to the Cockpit service (in order to start or stop the cluster), the log files, pgBadger reports and the backup software pgBackRest. A navigation bar at the top allows switching between the different web services. By default only a self-signed certificate for HTTPS is deployed. A corresponding security warning is shown in most browsers. You can replace the certificate with a signed one (e.g. from your company CA, or from [Let’s Encrypt](https://letsencrypt.org/)). The web services are served by Apache2. It also acts as a reverse proxy to serve all other web interfaces and to enforce authentication.
### PostgreSQL Cluster ![Figure: PostgreSQL Cluster](images/portal_cluster_list.png) This section presents an overview of the existing PostgreSQL clusters and their status. For each cluster, a set of switches shows the current status. By clicking on a cluster an additional menu with buttons opens. Currently all buttons link to the corresponding components where a confirmation is required so no actions are triggered directly, but this may change in the future. ### systemd - Service This links to the configuration of this PostgreSQL cluster in Cockpit. Here it is possible to configure the corresponding service to be enabled or disabled on system start and also trigger actions like start, stop and reload. ### systemd - Log Links to the corresponding log entries in Cockpit if syslog is enabled for this cluster (which is the default for clusters created by Elephant Shed). ### Report - Run By default pgBadger reports for all clusters are generated once every night. With this service it is possible to generate a report for a specific cluster on demand. ### Report - Show Links to the corresponding pgBadger report overview. See [pgBadger](#reporting-pgbadger) for more information. ### Backup This section provides several functions for backups using pgBackRest. For more information about the backup tool pgBackRest visit [pgBackRest](#backup-pgbackrest). #### Full Link to Cockpit for starting an ad-hoc full backup. #### Incremental Link to Cockpit for starting an ad-hoc incremental backup. #### Info Shows the status of the backups. This button is only shown after the first backup run. Here the available backups and the content of the WAL archive are shown. Additional information can be found here: ### Switches #### Archiving This switch shows if an archive command is set that uses pgBackRest. It is possible to set one or to deactivate the feature by using '/bin/true'. Archiving is needed for point-in-time recovery but more importantly for pgBackRest backups.
Archiving will be activated automatically if a backup is triggered via the portal or timers. To change it manually the service `pgbackrest-toggle-archiving@<version>-<name>.service` can be started. This toggles the state. #### Full Backup Switch to enable or disable a periodic full backup. An enabled backup job (systemd timer) is shown by green color. To start or stop the periodic backup, the timer `pgbackrest@<version>-<name>.timer` needs to be started or stopped. Enabling/disabling the timer determines whether it is active after the next reboot. #### Incr Backup Switch to enable or disable a periodic incremental backup. An enabled backup job (systemd timer) is shown by green color. To start or stop the periodic backup, the timer `pgbackrest-incr@<version>-<name>.timer` needs to be started or stopped. Enabling/disabling the timer determines whether it is active after the next reboot. ## Service Web Interface - Cockpit Cockpit allows remote management of all systemd related services via HTTPS. It makes starting, stopping or restarting of services as simple as clicking a button. It also shows system logs in real-time. ![Figure: Cockpit Package Updates](images/cockpit_package_updates.png) ## Monitoring - Prometheus Prometheus is a metric-based monitoring system for servers and services. It collects metrics from configured targets at given intervals, evaluates rule expressions, displays the results, and can trigger alerts if some condition is observed to be true. ![Figure: Prometheus graphing the load](images/prometheus-load.png) ![Figure: Prometheus targets](images/prometheus-targets.png) ### Services In this setup the Prometheus stack consists of different components controlled by systemd. The following units are deployed. #### prometheus.service Monitoring system and time series database - This is the monitoring service itself. It actively pulls the metrics from the different sources. It also provides internal metrics and a web interface which is accessible via the portal.
Configuration files: * `/etc/prometheus/elephant-shed-prometheus.yml` * `/etc/default/elephant-shed-prometheus` #### prometheus-node-exporter.service Prometheus exporter for machine metrics - This service exports the system metrics and listens on port 9100. These metrics are collected every 30 seconds by default. Configuration files: * `/etc/default/elephant-shed-prometheus-node-exporter` #### prometheus-sql-exporter.service Prometheus exporter for SQL metrics - This service collects the PostgreSQL specific metrics and listens on port 9237. The metrics are retrieved by querying the database. In order not to generate additional load the metrics are collected only every 60 seconds. **WARNING: It is not advisable to set the monitoring interval for the `prometheus-sql-exporter` lower than 60 seconds. This could interfere with normal applications and have a high impact on the PostgreSQL cluster.** The `prometheus-sql-exporter.service` starts one connection to each database on startup and keeps this connection open. At the beginning of each connection the `prometheus-sql-exporter.service` checks if the extension `pg_stat_statements` is present. If not, the service issues the statement `CREATE EXTENSION pg_stat_statements`. Configuration files: * `/etc/prometheus-sql-exporter.yml` #### update-prometheus-sql-exporter-config.timer This timer triggers the `update-prometheus-sql-exporter-config.service` periodically which generates a new configuration for the `prometheus-sql-exporter` every 10 minutes. This makes sure that every new database cluster and every new database is included in the monitoring automatically. It's possible to call the update-prometheus-sql-exporter-config.service manually to generate a new configuration directly.
Configuration template file: * `prometheus-sql-exporter.yml.in` Configuration file (runtime): * `prometheus-sql-exporter.yml` ### Additional Resources * * ## Dashboard - Grafana Grafana is a tool to create graphs and dashboards from a variety of different data sources. A PostgreSQL Server Overview dashboard is included in the default installation to get an overview over the most needed and many helpful metrics to manage and debug PostgreSQL servers. These pre-deployed dashboards are shipped via the `elephant-shed-grafana` Debian package and can change in the future. They are read only and need to be saved under a new name if you make any changes. ### PostgreSQL Server Overview ![Figure: Grafana - PostgreSQL Server Overview](images/grafana-overview.png) This dashboard starts with a summary section with simple gauges to provide an overview of the whole system. These gauges may indicate current problems or give a hint on problems that might occur in the future. After the gauges in-depth metrics are shown as graphs. ### Server metrics Server metrics include e.g.: CPU usage (by type / by core), disk usage, disk utilization, network throughput, and much more. The following template filters are configured: - `Disk`: filter one or more disks - `Interface`: filter one or more interfaces - `Filesystem`: filter one or more filesystems / mountpoints ### Cluster metrics The next section contains PostgreSQL Cluster wide metrics like connections (by type / by database), number of transactions, database growth and more. Only one cluster is shown at a time. To switch the currently shown cluster use the template filter `PostgreSQL Cluster`. ### Database metrics Database level metrics are shown at the end of *PostgreSQL Overview* dashboard. By default metrics for all databases of the currently selected PostgreSQL cluster are shown. To filter one or more databases the template filter `Database` could be used.
### Additional Resources * * ## DBA Tool - OmniDB ![Figure: OmniDB](images/omnidb.png) OmniDB is a management tool for PostgreSQL to help DBAs execute many different tasks. It provides user management, DDL functionality, an interactive SQL shell, and more. ### Additional Resources * ## PoWA - PostgreSQL Workload Analyzer The Elephant Shed includes integration with PoWA, a service that collects database metrics, most notably about running queries. There is some overlap with Prometheus and Grafana, but the PoWA statistics are much more detailed on the database level. ### Setup The PoWA web interface maintains a list of known clusters in the system in `/etc/powa-web.conf`. To update that list, use: ``` update-powa-web-config ``` To enable PoWA monitoring for a cluster, run `es_ctlcluster`: ``` es_ctlcluster main enable-powa ``` In case of problems, the web interface will throw "Auth failed" errors for a wide range of possible errors. Consulting the PostgreSQL server log in `/var/log/postgresql/postgresql-NN-main.log` is often a good place to start debugging. ## Backup - pgBackRest The Elephant Shed comes with a preinstalled backup solution, *pgBackRest*. Each PostgreSQL instance can be backed up individually by issuing the command `systemctl start pgbackrest@-` or initiating a backup via Cockpit in the web interface. A shortcut is listed for each instance. Configuration entries for each cluster are created with the first backup run. By default only `db-path` and `db-port` are set. A list of all backups can be obtained by clicking on the pgBackRest icon on the portal site. ![Figure: pgBackRest Backup via Cockpit](images/el-backrest-start.png) pgBackRest knows 3 types of backups: full, incremental and differential. We are using full and incremental by default. A service file for differential backups is not installed by default. ### Full Backup Full backups represent a complete backup of the database at a given point in time.
A backup consists of two parts, the backup itself, stored in `backup` and the WAL files which were written during the backup, stored in `archive`. To ensure these WAL files are in the archive we automatically enable WAL archiving before the first backup is created. **Warning**: If archiving is enabled all WAL files newer than the oldest stored backups are kept as well. This can consume a lot of space in the backup location if backups are kept for a long time and archiving is not disabled. By deleting a backup the no longer needed WAL files are removed as well. ### Incremental Backups Incremental backups represent the changed data between a previous full backup and the current data at a given point. Incremental backups can be significantly smaller than full backups but depend on a specific previous full backup. Without this full backup they can not be restored. ### Retention To clean up space old backups need to be deleted. pgBackRest needs to know how many full backups to keep. If the number is reached all additional backups will be deleted starting with the oldest. If a full backup is deleted all incremental backups depending on it will be deleted as well. This is necessary because an incremental backup can not be restored without the matching full backup. ### Configuration The configuration file can be found in `/etc/pgbackrest.conf`. ``` [global] repo-path=/var/lib/pgbackrest [9.6-main] db-path=/var/lib/postgresql/9.6/main db-port=5432 [9.6-test] db-port=5433 db-path=/var/lib/postgresql/9.6/test ``` The global part sets the default configuration for every existing and future database cluster. For each single cluster these defaults can be changed. Some basic options will be explained here. Please see the documentation for a full overview.
If the server is setup using ansible, additionally the following `[global]` parameters are set: ``` [global] retention-full=4 compress-level=6 spool-path=/mnt/backup/pgbackrest_spool archive-async=y archive-queue-max=1099511627776 repo-path=/mnt/backup/pgbackrest ... ``` #### retention-full This option defines how many full backups should be kept. **Danger: If more full backups are stored than `retention-full` pgBackRest will delete the oldest backups to keep exactly `retention-full` full backups!** #### compress-level The gzip compression level to use (6 is the default value). #### archive-async Enables asynchronous archiving of WAL files which allows a higher archiving throughput. #### spool-path Where to keep additional information for asynchronous archiving (status directory). #### archive-queue-max How many WAL segments to keep before throwing segments away. *Note: We configure a value of 1TB to ensure pgbackrest never throws WAL segments away by default* #### repo-path This sets the main directory where backups and WAL files are stored in. It can be set to any desired mount point so backups to remote servers are easily possible. ### Backup For each cluster there is a systemd service which does a full or incremental backup. * pgbackrest@\-\ * pgbackrest-incr@\-\ To create an ad-hoc backup the corresponding service can be started. `systemctl start pgbackrest@9.6-main` would create a full backup of the cluster `9.6-main`. If no previous full backup is available `pgbackrest-incr@` will also create a full backup. ### Automation To automate the creation of backups and the retention policy enforcing there are two systemd timers per cluster. * pgbackrest@-.timer * pgbackrest-incr@-.timer `pgbackrest@-.timer` triggers full backups and `pgbackrest-incr@-.timer` triggers incremental backups. These timers are created for every cluster and are initialized with a default timing. 
The timers can be enabled independently for every database cluster either via systemd or the web portal. To fully enable a timed backup the `timer` must be *started* **and** *enabled*. If the `timer` is *started* but not *enabled* systemd will not start it after the next reboot. Keep in mind that enabling only the incremental backup is only reasonable for shorter periods of time, special scenarios like not changing databases, or if the full backups are triggered in another way. To keep storage and restore time at a reasonable level periodic full backups are needed. #### pgbackrest@.timer ``` # /lib/systemd/system/pgbackrest@.timer [Unit] Description=Automated pgBackRest full backup of PostgreSQL cluster %i [Timer] OnCalendar=Sun *-*-* 01:00:00 RandomizedDelaySec=2h [Install] WantedBy=multi-user.target ``` This timer triggers a full backup every Sunday in the early morning 01:00 or randomly up to 2 hours later. The random delay set by `RandomizedDelaySec=2h` is set so systemd can schedule many timers over a given time range. Here it is done so that not all backups for all clusters start at the same time blocking the I/O. #### pgbackrest-incr@.timer ``` # /lib/systemd/system/pgbackrest-incr@.timer [Unit] Description=Automated pgBackRest incremental backup of PostgreSQL cluster %i [Timer] OnCalendar=Tue,Thu *-*-* 01:00:00 RandomizedDelaySec=2h [Install] WantedBy=multi-user.target ``` This timer triggers an incremental backup every Tuesday and Thursday in the early morning 01:00 or randomly up to 2 hours later. #### WAL Archiving WAL archiving is disabled by default for new PostgreSQL clusters. It can be activated using the portal (see [portal](#portal)) or by starting `pgbackrest-toggle-archiving.service`. The service toggles archiving mode to on or off, depending on the former state. *Note:* If archiving is disabled and a full or incremental backup is started (manual or via timer), archiving is automatically enabled.
This step is required to ensure all WAL files needed for a restore are archived beside the basebackup. **Archiving is _not_ disabled after the backup run.** ### Restore **Restore is an invasive action that can destroy data if not executed properly!** To restore a backup there are two main methods full and delta. #### Full Restore A full restore restores a given backup (by default the latest) to the given (default) destination. The restore command expects the target directory to be empty. This can be used to setup a cluster on a new machine, small clusters or if most of the remaining data is incorrect. Steps to full restore. 1. Stop the cluster (if still running) 2. Delete or move all remaining data 3. Restore full content from backup *All steps should be run as user `postgres`*. ``` # 1. Stop the cluster pg_ctlcluster stop # 2. Delete or move all remaining data mv /var/lib/postgresql// /var/somewhere-save mkdir /var/lib/postgresql// # 3. Restore full content from backup pgbackrest --stanza=- restore ``` After this the cluster can be started again. If there is enough storage available it should be preferred to move the data to a safe place instead of deletion. #### Delta Restore A delta restore does not need a clean target and it only copies files that differ from backup. This approach can be much faster especially if most of the underlying files did not change since the last backup. **This has the potential to destroy data!** Because this works on the cluster data it is possible to cause damage. Data that is not in the backup / WAL archive but in the current cluster will be lost! Steps to perform a delta restore. 1. Stop the cluster (if still running) 2. Restore delta content from backup ``` # 1. Stop the cluster pg_ctlcluster stop # 2. Restore delta content from backup pgbackrest --stanza=- --delta restore ``` After this the cluster can be started again. #### Point in Time Recovery The shown backup methods do a full restore.
This means that all basebackup files are copied back from the archive and all WAL files are applied. If another recovery target should be restored `--type` and `--target` must be specified. Most of the time one would like to restore a database to a given point in time (e.g. '2017-08-24 12:00:00'). This would require the switch `--type=time` and `--target='2017-08-24 12:00:00'`. ``` pgbackrest --stanza=- --type=time --target="" restore ``` ### Additional Resources * * * ## Reporting - pgBadger A pgBadger service is created for each PostgreSQL instance. Those services are autogenerated and updated each time a new cluster is created or dropped (systemd-generators). A pgBadger systemd timer ensures reports are updated on a regular basis. By default this is every day at 23:00. Each pgBadger service parses the PostgreSQL log file of the corresponding PostgreSQL instance. Generated reports are saved within `/var/lib/pgbadger/-` (e.g. `/var/lib/pgbadger/9.6-main/`). All reports are accessible in the web interface. A calendar provides access to daily and weekly reports. A manual update of those reports can be triggered either using the corresponding service (e.g. `pgbadger@9.6-main.service`) or using the [portal](#portal). An update of *all* reports could be triggered using the parent service `pgbadger.service`. *Note:* Changing postgresql.conf settings like `log_line_prefix` or `lc_messages` can lead to pgBadger reports not getting updated anymore. ![Figure: pgBadger overview](images/pgbadger-overview.png) ## Web Terminal - Shell In A Box Shell In A Box is a convenient web based terminal. It can be used like a normal console connection. Explicit login and authentication is required. To change settings (e.g. the color theme) just right click anywhere on the terminal window. ### Additional Resources * ## Remote Control - tmate tmate is a fork of the popular terminal multiplexer tmux. It is used to provide remote support if needed.
It is preconfigured to connect to a relay server (`tmate.credativ.com`) and enables the user to share the current terminal with a third party by sending an SSH command including a secret token. There are two modes of operation, read-write and read-only. This enables the user to give a third party temporary access to the current terminal. The user can always watch the terminal and audit the actions taken by the third party. * tmate is not running by default, it needs to be started explicitly * When the initiating shell is closed, the connection is closed as well * The backend to use is fully configurable (in `/etc/tmate.conf`) and preset to `tmate.credativ.com` * tmate is included as a technical preview to evaluate the potential ### Usage Start tmate (opens a new terminal) ``` tmate ``` Show the credentials which need to be given to a third party (securely) ``` tmate show-messages ``` ![Figure: tmate with multiple panes](images/tmate.png) For further usage see the following additional resources regarding tmux. ### Additional Resources * * ## Configuration Revision - etckeeper etckeeper is a set of tools and hooks to keep all configuration in `/etc` in a git repository. Commits can be done manually or will happen automatically via time or by package manager hooks. Configuration changes can be seen and compared to previous versions. If necessary previous settings can be restored. #### Additional Resources * * ================================================ FILE: doc/chapter/first-steps.md ================================================ # First Steps Log into your web browser and go to the server's IP address (e.g. ). The default setup will redirect HTTP requests to HTTPS. The Elephant Shed portal provides information about running PostgreSQL instances and their status. Moreover you get access to all other installed components. The server will ask you for your user credentials. Depending on the deployment process the required user will differ. 
On a test installation (e.g. using Vagrant) the initial user is **admin** with password **admin**. See also: [Users](users.md). All bundled components except for OmniDB have been configured to use PAM authentication. OmniDB doesn't support PAM authentication yet. It has its own user management system which is decoupled from all system users. The default initial user is **admin** with **admin** as password. ![Figure: Elephant Shed portal](images/el-portal.png) On a new installation you will find one cluster running the current PostgreSQL major version with the name `main`. The configuration for clusters can be found in `/etc/postgresql///`. To use PostgreSQL from external application servers only a few steps are needed. 1. Open a shell connection to the server using SSH or shellinabox . 2. Switch to user `postgres` and launch psql: * `sudo -u postgres psql` 3. Create a database and corresponding application user, options: * `psql: CREATE ROLE appuser1 WITH LOGIN PASSWORD 'testpass';` * `psql: CREATE DATABASE appdb1 OWNER appuser1;` 4. Allow external access for your application servers, your network or everyone. Configuration file: `/etc/postgresql///pg_hba.conf` 5. (optional) Make desired configuration changes and tuning. `/etc/postgresql///postgresql.conf` 6. Reload the configuration, options: * Portal: Click on the button `Service` next to the cluster and choose "Reload" from the dropdown menu * `psql`: `SELECT pg_reload_conf();` 7. (optional) Configure a superuser to be able to use OmniDB or other management tools * Create password for user postgres: `\password` * Create personalized superusers: `CREATE USER "sosna" SUPERUSER;`, `\password "sosna"` ================================================ FILE: doc/chapter/installation.md ================================================ # Installation Elephant Shed consists of the following Debian packages and their dependencies: * `elephant-shed`: Metapackage that includes the following packages. 
* `elephant-shed-prometheus`: Configuration files and helper scripts for Prometheus and its exporters. * `elephant-shed-cockpit`: Configuration files for cockpit and cockpit-ws. * `elephant-shed-grafana`: Preconfigured Prometheus datasource and dashboard that includes various system and PostgreSQL metrics. * `elephant-shed-omnidb`: Configuration files for OmniDB. * `elephant-shed-pgbackrest`: Systemd service files and generators, helper scripts and preset configuration. * `elephant-shed-pgbadger`: Systemd service files, generators and helper scripts. * `elephant-shed-portal`: Elephant Shed web portal including Apache configuration. * `elephant-shed-postgresql`: Additional preset configuration files for PostgreSQL. * `elephant-shed-shellinabox`: Shell In A Box configuration files. * `elephant-shed-tmate`: Preconfigured tmate installation for easier support. ## Package Installation Prebuilt packages are available from . The repository also contains packages that the `elephant-shed` packages depend on. This includes packages for Grafana, Cockpit and various python libraries. 
### Installation on Debian and Ubuntu ``` # Install tools sudo apt-get install curl ca-certificates apt-transport-https # Use official PostgreSQL repo, apt.postgresql.org echo "deb http://apt.postgresql.org/pub/repos/apt/ buster-pgdg main" | sudo tee -a /etc/apt/sources.list.d/pgdg.list curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - # Use credativ repo, packages.credativ.com echo "deb https://packages.credativ.com/public/postgresql/ buster-stable main" | sudo tee -a /etc/apt/sources.list.d/elephant-shed.list curl https://packages.credativ.com/public/postgresql/aptly.key | sudo apt-key add - # Install elephant-shed sudo apt-get update sudo apt-get install elephant-shed # In case of dependency issues regarding omnidb on buster, add backports echo "deb http://deb.debian.org/debian buster-backports main" | sudo tee -a /etc/apt/sources.list.d/backports.list sudo apt-get update sudo apt-get install -t buster-backports python3-django sudo apt-get install elephant-shed # Choose desired PostgreSQL versions to install sudo apt-get install postgresql-10 # Every user in the group "elephant-shed" is allowed to login at the portal # Add all needed users to this group sudo adduser elephant-shed ``` ### Installation on RedHat and CentOS Elephant Shed works with the PostgreSQL packages from the PostgreSQL RPM building project. Go to yum.postgresql.org and install the repository RPMs for the PostgreSQL versions you want to use. Then proceed to install Elephant Shed as below. 
```bash # Use credativ repository (will also pull in EPEL) sudo yum install https://packages.credativ.com/public/postgresql/yum/credativ-repo.rpm # On RedHat, activate additional repositories (not on CentOS) subscription-manager repos --enable=rhel-7-server-extras-rpms subscription-manager repos --enable=rhel-7-server-optional-rpms # Choose desired PostgreSQL versions to install sudo yum install postgresql13-server postgresql13-contrib postgresql-common sudo pg_createcluster 13 main --start # Install elephant-shed sudo yum install elephant-shed # Every user in the group "elephant-shed" is allowed to login at the portal # Add all needed users to this group sudo vigr # Unfortunately, shellinabox does not work with SELinux enabled # Disable SELinux if you want to use this component sudo setenforce 0 sudo sed -i -e 's/^SELINUX=.*/SELINUX=permissive/' /etc/selinux/config ``` ## Installation from source The source code is available on GitHub: ### Build Debian Packages All Elephant Shed Debian packages can be built using the command `make deb`. Requirements: - `dpkg-dev` - `devscripts` ### Build Documentation To build the documentation in HTML format type `make docs`. Requirements: - `sphinx` ### Create Testsystem with Vagrant The `make vagrant` command builds all components, creates a new virtual machine using Vagrant and deploys the software using Ansible. This can also be used to redeploy an already running machine. The Vagrant configuration is located in `vagrant/Vagrantfile`. Requirements: - `vagrant` - `virtualbox` or `libvirt` - `ansible` ### Deploy on remote machine To deploy the software on any machine, enter the connection information in the inventory `vagrant/inventory`. The deployment can then be started with the following command `make ansible`.
Requirements: - `ansible` ================================================ FILE: doc/chapter/intro.md ================================================ # Intro Elephant Shed is a web-based PostgreSQL management front-end that bundles several utilities and applications for use with PostgreSQL. It currently manages single-node Debian/Ubuntu PostgreSQL servers and appliances. The main components are: * PostgreSQL - * OmniDB - * postgresql-common - * pgBadger - * pgBackRest - * Grafana - * Prometheus - * Cockpit - * Shell In A Box - In addition several other tools are included for configuration management and setup. The number of components bundled and tasks handled add some overhead compared to running just the database server. It is therefore only recommended for adequately sized systems. This document describes the current version. Updated versions of this document will be shipped with the elephant-shed packages and can be found in `/usr/share/doc/elephant-shed-portal` (`/usr/share/doc/elephant-shed*`) and in the web portal under . ================================================ FILE: doc/chapter/issues.md ================================================ # Known Bugs and Issues ## PostgreSQL * The `prometheus-sql-exporter` monitoring agent is permanently keeping connections open to all databases, which prevents `DROP DATABASE` from working. To drop databases, stop `prometheus-sql-exporter` first. This is possible via the web interface Cockpit: [services#/prometheus-sql-exporter.service](/system/services#/prometheus-sql-exporter.service). ## OmniDB * OmniDB does not use PAM authentication. ## Portal * A direct relogin after a logout does not work. Reloading the page is necessary. ## RedHat / CentOS * shellinabox does not work when SELinux is enabled. ================================================ FILE: doc/chapter/license.md ================================================ # License The Elephant Shed itself is licensed under the GPLv3 (). 
All bundled components are Free/Open-Source software with a known and approved open source license. ================================================ FILE: doc/chapter/support.md ================================================ # Support and more ## Do you have any question or want to know more? * **Project page** [elephant-shed.io](https://elephant-shed.io) * **Git** [github.com/credativ/elephant-shed](https://github.com/credativ/elephant-shed/) * **Web-Chat** [#elephant-shed](https://webchat.oftc.net/?nick=web-user-.&channels=elephant-shed&uio=MT11bmRlZmluZWQmMj10cnVlJjk9dHJ1ZSYxMT0yMzY31) * **IRC** [#elephant-shed](https://webchat.oftc.net/?channels=elephant-shed&uio=MT11bmRlZmluZWQmMj10cnVlJjk9dHJ1ZSYxMT0yMzY31 ) on [irc.oftc.net](https://www.oftc.net/) ## Do you need professional support or additional services? Elephant Shed is an open source project, developed and maintained by credativ. For the Elephant Shed PostgreSQL appliance, credativ offers comprehensive technical support with service level agreements, which are also available on 365 days a year and 24 hours a day as an option. Installation and integration support, as well as an introduction in Elephant Shed PostgreSQL appliance is of course also part of credativ's services. If you are interested, please feel free to contact us. ![](images/logo_credativ_96.png) * **Web** [credativ.de](https://credativ.de) * **E-Mail:** [info@credativ.de](mailto:info@credativ.de) * **Phone:** [+49 2161 9174200](tel:+4921619174200) ================================================ FILE: doc/chapter/users.md ================================================ # Users The web interface is password protected (HTTP basic authentication) and uses the system users via PAM. When deployed via Ansible, the initial user is **admin** with password **admin**. This user works for web access as well as for SSH and PostgreSQL. To create new users, use `adduser`, and add the user to the **elephant-shed** group. 
``` adduser myon adduser myon elephant-shed ``` On RedHat/CentOS, use `vigr` to add users to the **elephant-shed** group. ================================================ FILE: doc/conf.py ================================================ # -*- coding: utf-8 -*- # # elephant-shed documentation build configuration file, created by # sphinx-quickstart on Fri Dec 15 14:52:01 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('.')) import recommonmark from recommonmark.parser import CommonMarkParser from recommonmark.transform import AutoStructify # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'recommonmark', 'sphinx.ext.todo', 'sphinx.ext.githubpages', ] source_parsers = { '.md': CommonMarkParser } # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = ['.rst', '.md'] # The encoding of source files. # # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. 
project = u'elephant-shed' copyright = u'2017-2022, credativ' author = u'credativ' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = u'1.3.10' # The full version, including alpha/beta/rc tags. release = u'1.3.10' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # # today = '' # # Else, today_fmt is used as the format for a strftime call. # # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The reST default role (used for this markup: `text`) to use for all # documents. # # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = True # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { } # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. # # html_title = u'elephant-shed v1.0' # A shorter title for the navigation bar. Default is the same as html_title. # # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. # # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. 
# # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # # html_additional_pages = {} # If false, no module index is generated. # # html_domain_indices = True # If false, no index is generated. # # html_use_index = True # If true, the index is split into individual pages for each letter. # # html_split_index = False # If true, links to the reST sources are added to the pages. # # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' # # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'elephant-sheddoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). 
# # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'elephant-shed.tex', u'elephant-shed Documentation', u'credativ', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # # latex_use_parts = False # If true, show page references after internal links. # # latex_show_pagerefs = False # If true, show URL addresses after external links. # # latex_show_urls = False # Documents to append as an appendix to all manuals. # # latex_appendices = [] # If false, will not define \strong, \code, \titleref, \crossref ... but only # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added # packages. # # latex_keep_old_macro_names = True # If false, no module index is generated. # # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'elephant-shed', u'elephant-shed Documentation', [author], 1) ] # If true, show URL addresses after external links. # # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'elephant-shed', u'elephant-shed Documentation', author, 'elephant-shed', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. 
# # texinfo_appendices = [] # If false, no module index is generated. # # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # # texinfo_no_detailmenu = False # -- Options for Epub output ---------------------------------------------- # Bibliographic Dublin Core info. epub_title = project epub_author = author epub_publisher = author epub_copyright = copyright # The basename for the epub file. It defaults to the project name. # epub_basename = project # The HTML theme for the epub output. Since the default themes are not # optimized for small screen space, using the same theme for HTML and epub # output is usually not wise. This defaults to 'epub', a theme designed to save # visual space. # # epub_theme = 'epub' # The language of the text. It defaults to the language option # or 'en' if the language is not set. # # epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. # epub_scheme = '' # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A tuple containing the cover image and cover page html template filenames. # # epub_cover = () # A sequence of (type, uri, title) tuples for the guide element of content.opf. # # epub_guide = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. # # epub_pre_files = [] # HTML files that should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. # # epub_post_files = [] # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # The depth of the table of contents in toc.ncx. # # epub_tocdepth = 3 # Allow duplicate toc entries. 
# # epub_tocdup = True # Choose between 'default' and 'includehidden'. # # epub_tocscope = 'default' # Fix unsupported image types using the Pillow. # # epub_fix_images = False # Scale large images. # # epub_max_image_width = 0 # How to display URL addresses: 'footnote', 'no', or 'inline'. # # epub_show_urls = 'inline' # If false, no index is generated. # # epub_use_index = True ================================================ FILE: doc/index.rst ================================================ .. elephant-shed documentation master file, created by sphinx-quickstart on Fri Dec 15 14:52:01 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Elephant Shed - Documentation ============================= .. toctree:: :maxdepth: 2 :numbered: chapter/intro chapter/installation chapter/first-steps chapter/components chapter/users chapter/issues chapter/license chapter/support ================================================ FILE: grafana/README ================================================ Prometheus queries ------------------ Ordering: `job=~'$job', host=~'$host', sql_job=~'$cluster'`, then other fields Cluster selection: `sql_job=~'$cluster'` on "global" queries, `sql_job=~'$cluster.*'` on "database" queries ================================================ FILE: grafana/dashboard-sed ================================================ #!/bin/sh # To use, select Share > Export > Save to file in Grafana, and do: # grafana/dashboard-sed ~/Desktop/PostgreSQL\ Server\ Overview-* > grafana/postgresql_server_overview.json ; rm -f ~/Desktop/PostgreSQL\ Server\ Overview* # replace DS_PROMETHEUS by prometheus # replace "Overview Copy" by "Overview" sed -e 's/${DS_PROMETHEUS}/prometheus/' \ -e 's/"title": "PostgreSQL Server Overview.*/"title": "PostgreSQL Server Overview",/' \ "$@" ================================================ FILE: grafana/datasource_prometheus.yml ================================================ 
--- # config file version apiVersion: 1 # list of datasources to insert/update depending on # what's available in the database datasources: # name of the datasource. Required - name: prometheus # datasource type. Required type: prometheus # access mode. direct or proxy. Required access: proxy # org id. will default to orgId 1 if not specified orgId: 1 # url url: http://127.0.0.1:9090/prometheus # database password, if used password: # database user, if used user: # database name, if used database: # enable/disable basic auth basicAuth: false # basic auth username basicAuthUser: # basic auth password basicAuthPassword: # enable/disable with credentials headers withCredentials: false # mark as default datasource. Max one per org isDefault: true # fields that will be converted to json and stored in json_data #jsonData: # graphiteVersion: "1.1" # tlsAuth: true # tlsAuthWithCACert: true # json object of data that will be encrypted. #secureJsonData: # tlsCACert: "..." # tlsClientCert: "..." # tlsClientKey: "..." version: 1 # allow users to edit datasources from the UI. 
editable: false ================================================ FILE: grafana/elephant-shed-grafana ================================================ # Set go garbage collector to clean up more frequently GOGC=40 GOMAXPROCS=4 CONF_FILE=/etc/grafana/elephant-shed-grafana.ini ================================================ FILE: grafana/elephant-shed-grafana.conf ================================================ [Service] EnvironmentFile=/etc/default/elephant-shed-grafana ================================================ FILE: grafana/elephant-shed-grafana.ini ================================================ ##################### Grafana Configuration Example ##################### # # Everything has defaults so you only need to uncomment things you want to # change # possible values : production, development ; app_mode = production # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty ; instance_name = ${HOSTNAME} #################################### Paths #################################### [paths] # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) # ;data = /var/lib/grafana # # Directory where grafana can store logs # ;logs = /var/log/grafana # # Directory where grafana will automatically scan and look for plugins # ;plugins = /var/lib/grafana/plugins # #################################### Server #################################### [server] # Protocol (http, https, socket) protocol = http domain = localhost root_url = %(protocol)s://%(domain)s:/grafana # The ip address to bind to, empty will bind to all interfaces http_addr = 127.0.0.1 # The http port to use http_port = 3000 # The public facing domain name used to access grafana from a browser ;domain = localhost # Redirect to correct domain if host header does not match domain # Prevents DNS rebinding attacks ;enforce_domain = false # The full public facing url you use in browser, used for redirects and emails # If you use 
reverse proxy and sub path specify full url (with sub path) ;root_url = http://localhost:3000 # Log web requests ;router_logging = false # the path relative to the working path ;static_root_path = public # enable gzip ;enable_gzip = false # https certs & key file ;cert_file = ;cert_key = # Unix socket path ;socket = #################################### Database #################################### [database] # You can configure the database connection by specifying type, host, name, user and password # as separate properties or as one string using the url property. # Either "mysql", "postgres" or "sqlite3", it's your choice ;type = sqlite3 ;host = 127.0.0.1:3306 ;name = grafana ;user = root # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" ;password = # Use either URL or the previous fields to configure the database # Example: mysql://user:secret@host:port/database ;url = # For "postgres" only, either "disable", "require" or "verify-full" ;ssl_mode = disable # For "sqlite3" only, path relative to data_path setting ;path = grafana.db # Max conn setting default is 0 (meaning not set) ;max_idle_conn = ;max_open_conn = #################################### Session #################################### [session] # Either "memory", "file", "redis", "mysql", "postgres", default is "file" ;provider = file # Provider config options # memory: does not have any config yet # file: session dir path, is relative to grafana data_path # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` # mysql: go-sql-driver/mysql dsn config string, e.g. 
`user:password@tcp(127.0.0.1:3306)/database_name` # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable ;provider_config = sessions # Session cookie name ;cookie_name = grafana_sess # If you use session in https only, default is false ;cookie_secure = false # Session life time, default is 86400 ;session_life_time = 86400 #################################### Data proxy ########################### [dataproxy] # This enables data proxy logging, default is false ;logging = false #################################### Analytics #################################### [analytics] # Server reporting, sends usage counters to stats.grafana.org every 24 hours. # No ip addresses are being tracked, only simple counters to track # running instances, dashboard and error counts. It is very helpful to us. # Change this option to false to disable reporting. ;reporting_enabled = true # Set to false to disable all checks to https://grafana.net # for new versions (grafana itself and plugins), check is used # in some UI views to notify that grafana or plugin update exists # This option does not cause any auto updates, nor send any information # only a GET request to http://grafana.com to get latest versions ;check_for_updates = true # Google Analytics universal tracking code, only enabled if you specify an id here ;google_analytics_ua_id = #################################### Security #################################### [security] # default admin user, created on startup admin_user = admin # default admin password, can be changed before first start of grafana, or in profile settings admin_password = admin # used for signing ;secret_key = SW2YcwTIb9zpOOhoPsMm # Auto-login remember days ;login_remember_days = 7 ;cookie_username = grafana_user ;cookie_remember_name = grafana_remember # disable gravatar profile images ;disable_gravatar = false # data source proxy whitelist (ip_or_domain:port separated by spaces) ;data_source_proxy_whitelist = [snapshots] # snapshot 
sharing options ;external_enabled = true ;external_snapshot_url = https://snapshots-origin.raintank.io ;external_snapshot_name = Publish to snapshot.raintank.io # remove expired snapshot ;snapshot_remove_expired = true # remove snapshots after 90 days ;snapshot_TTL_days = 90 #################################### Users #################################### [users] # allow user signup / registration (set to false to disable) allow_sign_up = true # Allow non admin users to create organizations ;allow_org_create = true # Set to true to automatically assign new users to the default organization (id 1) auto_assign_org = true # Default role new users will be automatically assigned (if auto_assign_org above is set to true) auto_assign_org_role = Editor # Background text for the user field on the login page ;login_hint = email or username # Default UI theme ("dark" or "light") ;default_theme = dark [auth] # Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false ;disable_login_form = false # Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false ;disable_signout_menu = false #################################### Anonymous Auth ########################## [auth.anonymous] # enable anonymous access ;enabled = false # specify organization name that should be used for unauthenticated users ;org_name = Main Org. 
# specify role for unauthenticated users ;org_role = Viewer #################################### Github Auth ########################## [auth.github] ;enabled = false ;allow_sign_up = true ;client_id = some_id ;client_secret = some_secret ;scopes = user:email,read:org ;auth_url = https://github.com/login/oauth/authorize ;token_url = https://github.com/login/oauth/access_token ;api_url = https://api.github.com/user ;team_ids = ;allowed_organizations = #################################### Google Auth ########################## [auth.google] ;enabled = false ;allow_sign_up = true ;client_id = some_client_id ;client_secret = some_client_secret ;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email ;auth_url = https://accounts.google.com/o/oauth2/auth ;token_url = https://accounts.google.com/o/oauth2/token ;api_url = https://www.googleapis.com/oauth2/v1/userinfo ;allowed_domains = #################################### Generic OAuth ########################## [auth.generic_oauth] ;enabled = false ;name = OAuth ;allow_sign_up = true ;client_id = some_id ;client_secret = some_secret ;scopes = user:email,read:org ;auth_url = https://foo.bar/login/oauth/authorize ;token_url = https://foo.bar/login/oauth/access_token ;api_url = https://foo.bar/user ;team_ids = ;allowed_organizations = #################################### Grafana.com Auth #################### [auth.grafana_com] ;enabled = false ;allow_sign_up = true ;client_id = some_id ;client_secret = some_secret ;scopes = user:email ;allowed_organizations = #################################### Auth Proxy ########################## [auth.proxy] enabled = true header_name = X-WEBAUTH-USER header_property = username auto_sign_up = true ;ldap_sync_ttl = 60 ;whitelist = 192.168.1.1, 192.168.2.1 #################################### Basic Auth ########################## [auth.basic] enabled = true #################################### Auth LDAP ########################## [auth.ldap] 
;enabled = false ;config_file = /etc/grafana/ldap.toml ;allow_sign_up = true #################################### SMTP / Emailing ########################## [smtp] ;enabled = false ;host = localhost:25 ;user = # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" ;password = ;cert_file = ;key_file = ;skip_verify = false ;from_address = admin@grafana.localhost ;from_name = Grafana [emails] ;welcome_email_on_sign_up = false #################################### Logging ########################## [log] # Either "console", "file", "syslog". Default is console and file # Use space to separate multiple modes, e.g. "console file" ;mode = console file # Either "trace", "debug", "info", "warn", "error", "critical", default is "info" ;level = info # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug ;filters = # For "console" mode only [log.console] ;level = # log line format, valid options are text, console and json ;format = console # For "file" mode only [log.file] ;level = # log line format, valid options are text, console and json ;format = text # This enables automated log rotation (master switch for the following options), default is true ;log_rotate = true # Max line number of single file, default is 1000000 ;max_lines = 1000000 # Max size shift of single file, default is 28 means 1 << 28, 256MB ;max_size_shift = 28 # Segment log daily, default is true ;daily_rotate = true # Expired days of log file(delete after max days), default is 7 ;max_days = 7 [log.syslog] ;level = # log line format, valid options are text, console and json ;format = text # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. ;network = ;address = # Syslog facility. user, daemon and local0 through local7 are valid. ;facility = # Syslog tag. By default, the process' argv[0] is used. 
;tag = #################################### AMQP Event Publisher ########################## [event_publisher] ;enabled = false ;rabbitmq_url = amqp://localhost/ ;exchange = grafana_events ;#################################### Dashboard JSON files ########################## [dashboards.json] enabled = true path = /usr/share/elephant-shed/grafana/dashboards #################################### Alerting ############################ [alerting] # Disable alerting engine & UI features ;enabled = true # Makes it possible to turn off alert rule execution but alerting UI is visible ;execute_alerts = true #################################### Internal Grafana Metrics ########################## # Metrics available at HTTP API Url /api/metrics [metrics] # Disable / Enable internal metrics ;enabled = true # Publish interval ;interval_seconds = 10 # Send internal metrics to Graphite [metrics.graphite] # Enable by setting the address setting (ex localhost:2003) ;address = ;prefix = prod.grafana.%(instance_name)s. #################################### Grafana.com integration ########################## # Url used to import dashboards directly from Grafana.com [grafana_com] ;url = https://grafana.com #################################### External image storage ########################## [external_image_storage] # Used for uploading images to public servers so they can be included in slack/email messages. 
# you can choose between (s3, webdav) ;provider = [external_image_storage.s3] ;bucket_url = ;access_key = ;secret_key = [external_image_storage.webdav] ;url = ;public_url = ;username = ;password = ================================================ FILE: grafana/node_overview.json ================================================ { "__requires": [ { "type": "panel", "id": "bargauge", "name": "Bar gauge", "version": "" }, { "type": "panel", "id": "gauge", "name": "Gauge", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.6.1" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "datasource", "uid": "grafana" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, "links": [], "panels": [ { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 261, "panels": [], "title": "Quick CPU / Mem / Disk", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Resource pressure via PSI", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "links": [], "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green" }, { "color": "dark-yellow", "value": 70 }, { "color": "dark-red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 4, "w": 3, "x": 0, "y": 1 }, "id": 323, "options": { "displayMode": "basic", "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, 
"maxVizHeight": 300, "minVizHeight": 10, "minVizWidth": 0, "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showUnfilled": true, "sizing": "auto", "text": {}, "valueMode": "color" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": true, "legendFormat": "CPU", "range": false, "refId": "A", "step": 240 }, { "editorMode": "code", "exemplar": false, "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": true, "legendFormat": "Mem", "range": false, "refId": "B", "step": 240 }, { "editorMode": "code", "exemplar": false, "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": true, "legendFormat": "I/O", "range": false, "refId": "C", "step": 240 }, { "editorMode": "code", "exemplar": false, "expr": "irate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": true, "legendFormat": "Irq", "range": false, "refId": "D", "step": 240 } ], "title": "Pressure", "type": "bargauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Overall CPU busy percentage (averaged across all cores)", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 85 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 95 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { 
"h": 4, "w": 3, "x": 3, "y": 1 }, "id": 20, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$node\"}[$__rate_interval])))", "instant": true, "legendFormat": "", "range": false, "refId": "A", "step": 240 } ], "title": "CPU Busy", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "System load over all CPU cores together", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 85 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 95 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 4, "w": 3, "x": 6, "y": 1 }, "id": 155, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "scalar(node_load1{instance=\"$node\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", "format": "time_series", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "Sys Load", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Real RAM usage excluding cache and reclaimable memory", "fieldConfig": { "defaults": { "color": 
{ "mode": "thresholds" }, "decimals": 1, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 80 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 90 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 4, "w": 3, "x": 9, "y": 1 }, "id": 16, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{instance=\"$node\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"})) * 100, 0)", "format": "time_series", "instant": true, "range": false, "refId": "B", "step": 240 } ], "title": "RAM Used", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Percentage of swap space currently used by the system", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 10 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 25 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 4, "w": 3, "x": 12, "y": 1 }, "id": 21, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": 
"((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"})) * 100", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "SWAP Used", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Used Root FS", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 80 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 90 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 4, "w": 3, "x": 15, "y": 1 }, "id": 154, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "(\n (node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n - node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"})\n / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", mountpoint=\"/\", fstype!=\"rootfs\"}\n) * 100\n", "format": "time_series", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "Root FS Used", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "thresholds": { "mode": "absolute", "steps": [ { 
"color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 2, "w": 2, "x": 18, "y": 1 }, "id": 14, "maxDataPoints": 100, "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", "instant": true, "legendFormat": "__auto", "range": false, "refId": "A" } ], "title": "CPU Cores", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 2, "w": 2, "x": 20, "y": 1 }, "id": 75, "maxDataPoints": 100, "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "RAM Total", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "mappings": [ { "options": { "match": "null", "result": { 
"text": "N/A" } }, "type": "special" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 2, "w": 2, "x": 22, "y": 1 }, "id": 18, "maxDataPoints": 100, "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "SWAP Total", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgba(50, 172, 45, 0.97)" }, { "color": "rgba(237, 129, 40, 0.89)", "value": 70 }, { "color": "rgba(245, 54, 54, 0.9)", "value": 90 } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 2, "w": 2, "x": 18, "y": 3 }, "id": 23, "maxDataPoints": 100, "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", "format": "time_series", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "RootFS 
Total", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 1, "mappings": [ { "options": { "match": "null", "result": { "text": "N/A" } }, "type": "special" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 2, "w": 4, "x": 20, "y": 3 }, "id": 15, "maxDataPoints": 100, "options": { "colorMode": "none", "graphMode": "none", "justifyMode": "auto", "orientation": "horizontal", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", "instant": true, "range": false, "refId": "A", "step": 240 } ], "title": "Uptime", "type": "stat" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, "id": 263, "panels": [], "title": "Basic CPU / Mem / Net / Disk", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "CPU time spent busy vs idle, split by activity type", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "percent" }, 
"thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byName", "options": "Busy Iowait" }, "properties": [ { "id": "color", "value": { "fixedColor": "#890F02", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Idle" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Busy System" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EAB839", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Busy User" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A437C", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Busy Other" }, "properties": [ { "id": "color", "value": { "fixedColor": "#6D1F62", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 6 }, "id": 77, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true, "width": 250 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "exemplar": false, "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "instant": false, "legendFormat": "Busy System", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Busy User", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": 
"sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Busy Iowait", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Busy IRQs", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Busy Other", "range": true, "refId": "E", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Idle", "range": true, "refId": "F", "step": 240 } ], "title": "CPU Basic", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "RAM and swap usage overview, including caches", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { 
"type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Swap used" }, "properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Total" }, "properties": [ { "id": "color", "value": { "fixedColor": "#E0F9D7", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.stacking", "value": { "group": false, "mode": "normal" } } ] }, { "matcher": { "id": "byName", "options": "Cache + Buffer" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Free" }, "properties": [ { "id": "color", "value": { "fixedColor": "#7EB26D", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 6 }, "id": 78, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Total", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", "format": "time_series", "legendFormat": "Used", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": 
"node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Cache + Buffer", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Free", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", "format": "time_series", "legendFormat": "Swap used", "range": true, "refId": "E", "step": 240 } ], "title": "Memory Basic", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Per-interface network traffic (receive and transmit) in bits per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Tx.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 13 }, "id": 74, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", 
"showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", "format": "time_series", "legendFormat": "Rx {{device}}", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", "format": "time_series", "legendFormat": "Tx {{device}} ", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic Basic", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Percentage of filesystem space used for each mounted device", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 13 }, "id": 152, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "((node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"} - 
node_filesystem_avail_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) / node_filesystem_size_bytes{instance=\"$node\", job=\"$job\", device!~\"rootfs\"}) * 100", "format": "time_series", "legendFormat": "{{mountpoint}}", "range": true, "refId": "A", "step": 240 } ], "title": "Disk Space Used Basic", "type": "timeseries" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, "id": 265, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "CPU time usage split by state, normalized across all CPU cores", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 70, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "percent" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byName", "options": "Idle - Waiting for something to happen" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Iowait - Waiting for I/O to complete" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EAB839", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Irq - Servicing interrupts" }, "properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Nice - Niced processes executing in user mode" }, "properties": 
[ { "id": "color", "value": { "fixedColor": "#C15C17", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Softirq - Servicing softirqs" }, "properties": [ { "id": "color", "value": { "fixedColor": "#E24D42", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Steal - Time spent in other operating systems when running in a virtualized environment" }, "properties": [ { "id": "color", "value": { "fixedColor": "#FCE2DE", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "System - Processes executing in kernel mode" }, "properties": [ { "id": "color", "value": { "fixedColor": "#508642", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "User - Normal processes executing in user mode" }, "properties": [ { "id": "color", "value": { "fixedColor": "#5195CE", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Guest CPU usage" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "custom.stacking", "value": { "group": "A", "mode": "none" } } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 0, "y": 21 }, "id": 3, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 250 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "interval": "", "legendFormat": "System - Processes executing in kernel mode", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"user\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / 
scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "User - Normal processes executing in user mode", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"nice\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Nice - Niced processes executing in user mode", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"iowait\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Iowait - Waiting for I/O to complete", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"irq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Irq - Servicing interrupts", "range": true, "refId": "E", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"softirq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Softirq - Servicing softirqs", "range": true, "refId": "F", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"steal\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", "range": true, "refId": 
"G", "step": 240 }, { "editorMode": "code", "expr": "sum(irate(node_cpu_seconds_total{mode=\"idle\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", "format": "time_series", "legendFormat": "Idle - Waiting for something to happen", "range": true, "refId": "H", "step": 240 }, { "editorMode": "code", "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))) > 0", "format": "time_series", "legendFormat": "Guest CPU usage", "range": true, "refId": "I", "step": 240 } ], "title": "CPU", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Breakdown of physical memory and swap usage. Hardware-detected memory errors are also displayed", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Apps" }, "properties": [ { "id": "color", "value": { "fixedColor": "#629E51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Buffers" }, "properties": [ { "id": "color", "value": { 
"fixedColor": "#614D93", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#6D1F62", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Cached" }, "properties": [ { "id": "color", "value": { "fixedColor": "#511749", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Committed" }, "properties": [ { "id": "color", "value": { "fixedColor": "#508642", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Free" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A437C", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, "properties": [ { "id": "color", "value": { "fixedColor": "#CFFAFF", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Inactive" }, "properties": [ { "id": "color", "value": { "fixedColor": "#584477", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "PageTables" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A50A1", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Page_Tables" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A50A1", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "RAM_Free" }, "properties": [ { "id": "color", "value": { "fixedColor": "#E0F9D7", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Slab" }, "properties": [ { "id": "color", "value": { "fixedColor": "#806EB7", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Slab_Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#E0752D", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap" }, "properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap - Swap memory usage" }, 
"properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap_Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#C15C17", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap_Free" }, "properties": [ { "id": "color", "value": { "fixedColor": "#2F575E", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Unused" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EAB839", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Unused - Free memory unassigned" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byRegexp", "options": "/.*Hardware Corrupted - *./" }, "properties": [ { "id": "custom.stacking", "value": { "group": false, "mode": "normal" } } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 12, "y": 21 }, "id": 24, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Apps - Memory used by user-space applications", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "PageTables - Memory used to map between virtual and 
physical memory addresses", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Cache - Parked file data (file content) cache", "range": true, "refId": "E", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Buffers - Block device (e.g. 
harddisk) cache", "range": true, "refId": "F", "step": 240 }, { "editorMode": "code", "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Unused - Free memory unassigned", "range": true, "refId": "G", "step": 240 }, { "editorMode": "code", "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", "format": "time_series", "legendFormat": "Swap - Swap space used", "range": true, "refId": "H", "step": 240 }, { "editorMode": "code", "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", "range": true, "refId": "I", "step": 240 } ], "title": "Memory", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Incoming and outgoing network traffic per interface", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 0, "y": 433 }, "id": 84, "options": 
{ "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Network interface utilization as a percentage of its maximum capacity", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 12, "y": 433 }, "id": 338, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": 
"bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"}", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "(rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n / ignoring(speed) node_network_speed_bytes{instance=\"$node\",job=\"$job\", speed!=\"-1\"})", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Saturation", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Disk I/O operations per second for each device", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (-) / write (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "iops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 0, "y": 445 }, "id": 229, "options": { "legend": { 
"calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk IOps", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Disk I/O throughput per device", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (-) / write (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "Bps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read*./" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 12, "y": 445 }, "id": 42, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": 
true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Amount of available disk space per mounted filesystem, excluding rootfs. Based on block availability to non-root users", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 12, "w": 12, "x": 0, "y": 457 }, "id": 43, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": 
"11.6.1", "targets": [ { "editorMode": "code", "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "legendFormat": "{{mountpoint}}", "metric": "", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "hide": true, "legendFormat": "{{mountpoint}} - Free", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "hide": true, "legendFormat": "{{mountpoint}} - Size", "range": true, "refId": "C", "step": 240 } ], "title": "Filesystem Space Available", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Disk usage (used = total - available) per mountpoint", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 12, "w": 12, "x": 12, "y": 457 }, "id": 156, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", 
"targets": [ { "editorMode": "code", "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "legendFormat": "{{mountpoint}}", "range": true, "refId": "A", "step": 240 } ], "title": "Filesystem Used", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Percentage of time the disk was actively processing I/O operations", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 40, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 12, "w": 12, "x": 0, "y": 469 }, "id": 127, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+\"} [$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{device}}", "range": true, "refId": "A", "step": 240 } ], "title": "Disk I/O Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": 
"${ds_prometheus}" }, "description": "How often tasks experience CPU, memory, or I/O delays. “Some” indicates partial slowdown; “Full” indicates all tasks are stalled. Based on Linux PSI metrics:\nhttps://docs.kernel.org/accounting/psi.html", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "some (-) / full (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Some.*/" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 } ] }, { "matcher": { "id": "byRegexp", "options": "/.*Some.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 12, "w": 12, "x": 12, "y": 469 }, "id": 322, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "CPU - Some", "range": true, "refId": "CPU some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", 
"legendFormat": "Memory - Some", "range": true, "refId": "Memory some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Memory - Full", "range": true, "refId": "Memory full", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "I/O - Some", "range": true, "refId": "I/O some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "I/O - Full", "range": true, "refId": "I/O full", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_irq_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "IRQ - Full", "range": true, "refId": "A", "step": 240 } ], "title": "Pressure Stall Information", "type": "timeseries" } ], "title": "CPU / Memory / Net / Disk", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, "id": 266, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Displays committed memory usage versus the system's commit limit. 
Exceeding the limit is allowed under Linux overcommit policies but may increase OOM risks under high load", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*CommitLimit - .*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 732 }, "id": 135, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Committed_AS – Memory promised to processes (not necessarily used)", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "CommitLimit - Max allowable committed memory", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Committed", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": 
"${ds_prometheus}" }, "description": "Memory currently dirty (modified but not yet written to disk), being actively written back, or held by writeback buffers. High dirty or writeback memory may indicate disk I/O pressure or delayed flushing", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 732 }, "id": 130, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Writeback – Memory currently being flushed to disk", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "WritebackTmp – FUSE temporary writeback buffers", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Dirty – Memory marked dirty (pending write to disk)", "range": true, 
"refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "NFS Unstable – Pages sent to NFS server, awaiting storage commit", "range": true, "refId": "D", "step": 240 } ], "title": "Memory Writeback and Dirty", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Kernel slab memory usage, separated into reclaimable and non-reclaimable categories. Reclaimable memory can be freed under memory pressure (e.g., caches), while unreclaimable memory is locked by the kernel for core functions", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 932 }, "id": 131, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "SUnreclaim – Non-reclaimable slab memory (kernel objects)", "range": true, "refId": "A", "step": 240 }, { "editorMode": 
"code", "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "SReclaimable – Potentially reclaimable slab memory (e.g., inode cache)", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Slab", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Memory used for mapped files (such as libraries) and shared memory (shmem and tmpfs), including variants backed by huge pages", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 932 }, "id": 138, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Mapped – Memory mapped from files (e.g., libraries, mmap)", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Shmem – Shared memory 
used by processes and tmpfs", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "ShmemHugePages – Shared memory (shmem/tmpfs) allocated with HugePages", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PMD Mapped – Shmem/tmpfs backed by Transparent HugePages (PMD)", "range": true, "refId": "D", "step": 240 } ], "title": "Memory Shared and Mapped", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Proportion of memory pages in the kernel's active and inactive LRU lists relative to total RAM. Active pages have been recently used, while inactive pages are less recently accessed but still resident in memory", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Active.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byRegexp", "options": "/.*Inactive.*/" }, "properties": [ { 
"id": "color", "value": { "fixedColor": "dark-blue", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 942 }, "id": 136, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "(node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", "format": "time_series", "legendFormat": "Inactive – Less recently used memory, more likely to be reclaimed", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "(node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}) \n/ \n(node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})\n", "format": "time_series", "legendFormat": "Active – Recently used memory, retained unless under pressure", "range": true, "refId": "B", "step": 240 } ], "title": "Memory LRU Active / Inactive (%)", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Breakdown of memory pages in the kernel's active and inactive LRU lists, separated by anonymous (heap, tmpfs) and file-backed (caches, mmap) pages.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], 
"min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 942 }, "id": 191, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Inactive_anon – Anonymous memory on inactive LRU (incl. tmpfs & swap cache)", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Active_file - File-backed memory on active LRU list", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Active_anon – Anonymous memory on active LRU (incl. tmpfs & swap cache)", "range": true, "refId": "D", "step": 240 } ], "title": "Memory LRU Active / Inactive Detail", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks kernel memory used for CPU-local structures, per-thread stacks, and bounce buffers used for I/O on DMA-limited devices. 
These areas are typically small but critical for low-level operations", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 952 }, "id": 160, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "KernelStack – Kernel stack memory (per-thread, non-reclaimable)", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PerCPU – Dynamically allocated per-CPU memory (used by kernel modules)", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Bounce Memory – I/O buffer for DMA-limited devices", "range": true, "refId": "C", "step": 240 } ], "title": "Memory Kernel / CPU / IO", "type": "timeseries" }, { 
"datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Usage of the kernel's vmalloc area, which provides virtual memory allocations for kernel modules and drivers. Includes total, used, and largest free block sizes", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Total.*/" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 952 }, "id": 70, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Vmalloc Free Chunk – Largest available block in vmalloc area", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": 
"Vmalloc Total – Total size of the vmalloc memory area", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Vmalloc Used – Portion of vmalloc area currently in use", "range": true, "refId": "C", "step": 240 } ], "title": "Memory Vmalloc", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Memory used by anonymous pages (not backed by files), including standard and huge page allocations. Includes heap, stack, and memory-mapped anonymous regions", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 962 }, "id": 129, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "AnonHugePages – Anonymous memory using HugePages", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": 
"node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "AnonPages – Anonymous memory (non-file-backed)", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Anonymous", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Memory that is locked in RAM and cannot be swapped out. Includes both kernel-unevictable memory and user-level memory locked with mlock()", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, "properties": [ { "id": "color", "value": { "fixedColor": "#CFFAFF", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 962 }, "id": 137, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Unevictable – Kernel-pinned memory (not swappable)", 
"range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Mlocked – Application-locked memory via mlock()", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Unevictable and MLocked", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "How much memory is directly mapped in the kernel using different page sizes (4K, 2M, 1G). Helps monitor large page utilization in the direct map region", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Active" }, "properties": [ { "id": "color", "value": { "fixedColor": "#99440A", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Buffers" }, "properties": [ { "id": "color", "value": { "fixedColor": "#58140C", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#6D1F62", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Cached" }, "properties": [ { "id": "color", "value": { "fixedColor": "#511749", "mode": "fixed" } } ] }, { "matcher": { 
"id": "byName", "options": "Committed" }, "properties": [ { "id": "color", "value": { "fixedColor": "#508642", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Dirty" }, "properties": [ { "id": "color", "value": { "fixedColor": "#6ED0E0", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Free" }, "properties": [ { "id": "color", "value": { "fixedColor": "#B7DBAB", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Inactive" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EA6460", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Mapped" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "PageTables" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A50A1", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Page_Tables" }, "properties": [ { "id": "color", "value": { "fixedColor": "#0A50A1", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Slab_Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EAB839", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap" }, "properties": [ { "id": "color", "value": { "fixedColor": "#BF1B00", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Swap_Cache" }, "properties": [ { "id": "color", "value": { "fixedColor": "#C15C17", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Total" }, "properties": [ { "id": "color", "value": { "fixedColor": "#511749", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Total RAM" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Total RAM + Swap" }, "properties": [ { "id": "color", "value": { "fixedColor": "#052B51", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "VmallocUsed" }, 
"properties": [ { "id": "color", "value": { "fixedColor": "#EA6460", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 972 }, "id": 128, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "DirectMap 1G – Memory mapped with 1GB pages", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "DirectMap 2M – Memory mapped with 2MB pages", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "DirectMap 4K – Memory mapped with 4KB pages", "range": true, "refId": "C", "step": 240 } ], "title": "Memory DirectMap", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Displays HugePages memory usage in bytes, including allocated, free, reserved, and surplus memory. 
All values are calculated based on the number of huge pages multiplied by their configured size", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 972 }, "id": 140, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "HugePages Free – Currently unallocated", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "HugePages Reserved – Promised but unused", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "HugePages Surplus – Dynamic pool 
extension", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"} * node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "HugePages Total – Reserved memory", "range": true, "refId": "D", "step": 240 } ], "title": "Memory HugePages", "type": "timeseries" } ], "title": "Memory Meminfo", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 22 }, "id": 267, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of memory pages being read from or written to disk (page-in and page-out operations). High page-out may indicate memory pressure or swapping activity", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 733 }, "id": 176, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { 
"editorMode": "code", "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pagesin - Page in ops", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pagesout - Page out ops", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Pages In / Out", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate at which memory pages are being swapped in from or out to disk. High swap-out activity may indicate memory pressure", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 733 }, "id": 22, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": 
"irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pswpin - Pages swapped in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pswpout - Pages swapped out", "range": true, "refId": "B", "step": 240 } ], "title": "Memory Pages Swap In / Out", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of memory page faults, split into total, major (disk-backed), and derived minor (non-disk) faults. High major fault rates may indicate memory pressure or insufficient RAM", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [ { "matcher": { "id": "byName", "options": "Pgfault - Page major and minor fault ops" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.stacking", "value": { "group": false, "mode": "none" } }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 913 }, "id": 175, "options": { "legend": { 
"calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 350 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pgfault - Page major and minor fault ops", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pgmajfault - Major page fault ops", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Pgminfault - Minor page fault ops", "range": true, "refId": "C", "step": 240 } ], "title": "Memory Page Faults", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of Out-of-Memory (OOM) kill events. 
A non-zero value indicates the kernel has terminated one or more processes due to memory exhaustion", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [ { "matcher": { "id": "byName", "options": "OOM Kills" }, "properties": [ { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 913 }, "id": 307, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "OOM Kills", "range": true, "refId": "A", "step": 240 } ], "title": "OOM Killer", "type": "timeseries" } ], "title": "Memory Vmstat", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, "id": 293, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks the system clock's estimated and maximum error, as well as its offset from the reference clock (e.g., via NTP). 
Useful for detecting synchronization drift", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 734 }, "id": 260, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Estimated error", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Offset local vs reference", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Maximum error", "range": true, "refId": "C", "step": 240 } ], "title": "Time Synchronized Drift", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "NTP phase-locked loop (PLL) time constant used by the kernel to control time 
adjustments. Lower values mean faster correction but less stability", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 734 }, "id": 291, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PLL Time Constant", "range": true, "refId": "A", "step": 240 } ], "title": "Time PLL Adjust", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows whether the system clock is synchronized to a reliable time source, and the current frequency correction ratio applied by the kernel to maintain synchronization", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { 
"legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 884 }, "id": 168, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Sync status (1 = ok)", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Frequency Adjustment", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "Tick Interval", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "TAI Offset", "range": true, "refId": "D", "step": 240 } ], "title": "Time Synchronized Status", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Displays the PPS signal's frequency offset and stability (jitter) in hertz. 
Useful for monitoring high-precision time sources like GPS or atomic clocks", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "rothz" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 884 }, "id": 333, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_timex_pps_frequency_hertz{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PPS Frequency Offset", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_timex_pps_stability_hertz{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PPS Frequency Stability", "range": true, "refId": "B", "step": 240 } ], "title": "PPS Frequency / Stability", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks PPS signal timing jitter and shift compared to system clock", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", 
"axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 894 }, "id": 334, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_timex_pps_jitter_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PPS Jitter", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_timex_pps_shift_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PPS Shift", "range": true, "refId": "B", "step": 240 } ], "title": "PPS Time Accuracy", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of PPS synchronization diagnostics including calibration events, jitter violations, errors, and frequency stability exceedances", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, 
"lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 894 }, "id": 335, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_timex_pps_calibration_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "PPS Calibrations/sec", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_timex_pps_error_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "PPS Errors/sec", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_timex_pps_stability_exceeded_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "PPS Stability Exceeded/sec", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "irate(node_timex_pps_jitter_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "PPS Jitter Events/sec", "range": true, "refId": "D", "step": 240 } ], "title": "PPS Sync Events", "type": "timeseries" } ], "title": "System Timesync", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 }, "id": 312, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Processes 
currently in runnable or blocked states. Helps identify CPU contention or I/O wait bottlenecks.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 735 }, "id": 62, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Blocked (I/O Wait)", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Runnable (Ready for CPU)", "range": true, "refId": "B", "step": 240 } ], "title": "Processes Status", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Current number of processes in each state (e.g., running, sleeping, zombie). 
Requires --collector.processes to be enabled in node_exporter", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "D" }, "properties": [ { "id": "displayName", "value": "Uninterruptible Sleeping" } ] }, { "matcher": { "id": "byName", "options": "I" }, "properties": [ { "id": "displayName", "value": "Idle Kernel Thread" } ] }, { "matcher": { "id": "byName", "options": "R" }, "properties": [ { "id": "displayName", "value": "Running" } ] }, { "matcher": { "id": "byName", "options": "S" }, "properties": [ { "id": "displayName", "value": "Interruptible Sleeping" } ] }, { "matcher": { "id": "byName", "options": "T" }, "properties": [ { "id": "displayName", "value": "Stopped" } ] }, { "matcher": { "id": "byName", "options": "X" }, "properties": [ { "id": "displayName", "value": "Dead" } ] }, { "matcher": { "id": "byName", "options": "Z" }, "properties": [ { "id": "displayName", "value": "Zombie" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 735 }, "id": 315, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { 
"editorMode": "code", "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ state }}", "range": true, "refId": "A", "step": 240 } ], "title": "Processes Detailed States", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of new processes being created on the system (forks/sec).", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 765 }, "id": 148, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Process Forks per second", "range": true, "refId": "A", "step": 240 } ], "title": "Processes Forks", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows CPU saturation per core, calculated as the proportion of time spent waiting to run relative to total time demanded (running + 
waiting).", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*waiting.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 765 }, "id": 305, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "hide": true, "interval": "", "legendFormat": "CPU {{ cpu }} - Running", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "hide": true, "interval": "", "legendFormat": "CPU {{cpu}} - Waiting Queue", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": 
"irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])\n/\n(irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) + irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]))\n", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}}", "range": true, "refId": "C", "step": 240 } ], "title": "CPU Saturation per Core", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of active PIDs on the system and the configured maximum allowed. Useful for detecting PID exhaustion risk. Requires --collector.processes in node_exporter", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "PIDs limit" }, "properties": [ { "id": "color", "value": { "fixedColor": "#F2495C", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 775 }, "id": 313, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": 
false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Number of PIDs", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "PIDs limit", "range": true, "refId": "B", "step": 240 } ], "title": "PIDs Number and Limit", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of active threads on the system and the configured thread limit. Useful for monitoring thread pressure. Requires --collector.processes in node_exporter", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "Threads limit" }, "properties": [ { "id": "color", "value": { "fixedColor": "#F2495C", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 775 }, "id": 314, "options": { "legend": { "calcs": [ "min", "mean", "max" ], 
"displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Allocated threads", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Threads limit", "range": true, "refId": "B", "step": 240 } ], "title": "Threads Number and Limit", "type": "timeseries" } ], "title": "System Processes", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 25 }, "id": 269, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Per-second rate of context switches and hardware interrupts. High values may indicate intense CPU or I/O activity", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 816 }, "id": 8, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { 
"hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Context switches", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "Interrupts", "range": true, "refId": "B", "step": 240 } ], "title": "Context Switches / Interrupts", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "System load average over 1, 5, and 15 minutes. Reflects the number of active or waiting processes. Values above CPU core count may indicate overload", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "CPU Core Count" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 816 }, "id": 7, "options": { "legend": { "calcs": [ "min", 
"mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_load1{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Load 1m", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_load5{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Load 5m", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_load15{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Load 15m", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", "format": "time_series", "legendFormat": "CPU Core Count", "range": true, "refId": "D", "step": 240 } ], "title": "System Load", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Real-time CPU frequency scaling per core, including average minimum and maximum allowed scaling frequencies", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "hertz" }, "overrides": [ { "matcher": { "id": "byName", "options": "Max" 
}, "properties": [ { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }, { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": false, "viz": false } } ] }, { "matcher": { "id": "byName", "options": "Min" }, "properties": [ { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }, { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": false, "viz": false } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 826 }, "id": 321, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "desc" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_cpu_scaling_frequency_hertz{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "CPU {{ cpu }}", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "avg(node_cpu_scaling_frequency_max_hertz{instance=\"$node\",job=\"$job\"})", "format": "time_series", "interval": "", "legendFormat": "Max", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "avg(node_cpu_scaling_frequency_min_hertz{instance=\"$node\",job=\"$job\"})", "format": "time_series", "interval": "", "legendFormat": "Min", "range": true, "refId": "C", "step": 240 } ], "title": "CPU Frequency Scaling", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of scheduling timeslices executed per CPU. 
Reflects how frequently the scheduler switches tasks on each core", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 826 }, "id": 306, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{ cpu }}", "range": true, "refId": "A", "step": 240 } ], "title": "CPU Schedule Timeslices", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Breaks down hardware interrupts by type and device. Useful for diagnosing IRQ load on network, disk, or CPU interfaces. 
Requires --collector.interrupts to be enabled in node_exporter", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 836 }, "id": 259, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{ type }} - {{ info }}", "range": true, "refId": "A", "step": 240 } ], "title": "IRQ Detail", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of bits of entropy currently available to the system's random number generators (e.g., /dev/random). 
Low values may indicate that random number generation could block or degrade performance of cryptographic operations", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "decbits" }, "overrides": [ { "matcher": { "id": "byName", "options": "Entropy pool max" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 836 }, "id": 151, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Entropy available", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_entropy_pool_size_bits{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Entropy pool max", "range": true, "refId": "B", "step": 240 } ], "title": "Entropy", "type": "timeseries" } ], "title": "System Misc", "type": "row" }, { "collapsed": true, 
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 }, "id": 304, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Monitors hardware sensor temperatures and critical thresholds as exposed by Linux hwmon. Includes CPU, GPU, and motherboard sensors where available", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "celsius" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Critical.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "#E24D42", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 737 }, "id": 158, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }}", "range": true, "refId": "A", "step": 240 }, { "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"} * on(chip) 
group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }} Critical Alarm", "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }} Critical", "range": true, "refId": "C", "step": 240 }, { "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }} Critical Hysteresis", "refId": "D", "step": 240 }, { "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }} Max", "refId": "E", "step": 240 } ], "title": "Hardware Temperature Monitor", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows how hard each cooling device (fan/throttle) is working relative to its maximum capacity", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": 
"none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percent" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Max.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "#EF843C", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 737 }, "id": 300, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "100 * node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"} / node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ name }} - {{ type }} ", "range": true, "refId": "A", "step": 240 } ], "title": "Cooling Device Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows the online status of power supplies (e.g., AC, battery). 
A value of 1-Yes indicates the power supply is active/online", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "bool_yes_no" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 747 }, "id": 302, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ power_supply }} online", "range": true, "refId": "A", "step": 240 } ], "title": "Power Supply", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Displays the current fan speeds (RPM) from hardware sensors via the hwmon interface", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, 
"insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "rotrpm" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 747 }, "id": 325, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_hwmon_fan_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }}", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_hwmon_fan_min_rpm{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "interval": "", "legendFormat": "{{ chip_name }} {{ sensor }} rpm min", "range": true, "refId": "B", "step": 240 } ], "title": "Hardware Fan Speed", "type": "timeseries" } ], "title": "Hardware Misc", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 27 }, "id": 296, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Current number of systemd units in each operational state, such as active, failed, inactive, or transitioning", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, 
"drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "Failed" }, "properties": [ { "id": "color", "value": { "fixedColor": "#F2495C", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Active" }, "properties": [ { "id": "color", "value": { "fixedColor": "#73BF69", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Activating" }, "properties": [ { "id": "color", "value": { "fixedColor": "#C8F2C2", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Deactivating" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "Inactive" }, "properties": [ { "id": "color", "value": { "fixedColor": "dark-blue", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 4228 }, "id": 298, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", "format": "time_series", "interval": "", "legendFormat": "Activating", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", "format": "time_series", "interval": "", "legendFormat": "Active", "range": true, 
"refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", "format": "time_series", "interval": "", "legendFormat": "Deactivating", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", "format": "time_series", "interval": "", "legendFormat": "Failed", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", "format": "time_series", "interval": "", "legendFormat": "Inactive", "range": true, "refId": "E", "step": 240 } ], "title": "Systemd Units State", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Current number of active connections per systemd socket, as reported by the Node Exporter systemd collector", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 4228 }, "id": 331, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, 
"pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_systemd_socket_current_connections{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{ name }}", "range": true, "refId": "A", "step": 240 } ], "title": "Systemd Sockets Current", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of accepted connections per second for each systemd socket", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "eps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 4238 }, "id": 297, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{ name }}", "range": true, "refId": "A", "step": 240 } ], "title": "Systemd Sockets Accepted", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of systemd socket connection 
refusals per second, typically due to service unavailability or backlog overflow", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "eps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 4238 }, "id": 332, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_systemd_socket_refused_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{ name }}", "range": true, "refId": "A", "step": 240 } ], "title": "Systemd Sockets Refused", "type": "timeseries" } ], "title": "Systemd", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 28 }, "id": 270, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of I/O operations completed per second for the device (after merges), including both reads and writes", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (–) / write 
(+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "iops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] }, { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 29 }, "id": 9, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk Read/Write IOps", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of bytes read from or written to the device per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (–) / write (+)", 
"axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "Bps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] }, { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 29 }, "id": 33, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "exemplar": false, "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": false, "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk Read/Write Data", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Average time for requests issued to the device to be served. 
This includes the time spent by the requests in queue and the time spent servicing them.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (–) / write (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] }, { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 389 }, "id": 37, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", 
"interval": "", "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk Average Wait Time", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Average queue length of the requests that were issued to the device", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/sda_*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "#7EB26D", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 389 }, "id": 35, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}}", "range": true, "refId": "A", "step": 240 } ], "title": "Average Queue Size", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of read and write requests merged per second that were queued to the device", 
"fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "read (–) / write (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "iops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Read.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] }, { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 399 }, "id": 133, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "legendFormat": "{{device}} - Read", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "legendFormat": "{{device}} - Write", "range": true, "refId": "B", "step": 240 } ], "title": "Disk R/W Merged", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Percentage of time the disk spent actively processing I/O operations, 
including general I/O, discards (TRIM), and write cache flushes", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 399 }, "id": 36, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - General IO", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Discard/TRIM", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_flush_requests_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Flush (write cache)", "range": true, "refId": 
"C", "step": 240 } ], "title": "Time Spent Doing I/Os", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Per-second rate of discard (TRIM) and flush (write cache) operations. Useful for monitoring low-level disk activity on SSDs and advanced storage", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ops" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 409 }, "id": 301, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Discards completed", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Discards merged", "range": true, "refId": "B", "step": 
240 }, { "editorMode": "code", "expr": "irate(node_disk_flush_requests_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}} - Flush", "range": true, "refId": "C", "step": 240 } ], "title": "Disk Ops Discards / Flush", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows how many disk sectors are discarded (TRIMed) per second. Useful for monitoring SSD behavior and storage efficiency", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 409 }, "id": 326, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_disk_discarded_sectors_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "{{device}}", "range": true, "refId": "A", "step": 240 } ], "title": "Disk Sectors Discarded Successfully", "type": 
"timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of in-progress I/O requests at the time of sampling (active requests in the disk queue)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/sda.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 419 }, "id": 34, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "{{device}}", "range": true, "refId": "A", "step": 240 } ], "title": "Instantaneous Queue Size", "type": "timeseries" } ], "title": "Storage Disk", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, "id": 271, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of file descriptors currently allocated system-wide versus the system limit. 
Important for detecting descriptor exhaustion risks", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Max.*/" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 30 }, "id": 28, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Max open files", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "Open files", "range": true, "refId": "B", "step": 240 } ], "title": "File Descriptor", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of free file nodes (inodes) available per mounted filesystem. 
A low count may prevent file creation even if disk space is available", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 30 }, "id": 41, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "legendFormat": "{{mountpoint}}", "range": true, "refId": "A", "step": 240 } ], "title": "File Nodes Free", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Indicates filesystems mounted in read-only mode or reporting device-level I/O errors.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, 
"lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bool_yes_no" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 370 }, "id": 44, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "legendFormat": "{{mountpoint}} - ReadOnly", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", "format": "time_series", "interval": "", "legendFormat": "{{mountpoint}} - Device error", "range": true, "refId": "B", "step": 240 } ], "title": "Filesystem in ReadOnly / Error", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Total number of file nodes (inodes) per mounted filesystem. 
Reflects maximum file capacity regardless of disk size", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 370 }, "id": 219, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", "format": "time_series", "legendFormat": "{{mountpoint}}", "range": true, "refId": "A", "step": 240 } ], "title": "File Nodes Size", "type": "timeseries" } ], "title": "Storage Filesystem", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 }, "id": 272, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of network packets received and transmitted per second, by interface.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", 
"fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 31 }, "id": 60, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic by Packets", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of packet-level errors for each network interface. 
Receive errors may indicate physical or driver issues; transmit errors may reflect collisions or hardware faults", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 31 }, "id": 142, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic Errors", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of dropped packets per network 
interface. Receive drops can indicate buffer overflow or driver issues; transmit drops may result from outbound congestion or queuing limits", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 251 }, "id": 143, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic Drop", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of 
compressed network packets received and transmitted per interface. These are common in low-bandwidth or special interfaces like PPP or SLIP", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 251 }, "id": 141, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic Compressed", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": 
"Rate of incoming multicast packets received per network interface. Multicast is used by protocols such as mDNS, SSDP, and some streaming or cluster services", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 261 }, "id": 146, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 } ], "title": "Network Traffic Multicast", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of received packets that could not be processed due to missing protocol or handler in the kernel. 
May indicate unsupported traffic or misconfiguration", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 261 }, "id": 327, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_nohandler_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 } ], "title": "Network Traffic NoHandler", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of frame errors on received packets, typically caused by physical layer issues such as bad cables, duplex mismatches, or hardware problems", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) 
/ in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 271 }, "id": 145, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 } ], "title": "Network Traffic Frame", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks FIFO buffer overrun errors on network interfaces. 
These occur when incoming or outgoing packets are dropped due to queue or buffer overflows, often indicating congestion or hardware limits", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 271 }, "id": 144, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "rate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Network Traffic Fifo", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of packet 
collisions detected during transmission. Mostly relevant on half-duplex or legacy Ethernet networks", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 281 }, "id": 232, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "A", "step": 240 } ], "title": "Network Traffic Collision", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of carrier errors during transmission. 
These typically indicate physical layer issues like faulty cabling or duplex mismatches", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 281 }, "id": 231, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "rate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "{{device}} - Tx out", "range": true, "refId": "A", "step": 240 } ], "title": "Network Traffic Carrier Errors", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of ARP entries per interface. 
Useful for detecting excessive ARP traffic or table growth due to scanning or misconfiguration", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 291 }, "id": 230, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "{{ device }} ARP Table", "range": true, "refId": "A", "step": 240 } ], "title": "ARP Entries", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Current and maximum connection tracking entries used by Netfilter (nf_conntrack). 
High usage approaching the limit may cause packet drops or connection issues", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "NF conntrack limit" }, "properties": [ { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 291 }, "id": 61, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "NF conntrack entries", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "NF conntrack limit", "range": true, "refId": "B", "step": 240 } ], "title": "NF Conntrack", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": 
"Operational and physical link status of each network interface. Values are Yes for 'up' or link present, and No for 'down' or no carrier.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bool_yes_no" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 301 }, "id": 309, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", "format": "time_series", "hide": true, "legendFormat": "{{interface}} - Operational state UP", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", "format": "time_series", "instant": false, "legendFormat": "{{device}} - Physical link", "refId": "B" } ], "title": "Network Operational Status", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Maximum speed of each network interface as reported by the operating system.
This is a static hardware capability, not current throughput", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "fieldMinMax": false, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 6, "x": 12, "y": 301 }, "id": 280, "options": { "displayMode": "basic", "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "maxVizHeight": 30, "minVizHeight": 16, "minVizWidth": 8, "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showUnfilled": true, "sizing": "manual", "valueMode": "color" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"} * 8", "format": "time_series", "legendFormat": "{{ device }}", "range": true, "refId": "A", "step": 240 } ], "title": "Speed", "type": "bargauge" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "MTU (Maximum Transmission Unit) in bytes for each network interface. 
Affects packet size and transmission efficiency", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 10, "w": 6, "x": 18, "y": 301 }, "id": 288, "options": { "displayMode": "basic", "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "maxVizHeight": 30, "minVizHeight": 16, "minVizWidth": 8, "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showUnfilled": true, "sizing": "manual", "valueMode": "color" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "legendFormat": "{{ device }}", "range": true, "refId": "A", "step": 240 } ], "title": "MTU", "type": "bargauge" } ], "title": "Network Traffic", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 31 }, "id": 273, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks TCP socket counts per node: allocated, in-use, orphaned, and TIME_WAIT sockets", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ {
"color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 32 }, "id": 63, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Allocated Sockets", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "In-Use Sockets", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Orphaned Sockets", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "TIME_WAIT Sockets", "range": true, "refId": "D", "step": 240 } ], "title": "Sockstat TCP", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of UDP and UDPLite sockets currently in use", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": 
"none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 32 }, "id": 124, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "UDPLite - In-Use Sockets", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "UDP - In-Use Sockets", "range": true, "refId": "B", "step": 240 } ], "title": "Sockstat UDP", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Total number of sockets currently in use across all protocols (TCP, UDP, UNIX, etc.), as reported by /proc/net/sockstat", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, 
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 42 }, "id": 126, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Total sockets", "range": true, "refId": "A", "step": 240 } ], "title": "Sockstat Used", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of FRAG and RAW sockets currently in use. RAW sockets are used for custom protocols or tools like ping; FRAG sockets are used internally for IP packet defragmentation", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 42 }, "id": 125, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": 
"node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "FRAG - In-Use Sockets", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "RAW - In-Use Sockets", "range": true, "refId": "C", "step": 240 } ], "title": "Sockstat FRAG / RAW", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Kernel memory used by TCP, UDP, and IP fragmentation buffers", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 52 }, "id": 220, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "TCP", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": 
"node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "UDP", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "Fragmentation", "range": true, "refId": "C" } ], "title": "Sockstat Memory Size", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Average memory used per socket (TCP/UDP). Helps tune net.ipv4.tcp_rmem / tcp_wmem", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 52 }, "id": 339, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "TCP", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": 
"node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"} / node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "UDP", "range": true, "refId": "B", "step": 240 } ], "title": "Sockstat Average Socket Memory", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "TCP/UDP socket memory usage in kernel (in pages)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 62 }, "id": 336, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "TCP", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "UDP", "range": true, "refId": "B", "step": 240 } ], "title": "TCP/UDP Kernel Buffer Memory Pages", "type": "timeseries" }, 
{ "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Packets processed and dropped by the softnet network stack per CPU. Drops may indicate CPU saturation or network driver limitations", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "drop (-) / process (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Dropped.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 62 }, "id": 290, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}} - Processed", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}} - Dropped", "range": true, "refId": "B", "step": 240 } ], "title": "Softnet Packets", "type": 
"timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "How often the kernel was unable to process all packets in the softnet queue before time ran out. Frequent squeezes may indicate CPU contention or driver inefficiency", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "eps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 72 }, "id": 310, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}} - Times Squeezed", "range": true, "refId": "A", "step": 240 } ], "title": "Softnet Out of Quota", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks the number of packets processed or dropped by Receive Packet Steering (RPS), a mechanism to distribute packet processing across CPUs", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" 
}, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Dropped.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 72 }, "id": 330, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_softnet_received_rps_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}} - Processed", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_softnet_flow_limit_count_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "CPU {{cpu}} - Dropped", "range": true, "refId": "B", "step": 240 } ], "title": "Softnet RPS", "type": "timeseries" } ], "title": "Network Sockstat", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, "id": 274, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, 
"description": "Rate of octets sent and received at the IP layer, as reported by /proc/net/netstat", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "Bps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 163 }, "id": 221, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "width": 300 }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "IP Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "legendFormat": "IP Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "Netstat IP In / Out Octets", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of TCP segments sent and received per second, including 
data and control segments", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] }, { "matcher": { "id": "byRegexp", "options": "/.*Snd.*/" }, "properties": [] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 163 }, "id": 299, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "instant": false, "interval": "", "legendFormat": "TCP Rx in", "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "TCP Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "TCP In / Out", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of UDP datagrams sent and 
received per second, based on /proc/net/netstat", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 193 }, "id": 55, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "UDP In / Out", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of ICMP messages sent and received per second, including error and control messages", "fieldConfig": { "defaults": { 
"color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 193 }, "id": 115, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "ICMP Rx in", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "ICMP Tx out", "range": true, "refId": "B", "step": 240 } ], "title": "ICMP In / Out", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks various TCP error and congestion-related events, including retransmissions, timeouts, dropped connections, and buffer issues", "fieldConfig": { "defaults": { "color": 
{ "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 203 }, "id": 104, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "Listen Overflows", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "Listen Drops", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "SYN Retransmits", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "Segment Retransmits", "range": true, "refId": "D" }, { 
"editorMode": "code", "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "Receive Errors", "range": true, "refId": "E" }, { "editorMode": "code", "expr": "irate(node_netstat_Tcp_OutRsts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "RST Sent", "range": true, "refId": "F" }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "Receive Queue Drops", "range": true, "refId": "G" }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_TCPOFOQueue{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "Out-of-order Queued", "range": true, "refId": "H" }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_TCPTimeouts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "TCP Timeouts", "range": true, "refId": "I" } ], "title": "TCP Errors", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of UDP and UDPLite datagram delivery errors, including missing listeners, buffer overflows, and protocol-specific issues", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": 
"green" }, { "color": "red", "value": 80 } ] }, "unit": "pps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 203 }, "id": 109, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP Rx in Errors", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP No Listener", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "interval": "", "legendFormat": "UDPLite Rx in Errors", "range": true, "refId": "C" }, { "editorMode": "code", "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP Rx in Buffer Errors", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "UDP Tx out Buffer Errors", "range": true, "refId": "E", "step": 240 } ], "title": "UDP Errors", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of incoming ICMP messages that contained protocol-specific errors, such as bad checksums or invalid lengths", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": 
"out (-) / in (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "pps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*out.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 213 }, "id": 50, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "ICMP Rx In", "range": true, "refId": "A", "step": 240 } ], "title": "ICMP Errors", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of TCP SYN cookies sent, validated, and failed. 
These are used to protect against SYN flood attacks and manage TCP handshake resources under load", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "eps" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Failed.*/" }, "properties": [ { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 213 }, "id": 91, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "SYN Cookies Failed", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "SYN Cookies Validated", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": 
"", "legendFormat": "SYN Cookies Sent", "range": true, "refId": "C", "step": 240 } ], "title": "TCP SynCookie", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of currently established TCP connections and the system's max supported limit. On Linux, MaxConn may return -1 to indicate a dynamic/unlimited configuration", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Max*./" }, "properties": [ { "id": "color", "value": { "fixedColor": "#890F02", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] } ] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 223 }, "id": 85, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Current Connections", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": 
"node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Max Connections", "range": true, "refId": "B", "step": 240 } ], "title": "TCP Connections", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of UDP packets currently queued in the receive (RX) and transmit (TX) buffers. A growing queue may indicate a bottleneck", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 223 }, "id": 337, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"rx\"}", "format": "time_series", "interval": "", "legendFormat": "UDP Rx in Queue", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_udp_queues{instance=\"$node\",job=\"$job\",ip=\"v4\",queue=\"tx\"}", "format": "time_series", "interval": "", "legendFormat": "UDP Tx out Queue", "range": true, "refId": "B", "step": 240 } ], "title": "UDP 
Queue", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of TCP connection initiations per second. 'Active' opens are initiated by this host. 'Passive' opens are accepted from incoming connections", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "eps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 233 }, "id": 82, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "Active Opens", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "Passive Opens", "range": true, "refId": "B", "step": 240 } ], "title": "TCP Direct Transition", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of TCP sockets in key 
connection states. Requires the --collector.tcpstat flag on node_exporter", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "noValue": "0", "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 233 }, "id": 320, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_tcp_connection_states{state=\"established\",instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Established", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "node_tcp_connection_states{state=\"fin_wait2\",instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "FIN_WAIT2", "range": true, "refId": "B", "step": 240 }, { "editorMode": "code", "expr": "node_tcp_connection_states{state=\"listen\",instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "Listen", "range": true, "refId": "C", "step": 240 }, { "editorMode": "code", "expr": "node_tcp_connection_states{state=\"time_wait\",instance=\"$node\",job=\"$job\"}", "format": "time_series", 
"interval": "", "legendFormat": "TIME_WAIT", "range": true, "refId": "D", "step": 240 }, { "editorMode": "code", "expr": "node_tcp_connection_states{state=\"close_wait\", instance=\"$node\", job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "CLOSE_WAIT", "range": true, "refId": "E", "step": 240 } ], "title": "TCP Stat", "type": "timeseries" } ], "title": "Network Netstat", "type": "row" }, { "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 33 }, "id": 279, "panels": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Duration of each individual collector executed during a Node Exporter scrape. Useful for identifying slow or failing collectors", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 0, "y": 164 }, "id": 40, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": 
"{{collector}}", "range": true, "refId": "A", "step": 240 } ], "title": "Node Exporter Scrape Time", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Rate of CPU time used by the process exposing this metric (user + system mode)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 10, "w": 12, "x": 12, "y": 164 }, "id": 308, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", "format": "time_series", "interval": "", "legendFormat": "Process CPU Usage", "range": true, "refId": "A", "step": 240 } ], "title": "Exporter Process CPU Usage", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Tracks the memory usage of the process exposing this metric (e.g., node_exporter), including current virtual memory and maximum virtual memory limit", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { 
"axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "bytes" }, "overrides": [ { "matcher": { "id": "byName", "options": "Virtual Memory Limit" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } }, { "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } } ] }, { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Virtual Memory" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 10, "w": 10, "x": 0, "y": 174 }, "id": 149, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "Virtual Memory", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "Virtual Memory Limit", "range": true, "refId": "B", "step": 240 } ], "title": "Exporter Processes 
Memory", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Number of file descriptors used by the exporter process versus its configured limit", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Max*./" }, "properties": [ { "id": "color", "value": { "fixedColor": "#890F02", "mode": "fixed" } }, { "id": "custom.fillOpacity", "value": 0 }, { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] }, { "__systemRef": "hideSeriesFrom", "matcher": { "id": "byNames", "options": { "mode": "exclude", "names": [ "Open file descriptors" ], "prefix": "All except:", "readOnly": true } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": false, "tooltip": false, "viz": true } } ] } ] }, "gridPos": { "h": 10, "w": 10, "x": 10, "y": 174 }, "id": 64, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "Maximum open file 
descriptors", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", "interval": "", "legendFormat": "Open file descriptors", "range": true, "refId": "B", "step": 240 } ], "title": "Exporter File Descriptor Usage", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "description": "Shows whether each Node Exporter collector scraped successfully (1 = success, 0 = failure), and whether the textfile collector returned an error.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "dark-red", "value": 0 }, { "color": "green", "value": 1 } ] }, "unit": "bool" }, "overrides": [] }, "gridPos": { "h": 10, "w": 4, "x": 20, "y": 174 }, "id": 157, "options": { "displayMode": "basic", "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "maxVizHeight": 300, "minVizHeight": 16, "minVizWidth": 8, "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showUnfilled": true, "sizing": "auto", "valueMode": "color" }, "pluginVersion": "11.6.1", "targets": [ { "editorMode": "code", "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "{{collector}}", "range": true, "refId": "A", "step": 240 }, { "editorMode": "code", "expr": "1 - node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", "format": "time_series", "interval": "", "legendFormat": "textfile", "range": true, "refId": "B", "step": 240 } ], "title": "Node Exporter Scrape", "type": "bargauge" } ], "title": "Node Exporter", "type": "row" } ], "refresh": "1m", "schemaVersion": 41, "tags": [ "linux" ], "templating": { "list": [ { "current": {}, "hide": 2, "includeAll": false, "label": 
"Datasource", "name": "ds_prometheus", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "definition": "", "hide": 2, "includeAll": false, "label": "Job", "name": "job", "options": [], "query": { "query": "label_values(node_uname_info, job)", "refId": "Prometheus-job-Variable-Query" }, "refresh": 1, "regex": "", "sort": 1, "type": "query" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "definition": "label_values(node_uname_info{job=\"$job\"}, nodename)", "includeAll": false, "label": "Server", "name": "nodename", "options": [], "query": { "query": "label_values(node_uname_info{job=\"$job\"}, nodename)", "refId": "Prometheus-nodename-Variable-Query" }, "refresh": 1, "regex": "", "sort": 1, "type": "query" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, "definition": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", "hide": 2, "includeAll": false, "label": "Instance", "name": "node", "options": [], "query": { "query": "label_values(node_uname_info{job=\"$job\", nodename=\"$nodename\"}, instance)", "refId": "Prometheus-node-Variable-Query" }, "refresh": 1, "regex": "", "sort": 1, "type": "query" } ] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "Node Overview", "uid": "rYdddlPWk", "version": 98, "weekStart": "", "gnetId": 1860 } ================================================ FILE: grafana/node_overview.yml ================================================ --- # config file version apiVersion: 1 providers: - name: 'node_overview' orgId: 1 folder: '' type: file options: path: /usr/share/elephant-shed/grafana/node_overview.json datasource: prometheus ================================================ FILE: grafana/postgresql_server_overview.json ================================================ 
{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "DS_PROMETHEUS", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "5.2.3" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "5.0.0" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "5.0.0" }, { "type": "panel", "id": "singlestat", "name": "Singlestat", "version": "5.0.0" }, { "type": "panel", "id": "table", "name": "Table", "version": "5.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "Overview of the most important and many other helpful metrics for managing and debugging PostgreSQL servers.", "editable": true, "gnetId": null, "graphTooltip": 0, "id": null, "iteration": 1536240081130, "links": [], "panels": [ { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 100, "panels": [], "title": "Settings", "type": "row" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 0, "y": 1 }, "id": 119, "interval": null, "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "text": {}, "textMode": "name" }, "pluginVersion": "7.4.3", "targets": [ { "exemplar": false, "expr": "sql_server{host=~'$host',instance=~'$instance',sql_job=~'$cluster',col=\"server_start_time\"}", "instant": false, "interval": "", "legendFormat": "{{server_version}}", "refId": "A" } ], "title": "Version", 
"transformations": [], "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 2, "y": 1 }, "id": 115, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "count(count(node_cpu_seconds_total{instance=~'$instance'}) without (mode,job)) without (cpu)", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "# Cores", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 4, "y": 1 }, "id": 112, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "node_memory_MemTotal_bytes{instance=~'$instance'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Total Memory", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 6, "y": 1 }, "id": 110, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", 
"reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"max_connections\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Max Connections", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 9, "y": 1 }, "id": 104, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"work_mem\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Work Mem", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 12, "y": 1 }, "id": 105, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"shared_buffers\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Shared Buffers", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { 
"color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 15, "y": 1 }, "id": 109, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"max_wal_size\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Max WAL Size", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [ { "from": "", "id": 1, "text": "Off", "to": "", "type": 1, "value": "0" }, { "from": "", "id": 2, "text": "On", "to": "", "type": 1, "value": "1" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 18, "y": 1 }, "id": 102, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"data_checksums\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Data Checksums", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "noValue": "N/A", "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 21, "y": 1 }, "id": 114, "options": 
{ "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sum by (checksum_failures) (sql_pg_stat_database{col=\"checksum_failures\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'})", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Checksum Failures", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [ { "options": { "0": { "index": 1, "text": "Standby" }, "1": { "color": "dark-green", "index": 0, "text": "Primary" } }, "type": "value" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": 0 } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 0, "y": 4 }, "id": 126, "maxDataPoints": 100, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showPercentChange": false, "text": {}, "textMode": "value", "wideLayout": true }, "pluginVersion": "12.3.1", "targets": [ { "datasource": { "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "exemplar": false, "expr": "sql_server{col='server_is_primary',host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "format": "time_series", "instant": true, "interval": "", "legendFormat": "", "range": false, "refId": "A" } ], "title": "Role", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "dateTimeFromNow" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 2, "y": 4 }, "id": 106, "options": { 
"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_server{col=\"server_start_time\", host=~'$host',instance=~'$instance',sql_job=~'$cluster'} * 1000", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Server Start Time", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 6, "y": 4 }, "id": 120, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"max_worker_processes\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Max Worker Processes", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 9, "y": 4 }, "id": 107, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"maintenance_work_mem\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", 
"legendFormat": "", "refId": "A" } ], "title": "Maintenance Work Mem", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 12, "y": 4 }, "id": 108, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"effective_cache_size\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Effective Cache Size", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 15, "y": 4 }, "id": 111, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"checkpoint_timeout\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Checkpoint Timeout", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [ { "from": "", "id": 1, "text": "Off", "to": "", "type": 1, "value": "0" }, { "from": "", "id": 2, "text": "On", "to": "", "type": 1, "value": "1" } ], "thresholds": { "mode": 
"absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 18, "y": 4 }, "id": 103, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "sql_settings{col=\"jit\",host=~'$host',instance=~'$instance',sql_job=~'$cluster'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "JIT", "type": "stat" }, { "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": {}, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "rgb(255, 255, 255)", "value": null } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byName", "options": "requested (explicit, wal or backup-based)" }, "properties": [ { "id": "displayName" } ] } ] }, "gridPos": { "h": 3, "w": 3, "x": 21, "y": 4 }, "id": 113, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "7.4.3", "targets": [ { "expr": "node_vmstat_oom_kill{instance=~'$instance'}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Out-of-Memory Kills", "type": "stat" }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 }, "id": 69, "panels": [], "repeat": null, "title": "Summary for $instance", "type": "row" }, { "id": 124, "type": "timeseries", "title": "System Stats", "gridPos": { "x": 0, "y": 8, "h": 7, "w": 15 }, "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", "lineInterpolation": "linear", "barAlignment": 0, "barWidthFactor": 0.6, "lineWidth": 1, "fillOpacity": 10, "gradientMode": "none", "spanNulls": false, 
"insertNulls": false, "showPoints": "never", "pointSize": 5, "stacking": { "mode": "none", "group": "A" }, "axisPlacement": "auto", "axisLabel": "", "axisColorMode": "text", "axisBorderShow": false, "scaleDistribution": { "type": "linear" }, "axisCenteredZero": false, "hideFrom": { "tooltip": false, "viz": false, "legend": false }, "thresholdsStyle": { "mode": "off" }, "lineStyle": { "fill": "solid" } }, "color": { "mode": "palette-classic" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "min": 0, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsZero", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] }, { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsNull", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] }, { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsZero", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] }, { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsNull", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] }, { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsZero", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] }, { "matcher": { "id": "byValue", "options": { "op": "gte", "reducer": "allIsNull", "value": 0 } }, "properties": [ { "id": "custom.hideFrom", "value": { "legend": true, "tooltip": true, "viz": false } } ] } ] }, "pluginVersion": "12.0.2", "targets": [ { "datasource": { "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": 
"1 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\", instance=~'$instance'}[$rate_interval])))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "CPU", "metric": "node_cpu", "range": true, "refId": "A", "step": 40 }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "clamp_min((1 - (node_memory_MemAvailable_bytes{instance='$instance'} / node_memory_MemTotal_bytes{instance='$instance'})), 0)", "hide": false, "instant": false, "legendFormat": "Memory", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "1 - min(node_filesystem_free_bytes{instance=~'$instance', mountpoint=~'$filesystem'} / node_filesystem_size_bytes{instance=~'$instance', mountpoint=~'$filesystem'})", "hide": false, "instant": false, "legendFormat": "Storage", "range": true, "refId": "C" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "max(irate(node_disk_io_time_seconds_total{instance='$instance',device=~'$disk'}[$rate_interval]))", "hide": false, "instant": false, "legendFormat": "I/O", "range": true, "refId": "D" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "node_memory_SwapCached_bytes{instance=~'$instance'} / (node_memory_SwapCached_bytes{instance=~'$instance'} + node_memory_SwapFree_bytes{instance=~'$instance'})", "hide": false, "instant": false, "legendFormat": "Swap", "range": true, "refId": "E" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "sum(avg_over_time(sql_pg_stat_activity{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col=\"count\"}[$rate_interval])) / (sum(sql_settings{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='max_connections'}) - sum(sql_settings{host=~'$host', instance=~'$instance', sql_job=~'$cluster', 
col='superuser_reserved_connections'}))", "hide": false, "instant": false, "legendFormat": "Connections", "range": true, "refId": "F" } ], "datasource": { "uid": "${DS_PROMETHEUS}" }, "options": { "tooltip": { "mode": "multi", "sort": "none", "hideZeros": false }, "legend": { "showLegend": true, "displayMode": "table", "placement": "right", "calcs": [ "min", "mean", "max" ] }, "alertThreshold": true } }, { "datasource": { "uid": "${DS_PROMETHEUS}" }, "description": "How often tasks experience CPU, memory, or I/O delays. “Some” indicates partial slowdown; “Full” indicates all tasks are stalled. Based on Linux PSI metrics:\nhttps://docs.kernel.org/accounting/psi.html", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "some (-) / full (+)", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 } ] }, "unit": "percentunit" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/.*Some.*/" }, "properties": [ { "id": "custom.fillOpacity", "value": 0 } ] }, { "matcher": { "id": "byRegexp", "options": "/.*Some.*/" }, "properties": [ { "id": "custom.transform", "value": "negative-Y" } ] } ] }, "gridPos": { "h": 7, "w": 9, "x": 15, "y": 8 }, "id": 125, "options": { "legend": { "calcs": [ "min", "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", 
"sort": "none" } }, "pluginVersion": "12.2.1", "targets": [ { "editorMode": "code", "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance='$instance'}[$rate_interval])", "format": "time_series", "legendFormat": "CPU - Some", "range": true, "refId": "CPU some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_memory_waiting_seconds_total{instance='$instance'}[$rate_interval])", "format": "time_series", "legendFormat": "Memory - Some", "range": true, "refId": "Memory some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_memory_stalled_seconds_total{instance='$instance'}[$rate_interval])", "format": "time_series", "legendFormat": "Memory - Full", "range": true, "refId": "Memory full", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_io_waiting_seconds_total{instance='$instance'}[$rate_interval])", "format": "time_series", "legendFormat": "I/O - Some", "range": true, "refId": "I/O some", "step": 240 }, { "editorMode": "code", "expr": "rate(node_pressure_io_stalled_seconds_total{instance='$instance'}[$rate_interval])", "format": "time_series", "legendFormat": "I/O - Full", "range": true, "refId": "I/O full", "step": 240 } ], "title": "Pressure Stall Information", "type": "timeseries" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "decimals": 0, "description": "Number of WAL files that still need to be archived.\n\nCan grow under write load but should not increase over longer periods of time.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "none", "gauge": { "maxValue": 3000, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 0, "y": 14 }, "id": 45, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { 
"name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.02)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sql_archive_ready{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "1000,2000", "title": "WAL Files Ready", "transparent": true, "type": "singlestat", "valueFontSize": "70%", "valueMaps": [ { "op": "=", "text": "0", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "decimals": 0, "description": "The amount of WAL data that still needs to be archived.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "MBs", "gauge": { "maxValue": 15, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 4, "y": 14 }, "id": 50, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "delta(sql_archive_ready{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}[1h]) * 16 / 3600", "format": 
"time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "5,10", "title": "WAL Ready Growth [1h]", "transparent": true, "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "decimals": 2, "description": "Shows the rate/increase in failed archive actions.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "none", "gauge": { "maxValue": 3, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 8, "y": 14 }, "id": 46, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "increase(sql_pg_stat_archiver{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='failed_count'}[$rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "0.9,2.1", "title": "WAL Archive Fails [$rate_interval]", "transparent": true, "type": "singlestat", "valueFontSize": "70%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": 
"${DS_PROMETHEUS}", "decimals": null, "description": "Active WAL senders in relation to max_wal_senders.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "percentunit", "gauge": { "maxValue": 1, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 12, "y": 14 }, "id": 43, "interval": null, "links": [], "mappingType": 2, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "No WS", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "count(sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', pid!='0', col='flush_lag_bytes'})/ scalar(sql_settings{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='max_wal_senders'})", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "0.7,0.85", "title": "WAL Senders", "transparent": true, "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "", "value": "" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "description": "Shows the number of prepared transactions currently in use.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "none", "gauge": { "maxValue": 1, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 16, "y": 14 }, "id": 49, "interval": null, "links": [], "mappingType": 
1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum(sql_prepared_transactions{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='count', datname=~'$datname'})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 600 } ], "thresholds": "0.8,0.9", "title": "Prepared Transactions", "transparent": true, "type": "singlestat", "valueFontSize": "70%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": true, "colors": [ "rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)" ], "datasource": "${DS_PROMETHEUS}", "decimals": 1, "description": "Shows the maximum of send_lag_bytes, flush_lag_bytes, replay_lag_bytes over all connected streaming receivers.\nThis is not a sufficient way to monitor if a standby is up to date!\nCan only give a hint if streaming is delayed.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "format": "decbytes", "gauge": { "maxValue": 1000000000, "minValue": 0, "show": true, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 6, "w": 4, "x": 20, "y": 14 }, "id": 64, "interval": null, "links": [], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", 
"to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "max(sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col=~'(send_lag_bytes|flush_lag_bytes|replay_lag_bytes)'})", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "", "metric": "node_filesystem_free", "refId": "A", "step": 300 } ], "thresholds": "500000000,750000000", "title": "Max Replication Lag", "transparent": true, "type": "singlestat", "valueFontSize": "50%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 62 }, "id": 76, "panels": [], "repeat": null, "title": "PostgreSQL Database Size", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 63 }, "hiddenSeries": false, "id": 67, "legend": { "avg": false, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname', col='dbsize'}[$rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{datname}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, 
"title": "Database Growth [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 63 }, "hiddenSeries": false, "id": 68, "legend": { "avg": true, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname', col='dbsize'}", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{datname}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Database Size - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } 
}, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 70 }, "id": 75, "panels": [], "repeat": null, "title": "PostgreSQL Connections", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 71 }, "hiddenSeries": false, "id": 6, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": false, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:5500", "alias": "waiting", "color": "#F2495C" }, { "$$hashKey": "object:5508", "alias": "idle in transaction", "color": "#FF9830" }, { "$$hashKey": "object:5516", "alias": "active", "color": "#73BF69" }, { "$$hashKey": "object:5530", "alias": "idle", "color": "#5794F2" }, { "$$hashKey": "object:743", "alias": "idle in transaction (aborted)", "color": "#FADE2A" } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sort_desc(sum by (state) (sql_pg_stat_activity{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='count', datname=~'$datname'}))", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{state}}", "metric": "sql_pg_stat_activity", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Connections by State - $datname", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "transformations": [], "type": "graph", "xaxis": { "buckets": null, "mode": "time", 
"name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:5021", "decimals": 0, "format": "short", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:5022", "decimals": 3, "format": "short", "label": "", "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 71 }, "hiddenSeries": false, "id": 9, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:1166" } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum by (datname) (sql_pg_stat_activity{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='count', datname=~'$datname'})", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{datname}}", "metric": "sql_pg_stat_activity", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Connections by Database - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:715", "decimals": 0, "format": "short", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:716", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, 
"alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 80 }, "id": 93, "panels": [], "title": "PostgreSQL Transactions and Locks", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 7, "x": 0, "y": 81 }, "hiddenSeries": false, "id": 10, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum by (datname) (rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='xact_commit', datname=~'$datname'}[$rate_interval]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "commits {{datname}}", "metric": "sql_pg_stat_database", "refId": "A", "step": 60 }, { "expr": "sum by (datname) (rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='xact_rollback', datname=~'$datname'}[$rate_interval]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "rollbacks {{datname}}", "refId": "B", "step": 60 }, { "expr": "rate(sql_txid{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}[$rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "TXIDs (global)", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Transactions by database [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": 
null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1487", "format": "ops", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1488", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 5, "x": 7, "y": 81 }, "hiddenSeries": false, "id": 91, "legend": { "avg": false, "current": false, "max": true, "min": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "max by (datname) (sql_pg_stat_activity{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='max_tx_duration', datname=~'$datname'})", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "{{datname}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Maximum Transaction Age", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1565", "format": "s", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1566", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": { "AccessExclusiveLock": "#99440A", "ExclusiveLock": 
"#BF1B00" }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 12, "y": 81 }, "hiddenSeries": false, "id": 42, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:1299", "alias": "ExclusiveLock", "color": "#BF1B00", "fill": 6, "linewidth": 4 }, { "$$hashKey": "object:1300", "alias": "AccessExclusiveLock", "color": "#58140C" } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum (sql_pg_locks{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname'}) by (mode)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ mode }}", "refId": "A", "step": 20 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Locks - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1317", "decimals": 0, "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1318", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 18, "y": 81 }, "hiddenSeries": 
false, "id": 121, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:1071", "alias": "/.*/", "color": "#FADE2A" } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='deadlocks', datname=~'$datname'}[90s]) * 60", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "deadlocks {{datname}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Deadlocks - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1085", "decimals": 0, "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:1086", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 88 }, "id": 78, "panels": [], "repeat": null, "title": "PostgreSQL Tuple Activity", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 14, "x": 0, "y": 89 }, "hiddenSeries": false, "id": 24, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideZero": false, "max": false, "min": false, "rightSide": false, 
"show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname', col='tup_returned'}[$rate_interval])", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "Select (table scan) {{datname}}", "refId": "A", "step": 120 }, { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname', col='tup_fetched'}[$rate_interval])", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "Select (index scan) {{datname}}", "refId": "B", "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Read Stats [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:3090", "format": "short", "label": "Rows", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:3091", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 5, "fillGradient": 0, "gridPos": { "h": 7, "w": 10, "x": 14, "y": 89 }, "hiddenSeries": false, "id": 16, "legend": { "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "show": true, 
"total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, sql_pg_stat_user_tables{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='n_live_tup', datname=~'$datname'})", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "live {{schemaname}}.{{relname}} ({{database}})", "metric": "sql_pg_stat_user_tables", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Live Tuples (top $ntop_relations) - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 14, "x": 0, "y": 96 }, "hiddenSeries": false, "id": 122, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, 
"targets": [ { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname', col='tup_inserted'}[$rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "Insert {{datname}}", "refId": "A", "step": 120 }, { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname', col='tup_updated'}[$rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "Update {{datname}}", "refId": "B", "step": 120 }, { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname', col='tup_deleted'}[$rate_interval])", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "Delete {{datname}}", "refId": "C", "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Change Stats [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:3016", "format": "short", "label": "Rows", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:3017", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 5, "fillGradient": 0, "gridPos": { "h": 7, "w": 5, "x": 14, "y": 96 }, "hiddenSeries": false, "id": 12, "legend": { "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": 
{ "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, sql_pg_stat_user_tables{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='n_dead_tup', database=~'$datname'})", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "dead {{schemaname}}.{{relname}} ({{database}})", "metric": "", "refId": "B", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Dead Tuples (top $ntop_relations) - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 5, "x": 19, "y": 96 }, "hiddenSeries": false, "id": 123, "legend": { "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='temp_bytes', 
datname=~'$datname'}[$rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "temp bytes {{datname}}", "metric": "", "refId": "B", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Temp Files [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:3200", "format": "binBps", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:3201", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 103 }, "id": 77, "panels": [], "repeat": null, "title": "PostgreSQL Table/Index Scans and Buffers", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 5, "fillGradient": 0, "gridPos": { "h": 8, "w": 9, "x": 0, "y": 104 }, "hiddenSeries": false, "id": 5, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, rate(sql_pg_stat_user_tables{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='seq_tup_read', 
database=~'$datname'}[$rate_interval]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "seq scan tuples {{schemaname}}.{{relname}} ({{database}})", "metric": "", "refId": "B", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Sequential Scan Tuples [$rate_interval] (top $ntop_relations) - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 5, "fillGradient": 0, "gridPos": { "h": 8, "w": 9, "x": 9, "y": 104 }, "hiddenSeries": false, "id": 55, "legend": { "alignAsTable": true, "avg": true, "current": true, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, rate(sql_pg_stat_user_tables{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='idx_tup_fetch', database=~'$datname'}[$rate_interval]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "index scan tuples {{schemaname}}.{{relname}} ({{database}})", "metric": "sql_pg_stat_user_tables", "refId": "A", 
"step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Index Scan Tuples [$rate_interval] (top $ntop_relations) - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 6, "x": 18, "y": 104 }, "hiddenSeries": false, "id": 27, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(increase(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname', col='blks_hit'}[$rate_interval])) / (sum(increase(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname', col='blks_read'}[$rate_interval])) + sum(increase(sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', datname=~'$datname', col='blks_hit'}[$rate_interval])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "hit rate", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Buffer Hit Rate [$rate_interval] - $datname", 
"tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", "label": null, "logBase": 1, "max": 1, "min": "null", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 112 }, "id": 126, "panels": [], "title": "PostgreSQL I/O", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineStyle": { "fill": "solid" }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 80 } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 113 }, "id": 127, "options": { "legend": { "calcs": [ "mean" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.3.0", "targets": [ { "editorMode": "code", "expr": "sum(rate(sql_pg_stat_io{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='reads'}[$rate_interval])) by (backend_type)", 
"legendFormat": "{{backend_type}}", "range": true, "refId": "A" } ], "title": "Read IOPS by backend type", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "showValues": false, "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 80 } ] }, "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 113 }, "id": 128, "options": { "legend": { "calcs": [ "mean" ], "displayMode": "table", "placement": "bottom", "showLegend": true, "sortBy": "Mean", "sortDesc": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "pluginVersion": "12.3.0", "targets": [ { "editorMode": "code", "expr": "sum(rate(sql_pg_stat_io{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', col='writes'}[$rate_interval])) by (backend_type)", "legendFormat": "{{backend_type}}", "range": true, "refId": "A" } ], "title": "Write IOPS by backend type", "type": "timeseries" }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 120 }, "id": 80, "panels": [], "repeat": null, "title": "PostgreSQL Transaction Log and Checkpoints", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": 
{ "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 7, "x": 0, "y": 121 }, "hiddenSeries": false, "id": 26, "legend": { "alignAsTable": false, "avg": true, "current": true, "hideEmpty": false, "hideZero": false, "max": true, "min": true, "rightSide": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(sql_waldistance{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}[$rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "WAL traffic", "metric": "sql_waldistance", "refId": "A", "step": 60 }, { "expr": "rate(sql_checkpoints{col=\"buffers\", host=~'$host', instance=~'$instance', sql_job=~'$cluster'}[$rate_interval]) * 8192", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "Checkpoint traffic", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "WAL/Checkpoint Traffic [$rate_interval]", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "Bps", "label": "", "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "decbytes", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 9, "x": 7, "y": 121 }, "hiddenSeries": false, "id": 29, "legend": { 
"avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "$$hashKey": "object:795", "alias": "requested (explicit, wal or backup-based)", "color": "#FADE2A" }, { "$$hashKey": "object:803", "alias": "timed", "color": "#73BF69" } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "floor(increase(sql_checkpoints{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}[55s]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{col}}", "metric": "sql_checkpoints", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Checkpoints", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "transformations": [ { "id": "renameByRegex", "options": { "regex": "requested", "renamePattern": "requested (explicit, wal or backup-based)" } }, { "id": "filterFieldsByName", "options": { "include": { "names": [ "Time", "requested (explicit, wal or backup-based)", "timed" ] } } } ], "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:673", "decimals": 0, "format": "opm", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": { "distance": "#CCA300" }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 121 }, "hiddenSeries": false, "id": 31, 
"legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_LastCheckpointDistance{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{col}}", "metric": "sql_LastCheckpointDistance", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "WAL since last Checkpoint", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 128 }, "id": 81, "panels": [], "repeat": null, "title": "PostgreSQL Vacuum and Analyze", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 10, "x": 0, "y": 129 }, "hiddenSeries": false, "id": 30, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { 
"alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "increase(sql_Maintenancecounters{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', datname=~'$datname'}[$rate_interval])", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{col}} ({{database}})", "metric": "sql_Maintenancecounters", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Maintenance Operations per Minute [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "opm", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 9, "x": 10, "y": 129 }, "hiddenSeries": false, "id": 53, "legend": { "avg": false, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_Maintenancecounters{host=~'$host', instance=~'$instance', sql_job=~'$cluster.*', database=~'$datname'}", "format": "time_series", 
"interval": "", "intervalFactor": 2, "legendFormat": "{{col}} ({{database}})", "metric": "sql_Maintenancecounters", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Maintenance Counters - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "Age of oldest XID in database\n\nWhen reaching autovacuum_freeze_max_age (default: 200 M), tables will be frozen by autovacuum.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 5, "x": 19, "y": 129 }, "hiddenSeries": false, "id": 54, "legend": { "alignAsTable": false, "avg": false, "current": true, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_pg_stat_database{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='freeze_age', datname=~'$datname'}", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{ datname }}", "metric": "sql_freeze_age", "refId": "A", "step": 120 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Database Freeze Age - $datname", "tooltip": {
"shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 136 }, "id": 85, "panels": [], "repeat": null, "title": "PostgreSQL Archiving and Replication", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "Number of WAL files that still need to be archived.\n\nCan grow under write load but should not increase over longer periods of time.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 7, "x": 0, "y": 137 }, "hiddenSeries": false, "id": 51, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_archive_ready{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}", "format": "time_series", "intervalFactor": 2, "legendFormat": "WAL Files Ready", "refId": "A", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "WAL Files Ready", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": 
null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": { "failed_count": "#BF1B00" }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "Successful and failed archive commands per minute.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 7, "x": 7, "y": 137 }, "hiddenSeries": false, "id": 47, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "increase(sql_pg_stat_archiver{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col=~'archived_count|failed_count'}[$rate_interval])", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{col}}", "metric": "", "refId": "B", "step": 60 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "WAL Archiving per Minute [$rate_interval]", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "opm", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": { "flush_lag_bytes": "#E5AC0E", "replay_lag_bytes": "#629E51", "send_lag_bytes": "#BF1B00" }, "bars": false, "dashLength": 
10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "The amount of data a stream receiver like a standby server lags behind the master.\n\nsend_lag: Data that still needs to be sent (should be small).\n\nflush_lag: Data that was received but must be flushed to persistent storage (this data can be lost if the standby is terminated).\n\nreplay_lag: Data that still needs to be replayed by PostgreSQL on the standby (can lag behind during writes in asynchronous configurations).", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 10, "x": 14, "y": 137 }, "hiddenSeries": false, "id": 61, "legend": { "alignAsTable": false, "avg": false, "current": false, "hideEmpty": true, "hideZero": false, "max": false, "min": false, "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='send_lag_bytes', application_name != ''}", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "{{application_name}} (send)", "refId": "A", "step": 40 }, { "expr": "sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='flush_lag_bytes', application_name != ''}", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{application_name}} (flush)", "refId": "B" }, { "expr": "sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='replay_lag_bytes', application_name != ''}", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "{{application_name}}
(replay)", "refId": "C" }, { "expr": "sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col=~'replay_lag_bytes|flush_lag_bytes|send_lag_bytes', application_name=''}>0", "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "MISSING application_name ({{col}})", "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Replication Lag", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "${DS_PROMETHEUS}", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 143 }, "id": 83, "panels": [], "repeat": null, "title": "PostgreSQL Statements", "type": "row" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "Statement average execution time", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 11, "w": 8, "x": 0, "y": 143 }, "hiddenSeries": false, "id": 57, "legend": { "alignAsTable": true, "avg": true, "current": false, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, rate(sql_pg_stat_statements{host=~'$host', 
instance=~'$instance', sql_job=~'$cluster', col='total_time', datname=~'$datname'}[$rate_interval]) / ignoring(col) rate(sql_pg_stat_statements{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='calls', datname=~'$datname'}[$rate_interval]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "({{datname}}) {{query}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Top $ntop_relations Statements by Average Execution Time [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ms", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 11, "w": 8, "x": 8, "y": 144 }, "hiddenSeries": false, "id": 98, "legend": { "alignAsTable": true, "avg": true, "current": false, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, sum by (query, datname) (rate(sql_pg_stat_statements{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col=~'shared_blks_.*', 
datname=~'$datname'}[$rate_interval])*8))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "({{datname}}) {{query}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Top $ntop_relations Statements by Buffers [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "KBs", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_PROMETHEUS}", "description": "Statements called per second", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 11, "w": 8, "x": 16, "y": 144 }, "hiddenSeries": false, "id": 59, "legend": { "alignAsTable": true, "avg": true, "current": false, "hideEmpty": true, "hideZero": true, "max": true, "min": false, "rightSide": false, "show": true, "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.4.3", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk($ntop_relations, rate(sql_pg_stat_statements{host=~'$host', instance=~'$instance', sql_job=~'$cluster', col='calls', datname=~'$datname'}[$rate_interval]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "({{datname}}) {{query}}", "refId": "A", "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": 
null, "title": "Top $ntop_relations Statements by Calls [$rate_interval] - $datname", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "ops", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "columns": [], "datasource": "${DS_PROMETHEUS}", "description": "Shows how long statements took in the chosen time period.", "fieldConfig": { "defaults": { "custom": { "cellOptions": { "type": "auto" }, "inspect": false }, "decimals": 2, "displayName": "", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "Time" }, "properties": [ { "id": "displayName", "value": "Time" }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "__name__" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "col" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "database" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "driver" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": 
"host" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "job" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "instance" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "queryid" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "sql_job" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "user" }, "properties": [ { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.hidden", "value": true }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "Value" }, "properties": [ { "id": "displayName", "value": "Total Time" }, { "id": "unit", "value": "ms" }, { "id": "decimals", "value": 3 }, { "id": "custom.align" }, { "id": "custom.width", "value": 100 } ] }, { "matcher": { "id": "byName", "options": "query" }, "properties": [ { "id": "displayName", "value": "Query" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" } ] }, { "matcher": { "id": "byName", "options": "datname" }, "properties": [ { "id": "displayName", "value": "Database" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" }, { "id": "custom.width", "value": 125 } ] }, { "matcher": { "id": "byName", "options": 
"usename" }, "properties": [ { "id": "displayName", "value": "User" }, { "id": "unit", "value": "short" }, { "id": "decimals", "value": 2 }, { "id": "custom.align" }, { "id": "custom.width", "value": 125 } ] } ] }, "gridPos": { "h": 12, "w": 24, "x": 0, "y": 155 }, "id": 58, "options": { "cellHeight": "sm", "footer": { "countRows": false, "fields": "", "reducer": [ "sum" ], "show": false }, "showHeader": true }, "pluginVersion": "11.3.1", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "topk($ntop_relations, increase(sql_pg_stat_statements{host=~'$host', instance=~'$instance', sql_job=~'$cluster', host=~'$host', col='total_time', datname=~'$datname'}[$__range]))", "format": "table", "instant": true, "interval": "", "intervalFactor": 2, "legendFormat": "", "refId": "A", "step": 2 } ], "title": "Top $ntop_relations Statements by Total Time - $datname", "transformations": [ { "id": "organize", "options": { "excludeByName": {}, "indexByName": { "Time": 0, "Value": 10, "col": 1, "database": 2, "datname": 3, "driver": 6, "host": 7, "instance": 8, "job": 9, "query": 11, "queryid": 12, "sql_job": 13, "usename": 4, "user": 5 }, "renameByName": {} } } ], "type": "table" } ], "refresh": "1m", "schemaVersion": 27, "style": "dark", "tags": [ "prometheus", "system", "postgresql", "database" ], "templating": { "list": [ { "hide": 2, "label": "datasource", "name": "DS_PROMETHEUS", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": null, "current": { "selected": false, "text": "localhost", "value": "localhost" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": false, "label": "Server", "multi": false, "name": "instance", "options": [], "query": { "query": "label_values(sql_settings{col='max_connections'}, instance)", "refId": "prometheus-job-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, 
"tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "selected": false, "text": "11/main", "value": "11/main" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": false, "label": "Cluster", "multi": false, "name": "cluster", "options": [], "query": { "query": "label_values(sql_settings{instance='$instance'}, sql_job)", "refId": "prometheus-cluster-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 2, "includeAll": true, "label": "PostgreSQL Port", "multi": false, "name": "host", "options": [], "query": { "query": "label_values(sql_settings{instance='$instance'}, host)", "refId": "prometheus-host-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": "", "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Database", "multi": false, "name": "datname", "options": [], "query": { "query": "label_values(sql_pg_stat_database{col='dbsize', host=~'$host', instance=~'$instance', sql_job=~'$cluster'}, datname)", "refId": "prometheus-datname-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "auto": false, "auto_count": 30, "auto_min": "10s", "current": { "selected": false, "text": "5m", "value": "5m" }, "description": null, "error": null, "hide": 0, "label": "Rate 
Interval", "name": "rate_interval", "options": [ { "selected": false, "text": "1m", "value": "1m" }, { "selected": true, "text": "5m", "value": "5m" }, { "selected": false, "text": "10m", "value": "10m" }, { "selected": false, "text": "30m", "value": "30m" }, { "selected": false, "text": "1h", "value": "1h" } ], "query": "1m,5m,10m,30m,1h", "refresh": 2, "skipUrlSync": false, "type": "interval" }, { "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Disk", "multi": true, "name": "disk", "options": [], "query": { "query": "label_values(node_disk_written_bytes_total, device)", "refId": "prometheus-disk-Variable-Query" }, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "selected": true, "text": "5", "value": "5" }, "description": null, "error": null, "hide": 0, "includeAll": false, "label": "Top k", "multi": false, "name": "ntop_relations", "options": [ { "selected": false, "text": "1", "value": "1" }, { "selected": false, "text": "3", "value": "3" }, { "selected": true, "text": "5", "value": "5" }, { "selected": false, "text": "10", "value": "10" }, { "selected": false, "text": "20", "value": "20" }, { "selected": false, "text": "50", "value": "50" }, { "selected": false, "text": "100", "value": "100" } ], "query": "1,3,5,10,20,50,100", "skipUrlSync": false, "type": "custom" }, { "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Interface", "multi": false, "name": "interface", "options": [], "query": { "query": "label_values(node_network_receive_bytes_total, device)", "refId": "prometheus-interface-Variable-Query" 
}, "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Filesystem", "multi": true, "name": "filesystem", "options": [], "query": { "query": "label_values(node_filesystem_size_bytes{instance='$instance'}, mountpoint)", "refId": "prometheus-filesystem-Variable-Query" }, "refresh": 2, "regex": "(?!/media).*", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, "datasource": "${DS_PROMETHEUS}", "definition": "", "description": null, "error": null, "hide": 0, "includeAll": true, "label": "Streaming Receiver", "multi": false, "name": "application_name", "options": [], "query": { "query": "label_values(sql_pg_stat_replication{host=~'$host', instance=~'$instance', sql_job=~'$cluster'}, application_name)", "refId": "prometheus-application_name-Variable-Query" }, "refresh": 2, "regex": "^.+.+", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": { "refresh_intervals": [ "30s", "1m", "5m", "15m", "1h" ], "time_options": [ "1m", "5m", "15m", "1h" ] }, "timezone": "browser", "title": "PostgreSQL Server Overview", "uid": "5CGjHlRiz", "version": 1 } ================================================ FILE: grafana/postgresql_server_overview.yml ================================================ --- # config file version apiVersion: 1 providers: - name: 'postgresql_server_overview' orgId: 1 folder: '' type: file options: path: /usr/share/elephant-shed/grafana/postgresql_server_overview.json 
datasource: prometheus ================================================ FILE: node-exporter/elephant-shed-prometheus-node-exporter ================================================ # Set go garbage collector to clean up more frequently GOGC=40 # Use only one thread GOMAXPROCS=1 # Set the command-line arguments to pass to the server. # Due to shell escaping, to pass backslashes for regexes, you need to double # them (\\d for \d). If running under systemd, you need to double them again # (\\\\d to mean \d), and escape newlines too. ARGS="--collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$ \ --collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/) \ --collector.textfile.directory=/var/lib/prometheus/node-exporter \ --web.listen-address=:9100" # Prometheus-node-exporter supports the following options: # # --collector.bcache.priorityStats # Expose expensive priority stats. # --collector.cpu.info Enables metric cpu_info # --collector.cpu.info.flags-include=COLLECTOR.CPU.INFO.FLAGS-INCLUDE # Filter the `flags` field in cpuInfo with a value that must be a regular expression # --collector.cpu.info.bugs-include=COLLECTOR.CPU.INFO.BUGS-INCLUDE # Filter the `bugs` field in cpuInfo with a value that must be a regular expression # --collector.diskstats.ignored-devices="^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$" # Regexp of devices to ignore for diskstats. # --collector.filesystem.ignored-mount-points="^/(dev|proc|run|sys|mnt|media|var/lib/docker/.+)($|/)" # Regexp of mount points to ignore for filesystem collector. # --collector.filesystem.ignored-fs-types="^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$" # Regexp of filesystem types to ignore for filesystem collector. 
# --collector.ipvs.backend-labels="local_address,local_port,remote_address,remote_port,proto,local_mark" # Comma separated list for IPVS backend stats labels. # --collector.netclass.ignored-devices="^$" # Regexp of net devices to ignore for netclass collector. # --collector.netdev.device-include=COLLECTOR.NETDEV.DEVICE-INCLUDE # Regexp of net devices to include (mutually exclusive to device-exclude). # --collector.netdev.device-exclude="^lo$" # Regexp of net devices to exclude (mutually exclusive to device-include). # --collector.netstat.fields="^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$" # Regexp of fields to return for netstat collector. # --collector.ntp.server="127.0.0.1" # NTP server to use for ntp collector # --collector.ntp.protocol-version=4 # NTP protocol version # --collector.ntp.server-is-local # Certify that collector.ntp.server address is not a public ntp server # --collector.ntp.ip-ttl=1 IP TTL to use while sending NTP query # --collector.ntp.max-distance=3.46608s # Max accumulated distance to the root # --collector.ntp.local-offset-tolerance=1ms # Offset between local clock and local ntpd time to tolerate # --path.procfs="/proc" procfs mountpoint. # --path.sysfs="/sys" sysfs mountpoint. # --path.rootfs="/" rootfs mountpoint. # --collector.perf.cpus="" List of CPUs from which perf metrics should be collected # --collector.perf.tracepoint=COLLECTOR.PERF.TRACEPOINT ... # perf tracepoint that should be collected # --collector.powersupply.ignored-supplies="^$" # Regexp of power supplies to ignore for powersupplyclass collector. # --collector.qdisc.fixtures="" # test fixtures to use for qdisc collector end-to-end testing # --collector.runit.servicedir="/etc/service" # Path to runit service directory. 
# --collector.supervisord.url="http://localhost:9001/RPC2" # XML RPC endpoint. # --collector.systemd.unit-include=".+" # Regexp of systemd units to include. Units must both match include and not match exclude to be included. # --collector.systemd.unit-exclude=".+\\.(automount|device|mount|scope|slice|target)" # Regexp of systemd units to exclude. Units must both match include and not match exclude to be included. # --collector.systemd.enable-task-metrics # Enables service unit tasks metrics unit_tasks_current and unit_tasks_max # --collector.systemd.enable-restarts-metrics # Enables service unit metric service_restart_total # --collector.systemd.enable-start-time-metrics # Enables service unit metric unit_start_time_seconds # --collector.textfile.directory="/var/lib/prometheus/node-exporter" # Directory to read text files with metrics from. # --collector.vmstat.fields="^(oom_kill|pgpg|pswp|pg.*fault).*" # Regexp of fields to return for vmstat collector. # --collector.wifi.fixtures="" # test fixtures to use for wifi collector metrics # --collector.arp Enable the arp collector (default: enabled). # --collector.bcache Enable the bcache collector (default: enabled). # --collector.bonding Enable the bonding collector (default: enabled). # --collector.btrfs Enable the btrfs collector (default: enabled). # --collector.buddyinfo Enable the buddyinfo collector (default: disabled). # --collector.conntrack Enable the conntrack collector (default: enabled). # --collector.cpu Enable the cpu collector (default: enabled). # --collector.cpufreq Enable the cpufreq collector (default: enabled). # --collector.diskstats Enable the diskstats collector (default: enabled). # --collector.drbd Enable the drbd collector (default: disabled). # --collector.edac Enable the edac collector (default: enabled). # --collector.entropy Enable the entropy collector (default: enabled). # --collector.fibrechannel Enable the fibrechannel collector (default: enabled). 
# --collector.filefd Enable the filefd collector (default: enabled). # --collector.filesystem Enable the filesystem collector (default: enabled). # --collector.hwmon Enable the hwmon collector (default: enabled). # --collector.infiniband Enable the infiniband collector (default: enabled). # --collector.interrupts Enable the interrupts collector (default: disabled). # --collector.ipvs Enable the ipvs collector (default: enabled). # --collector.ksmd Enable the ksmd collector (default: disabled). # --collector.loadavg Enable the loadavg collector (default: enabled). # --collector.logind Enable the logind collector (default: disabled). # --collector.mdadm Enable the mdadm collector (default: enabled). # --collector.meminfo Enable the meminfo collector (default: enabled). # --collector.meminfo_numa Enable the meminfo_numa collector (default: disabled). # --collector.mountstats Enable the mountstats collector (default: disabled). # --collector.netclass Enable the netclass collector (default: enabled). # --collector.netdev Enable the netdev collector (default: enabled). # --collector.netstat Enable the netstat collector (default: enabled). # --collector.network_route Enable the network_route collector (default: disabled). # --collector.nfs Enable the nfs collector (default: enabled). # --collector.nfsd Enable the nfsd collector (default: enabled). # --collector.ntp Enable the ntp collector (default: disabled). # --collector.perf Enable the perf collector (default: disabled). # --collector.powersupplyclass # Enable the powersupplyclass collector (default: enabled). # --collector.pressure Enable the pressure collector (default: enabled). # --collector.processes Enable the processes collector (default: disabled). # --collector.qdisc Enable the qdisc collector (default: disabled). # --collector.rapl Enable the rapl collector (default: enabled). # --collector.runit Enable the runit collector (default: disabled). 
# --collector.schedstat Enable the schedstat collector (default: enabled). # --collector.sockstat Enable the sockstat collector (default: enabled). # --collector.softnet Enable the softnet collector (default: enabled). # --collector.stat Enable the stat collector (default: enabled). # --collector.supervisord Enable the supervisord collector (default: disabled). # --collector.systemd Enable the systemd collector (default: enabled). # --collector.tcpstat Enable the tcpstat collector (default: disabled). # --collector.textfile Enable the textfile collector (default: enabled). # --collector.thermal_zone Enable the thermal_zone collector (default: enabled). # --collector.time Enable the time collector (default: enabled). # --collector.timex Enable the timex collector (default: enabled). # --collector.udp_queues Enable the udp_queues collector (default: enabled). # --collector.uname Enable the uname collector (default: enabled). # --collector.vmstat Enable the vmstat collector (default: enabled). # --collector.wifi Enable the wifi collector (default: disabled). # --collector.xfs Enable the xfs collector (default: enabled). # --collector.zfs Enable the zfs collector (default: enabled). # --collector.zoneinfo Enable the zoneinfo collector (default: disabled). # --web.listen-address=":9100" # Address on which to expose metrics and web interface. # --web.telemetry-path="/metrics" # Path under which to expose metrics. # --web.disable-exporter-metrics # Exclude metrics about the exporter itself (promhttp_*, process_*, go_*). # --web.max-requests=40 Maximum number of parallel scrape requests. Use 0 to disable. # --collector.disable-defaults # Set all collectors to disabled by default. # --web.config="" [EXPERIMENTAL] Path to config yaml file that can enable TLS or authentication. # --log.level=info Only log messages with the given severity or above. One of: [debug, info, warn, error] # --log.format=logfmt Output format of log messages. 
#!/bin/bash

# Wrapper around pgbackrest for use as a PostgreSQL archive_command.
#
# It makes sure that the stanza named by the --stanza=<version>-<cluster>
# argument exists before handing all arguments on to pgbackrest unchanged:
#   1. if /etc/pgbackrest.conf has no section for the stanza, one is appended
#      using the cluster's data_directory and port;
#   2. if "pgbackrest info" does not list the stanza, it is created;
#   3. finally pgbackrest is exec'ed with the original argument list.
#
# This is a kludge that should go away as soon as pg_createcluster gets
# hook support.

set -eu

# Locate the stanza option anywhere in the argument list. The value has to be
# taken from the argument currently being inspected, not from "$1", otherwise
# a --stanza option that is not the first argument yields a bogus name.
PGCLUSTER=""
for arg in "$@"; do
  case "$arg" in
    --stanza=*) PGCLUSTER="${arg#--stanza=}" ;;
  esac
done

if [ -z "$PGCLUSTER" ]; then
  echo "ERROR: No stanza found in argument list." >&2
  exit 1
fi

PGBACKREST="pgbackrest --log-level-console=info --stanza=$PGCLUSTER"

# Register the stanza in the pgbackrest configuration if it is missing.
if ! grep -q -F "[$PGCLUSTER]" /etc/pgbackrest.conf; then
  # "9.6-main" becomes the two arguments "9.6 main" (intentionally unquoted).
  CONFTOOL="pg_conftool -s ${PGCLUSTER/-/ }"
  PGDATA=$($CONFTOOL show data_directory)
  PGPORT=$($CONFTOOL show port)
  # Abort (set -e) rather than write a section pointing at a missing directory.
  test -d "$PGDATA"
  cat >> /etc/pgbackrest.conf <<EOF
[$PGCLUSTER]
db-path=$PGDATA
db-port=$PGPORT
EOF
fi

# Create the stanza in the repository if pgbackrest does not know it yet.
if ! pgbackrest info | grep -q "stanza: $PGCLUSTER"; then
  $PGBACKREST stanza-create
fi

# Hand over to pgbackrest with the untouched argument list.
exec pgbackrest "$@"
#!/bin/bash

# This script controls whether to enable or disable archiving using pgbackrest.
# Any archive_command that doesn't contain the string "pgbackrest" is
# interpreted as "disabled".
#
# Usage: pgbackrest-toggle-archiving <cluster> [enable|disable|toggle]
#   cluster  is of format version-name (e.g. 9.6-main)
#   action   defaults to "toggle" when omitted or empty
#
# After changing archive_command the cluster configuration is reloaded.

set -eu

PGCLUSTER="$1" # in 9.6-main format
# "9.6-main" becomes the two arguments "9.6 main" (intentionally unquoted on use).
CONFTOOL="pg_conftool -s ${PGCLUSTER/-/ }"
ACTION="toggle"
COMMAND_ENABLED="/usr/share/elephant-shed/pgbackrest-archivecommand --stanza=${PGCLUSTER} archive-push %p"
COMMAND_DISABLED="/bin/true"

# The action is optional. "${2:-}" keeps this safe under "set -u", and quoting
# it makes sure an action containing whitespace is rejected below instead of
# breaking the test and silently falling back to "toggle".
REQUESTED_ACTION="${2:-}"
if [ -n "$REQUESTED_ACTION" ]; then
  case "$REQUESTED_ACTION" in
    "enable"|"disable"|"toggle")
      ACTION="$REQUESTED_ACTION"
      ;;
    *)
      echo "ERROR: action \"$REQUESTED_ACTION\" not supported, exiting." 1>&2
      exit 1
      ;;
  esac
fi

if $CONFTOOL --short show archive_command | grep -q "pgbackrest"; then
  # archiving is currently on
  if [ "$ACTION" == "enable" ]; then
    echo "INFO: archive_command is enabled. No action required."
    exit 0
  fi
  echo "INFO: archive_command is enabled. Disable archiving using \"$COMMAND_DISABLED\" as archive_command."
  $CONFTOOL set archive_command "$COMMAND_DISABLED"
else
  # archiving is currently off
  if [ "$ACTION" == "disable" ]; then
    echo "INFO: archive_command is disabled. No action required."
    exit 0
  fi
  echo "INFO: archive_command is disabled. Enable archiving using \"$COMMAND_ENABLED\" as archive_command."
  $CONFTOOL set archive_command "$COMMAND_ENABLED"
fi

# Check if archive mode is off and print a warning.
if $CONFTOOL --short show archive_mode | grep -q "off"; then
  echo "WARNING: Archiving is disabled (\"archive_mode = off\")."
  echo "DETAIL: Changes to the archive_command doesn't have any effect."
  echo "HINT: Please switch the archive_mode to \"on\" manually."
fi

echo "INFO: Reloading cluster configuration for \"${PGCLUSTER/-/ }\"."
pg_ctlcluster ${PGCLUSTER/-/ } reload
#!/bin/bash

# Generate an incremental pgBadger report for one PostgreSQL cluster.
#
# Usage: pgbadger-run <version>-<cluster>   (e.g. 9.6-main, systemd's %i)
#
# The log file location is taken from pg_lsclusters, log_line_prefix from
# pg_conftool; reports are written to /var/lib/pgbadger/<version>-<cluster>.

set -eu

PGCLUSTER="$1" # in 9.6-main format (%i)
REPORTDIR="/var/lib/pgbadger/$PGCLUSTER"

INFO="$(pg_lsclusters -h $PGCLUSTER)"
[ "$INFO" ] || exit 1
# Split the pg_lsclusters line into positional parameters; fields 6 and 7 are
# used as data directory and log file below.
set -- $INFO
PGDATA="$6"
LOGFILE="$7"
LOGFILE="${LOGFILE%,*}" # strip ",syslog" et al
# Replace *every* logging_collector placeholder (%Y, %m, ...) by a wildcard.
# A single-match substitution would leave later placeholders in the pattern,
# so the glob below would never match a file name like
# postgresql-%Y-%m-%d_%H%M%S.log.
LOGFILE="${LOGFILE//\%?/*}"
case $LOGFILE in
  /*) ;;
  *) LOGFILE="$PGDATA/$LOGFILE" ;; # prepend PGDATA
esac

# log_line_prefix is optional: do not let a failing lookup abort the script
# under "set -e"; fall back to pgbadger's own prefix detection instead.
PREFIX="$(pg_conftool -s ${PGCLUSTER/-/ } show log_line_prefix)" || PREFIX=""

# Be quiet when not attached to a terminal (e.g. when run from systemd).
[ -t 1 ] || QUIET="--quiet"

mkdir -p "$REPORTDIR"

set -x
# ${PREFIX:+...} only passes --prefix when a non-empty prefix was found;
# $LOGFILE* is left unquoted on purpose so the wildcards are expanded.
pgbadger ${QUIET:-} \
  --format stderr ${PREFIX:+--prefix="$PREFIX"} \
  --title="${PGCLUSTER/-//} pgBadger report" \
  --incremental --extra-files --start-monday \
  --pid-dir "$REPORTDIR" --outdir "$REPORTDIR" \
  $LOGFILE*
#!/usr/bin/perl

# CGI page that shows the pgBackRest status files found in
# /var/www/html/pgbackrest (files ending in ".backup" or ".log").
#
# GET parameters:
#   cluster  optional; when given, only files whose name contains this text
#            are shown. The value is matched literally, never as a regex.

use strict;
use warnings;

use Template;
use CGI;
use CGI::Carp 'fatalsToBrowser';
use Data::Dumper;

my $q = CGI->new();
my $get_cluster = $q->param('cluster') // '';

my $template_path = '/usr/share/elephant-shed/template';
my $page = 'backrest.html';
my $template = Template->new({
    INCLUDE_PATH => $template_path,
    POST_CHOMP   => 1,
});

print "Content-type: text/html\n\n";

my $directory = '/var/www/html/pgbackrest';
my @backup_files;
my %backups;

# Read the directory listing up front so the handle can be closed right away;
# sorting gives a stable order instead of whatever readdir happens to return.
opendir(my $dh, $directory)
    or die "Could not open directory \"$directory\", $!";
my @entries = sort readdir($dh);
closedir($dh);

foreach my $file (@entries) {
    # We always want to list files with "backup" or "log" suffix
    # (anchored, so e.g. "foo.log.tmp" is not picked up).
    next unless ($file =~ m/\.(?:backup|log)\z/);

    # If the GET parameter "cluster" is given we want to filter out
    # all other clusters. The parameter is untrusted user input, so it is
    # compared as plain text instead of being interpolated into a regex.
    next unless (index($file, $get_cluster) >= 0);

    push @backup_files, $file;

    # Three-argument open: the file name is never interpreted as a mode or a
    # command, whatever characters it contains.
    open(my $fh, '<', "$directory/$file")
        or die "Could not open \"$file\", $!";
    my $content = join '', <$fh>;
    close($fh);

    $backups{$file} = $content;
}

$template->process($page, {
    SERVER_NAME  => $ENV{SERVER_NAME},
    REMOTE_USER  => $ENV{REMOTE_USER},
    TITLE        => "Backups",
    HEADLINE     => "Backups",
    BACKUP_FILES => \@backup_files,
    BACKUPS      => \%backups,
}) or die $template->error();
#!/usr/bin/perl

# Portal dashboard CGI: collects per-cluster information (pgBadger report,
# pgBackRest status, backup timers, archiving state) for every PostgreSQL
# cluster and renders it with the portalmain.html template.

use strict;
use warnings;

use PgCommon;
use Template;

my $pgbadgerdir = "/var/lib/pgbadger";
my $backupstatusdir = "/var/www/html/pgbackrest";

# Run a command without going through a shell and report whether any line of
# its standard output matches the given pattern. Returns 0 when the command
# cannot be started. Arguments are passed as a list, so version and cluster
# names are never subject to shell interpretation.
sub command_output_matches {
    my ($pattern, @command) = @_;

    open(my $out, '-|', @command) or return 0;
    my $matched = 0;
    # Read everything so the child is not cut off by a closed pipe.
    while (defined(my $line = <$out>)) {
        $matched = 1 if $line =~ $pattern;
    }
    close($out);

    return $matched;
}

# get PostgreSQL cluster information
my @clusters;
foreach my $version (get_versions()) {
    foreach my $cluster (get_version_clusters($version)) {
        my %info = cluster_info($version, $cluster);
        $info{version} = $version;
        $info{cluster} = $cluster;
        # Only resolve the owner name when a uid is actually available.
        $info{owner} = (getpwuid $info{'owneruid'})[0]
            if defined $info{'owneruid'};

        # pgbadger report
        if (-e "$pgbadgerdir/$version-$cluster/LAST_PARSED") {
            $info{pgbadger} = "/pgbadger/$version-$cluster/";
        }

        # pgbackrest status
        if (-e "$backupstatusdir/$version-$cluster.backup") {
            $info{backup} = "/pgbackrest/$version-$cluster.backup";
        }
        if (command_output_matches(qr/Active: active/,
                'systemctl', 'status', "pgbackrest\@${version}-${cluster}.timer")) {
            $info{backup_enabled} = 1;
        }
        if (command_output_matches(qr/Active: active/,
                'systemctl', 'status', "pgbackrest-incr\@${version}-${cluster}.timer")) {
            $info{backup_incr_enabled} = 1;
        }

        # archive status: archiving counts as enabled when the
        # archive_command mentions pgbackrest
        if (command_output_matches(qr/pgbackrest/,
                'pg_conftool', $version, $cluster, 'show', 'archive_command')) {
            $info{archive_enabled} = 1;
        }

        push @clusters, \%info;
    }
}

my $template = Template->new({
    INCLUDE_PATH => '/usr/share/elephant-shed/template',
    POST_CHOMP   => 1,
});

print "Content-type: text/html\n\n";

$template->process('portalmain.html', {
    CLUSTERS    => \@clusters,
    SERVER_NAME => $ENV{SERVER_NAME},
    REMOTE_USER => $ENV{REMOTE_USER},
    TITLE       => "Dashboard - PostgreSQL",
    HEADLINE    => "PostgreSQL Appliance Dashboard",
}) or die $template->error();
'/support.partner.html') { $support_page = 'support.partner.html'; } $template->process($support_page, { SERVER_NAME => $ENV{SERVER_NAME}, REMOTE_USER => $ENV{REMOTE_USER}, TITLE => "Support - PostgreSQL", HEADLINE => "PostgreSQL Support", }) or die $template->error(); ================================================ FILE: portal/elephant-shed.conf ================================================ RewriteEngine On RewriteRule ^(.*)$ https://%{HTTP_HOST}$1 [R=301,L] SSLEngine on SSLCertificateFile /etc/ssl/certs/ssl-cert-snakeoil.pem SSLCertificateKeyFile /etc/ssl/private/ssl-cert-snakeoil.key SSLProxyEngine On SSLProxyCheckPeerCN Off SSLProxyCheckPeerName Off ServerAdmin webmaster@localhost DocumentRoot /var/www/html ErrorLog ${APACHE_LOG_DIR}/error.log CustomLog ${APACHE_LOG_DIR}/access.log combined DefineExternalAuth pwauth pipe /usr/sbin/pwauth SetOutputFilter DEFLATE AddOutputFilterByType DEFLATE text/plain AddOutputFilterByType DEFLATE text/html AddOutputFilterByType DEFLATE text/xml AddOutputFilterByType DEFLATE text/css AddOutputFilterByType DEFLATE application/xml AddOutputFilterByType DEFLATE application/xhtml+xml AddOutputFilterByType DEFLATE application/rss+xml AddOutputFilterByType DEFLATE application/javascript AddOutputFilterByType DEFLATE application/x-javascript Alias /image /usr/share/elephant-shed/image Alias /static /usr/share/elephant-shed/static Satisfy Any Allow from all Alias /doc /usr/share/elephant-shed/doc # directory listings Options +Indexes HeaderName /cgi-bin/index_header.pl ReadmeName /cgi-bin/index_footer.pl IndexOptions HTMLTable FancyIndexing FoldersFirst NameWidth=* VersionSort SuppressHTMLPreamble # mod_autoindex wants the cgi's file type to be text/* AddType text/html .pl # custom error pages ErrorDocument 401 /cgi-bin/notloggedin.pl ErrorDocument 403 /cgi-bin/error.pl ErrorDocument 404 /cgi-bin/error.pl ErrorDocument 500 /cgi-bin/error.pl ErrorDocument 503 /cgi-bin/error.pl # pgadmin4 3.2 on CentOS 7 doesn't like /pgadmin4 
RedirectMatch "/pgadmin4$" "/pgadmin4/browser/" # pgBackRest AddType text/plain .backup AddType text/plain .log Redirect "/prometheus" "/prometheus/" ProxyPass http://127.0.0.1:9090/prometheus/ ProxyPassReverse http://127.0.0.1:9090/prometheus/ ProxyPreserveHost On ProxyPass http://127.0.0.1:3000 ProxyPassReverse http://127.0.0.1:3000 RewriteEngine On # forward authed user to proxied applications RewriteRule .* - [E=PROXY_USER:%{LA-U:REMOTE_USER},NS] RequestHeader set X-WEBAUTH-USER "%{PROXY_USER}e" RequestHeader unset Authorization WSGIDaemonProcess omnidb user=omnidb group=omnidb WSGIScriptAlias /omnidb /usr/share/elephant-shed/omnidb/wsgi.py process-group=omnidb Alias /omnidb/static /usr/lib/python3/dist-packages/OmniDB_app/static Require all granted Require all granted ProxyPreserveHost On ProxyPass http://127.0.0.1:8888/powa ProxyPassReverse http://127.0.0.1:8888/powa ProxyPreserveHost On ProxyPass https://127.0.0.1:4200 ProxyPassReverse https://127.0.0.1:4200 Alias "/pgbadger" "/var/lib/pgbadger/" Options +Indexes IndexOptions HTMLTable IgnoreCase FancyIndexing FoldersFirst NameWidth=* VersionSort SuppressHTMLPreamble SuppressLastModified SuppressSize SuppressDescription # require user for access AuthType Basic AuthName "Elephant Shed" AuthBasicProvider external AuthExternal pwauth Require unix-group elephant-shed AddOutputFilterByType SUBSTITUTE text/html Substitute "s!\
\ \ \ \ \ \ \ \ \ \ \ \ RewriteEngine On # rewrite / to portal menu RewriteRule ^/?$ /cgi-bin/portalmain.pl [PT] # catch Cockpit paths (@ is /@localhost) RewriteRule ^/cockpit/socket wss://127.0.0.1:10090/cockpit/socket [P,L] RewriteRule ^/(apps|cockpit|dashboard|network|system|storage|updates|users|@)(.*) https://127.0.0.1:10090/$1$2 [P] # Fix CSP header RewriteRule . - [env=HOST:%{HTTP_HOST}] Header edit* Content-Security-Policy 127.0.0.1:10090 %{HOST}e # Apache 2.4.6 on CentOS 7 does not interpolate variables here, unset the header instead #Header unset Content-Security-Policy # Origin accepted in cockpit.conf RequestHeader set Origin https://localhost RequestHeader set X-Authorize password # /logout: sending any sort of 401 to the browser makes it forget the current credentials AuthType Basic AuthName "Elephant Shed" AuthBasicProvider external AuthExternal pwauth Require user does-not-exist # allow access to notloggedin page allow from all satisfy any Alias /favicon.ico /usr/share/elephant-shed/image/favicon.ico
================================================ FILE: portal/static/css/elephant-shed.css ================================================ /* General */ body { font-family: Arial, Sans-serif; } /* Links */ a { text-decoration: none; } a:link { color: #000000; } a:visited { color: #000000; } a:hover { color: #FF0000; text-decoration: none; } /* Header and Footer */ .el-header { border-bottom: 0.2em solid red; } .el-footer { border-top: 0.2em solid red; } /* Dashboard Buttons */ .dash td { padding:1em; } .dash div div a:hover img { filter: brightness(50%); transition: filter 0.1s; } /* Icons */ .main_icon { width: 100px; height: 100px; box-shadow: 5px 10px 5px 0px rgba(0,0,0,0.15); border-radius: 24px; margin-bottom: 15px; } /* Switch */ .switch { position: relative; display: inline-block; width: 60px; height: 34px; } .switch input { display:none; } .slider { position: absolute; cursor: pointer; top: 0; left: 0; right: 0; bottom: 0; background-color: #ccc; -webkit-transition: .4s; transition: .4s; } .slider:before { position: absolute; content: ""; height: 26px; width: 26px; left: 4px; bottom: 4px; background-color: white; -webkit-transition: .4s; transition: .4s; } input:checked + .slider { background-color: #5c5; } input:focus + .slider { box-shadow: 0 0 1px #5c5; } input:checked + .slider:before { -webkit-transform: translateX(26px); -ms-transform: translateX(26px); transform: translateX(26px); } /* Rounded sliders */ .slider.round { border-radius: 34px; } .slider.round:before { border-radius: 50%; } /* Status */ .status { width: 24px; height: 24px; background: #ccc; -moz-border-radius: 50%; -webkit-border-radius: 50%; border-radius: 50%; font-weight: bold; } .offline { background: rgb(254, 60, 60); } .online { background: #5c5; } .offlinetext { color: rgb(254, 60, 60); } .onlinetext { color: #5c5; } .recovery { background: purple; } /* Define the hover highlight color for the table row */ /*.clusterRow:hover{ background-color: #eee; }*/ .important{ font-weight: 
bold; } /* Detail view */ .detail { margin-right:auto; margin-left:auto; text-align: center; border: none; } /* Disable bootstrap table-hover for "detail" */ .detail table tr:hover, .detail table tr:hover td, .detail table tr:hover th { background-color: transparent; } .hidden { display: none; } .more_button { color: #ccc; font-weight: bold; text-align: center; font-size: 24px; cursor:pointer; } .more_button:hover { color: #777; transition: background-color 0.1s; } ================================================ FILE: portal/static/css/esmenu.css ================================================ .esmenu hr { background: red; height: 0px; border-top: 0; margin-top: 0; margin-bottom: 0; } .esmenu:hover hr { height: 2px; } .esmenu a { text-decoration: none; color: black; font-weight: bold; transition: .5s background-color; } .esmenu a:hover { text-decoration: underline; color: black; transition: .5s background-color; } .esmenu a img { width: 15px; height: 15px; margin-left: 2px; margin-right: 2px; margin-top: 2px; margin-bottom: 2px; transition: 1s all; } .esmenu:hover a img { width: 40px; height: 40px; margin-left: 3px; margin-right: 3px; margin-top: 4px; margin-bottom: 4px; transition: .2s all; } .esmenu a:hover img { filter: brightness(50%); transition: .2s filter; } .esmenu { z-index: 10000; top: 0; position: absolute; left: 0px; right: 0px; margin-left: auto; margin-right: auto; text-align: center; box-shadow: 3px 5px 3px 0px rgba(0,0,0,0.15); width: 220px; background-color: rgba(255,255,255,.15); border: 1px solid black; border-top: 0; border-radius: 10px; overflow: hidden; border-top-left-radius: 0px; border-top-right-radius: 0px; transition: top 0.3s, background-color 1s, width 1s; transition-timing-function: ease-out; transition-delay: 0.0s; } .esmenu:hover { background-color: rgba(255,255,255,1); width: 550px; transition: top 0.2s, width .2s, background-color 0.2s; } ================================================ FILE: portal/template/backrest.html 
================================================ [% INCLUDE header.html %]
[% FOREACH file IN BACKUPS %]

[% file.key %]

    [% file.value %]
  
[% END %]
[% INCLUDE footer.html %] ================================================ FILE: portal/template/error.html ================================================ [% INCLUDE header.html %]
Sorry, a [% REDIRECT_STATUS %] error occurred while loading [% REDIRECT_SCRIPT_URI %].
Entschuldigung, beim Zugriff auf [% REDIRECT_SCRIPT_URI %] trat ein [% REDIRECT_STATUS %]-Fehler auf.
[% INCLUDE footer.html %] ================================================ FILE: portal/template/footer.html ================================================ ================================================ FILE: portal/template/header.html ================================================ [% TITLE %]

[% HEADLINE %]

credativ
[% IF REMOTE_USER %] [% END %] ================================================ FILE: portal/template/notloggedin.html ================================================ [% INCLUDE header.html %]

Error

A valid user is required for accessing the PostgreSQL appliance.

Every user in the group "elephant-shed" is allowed to log in.

Hint

# Every user in the group "elephant-shed" is allowed to log in to the portal.
# Add all needed users to this group.
sudo adduser <USERNAME> elephant-shed
    

[% INCLUDE footer.html %] ================================================ FILE: portal/template/portalmain.html ================================================ [% INCLUDE header.html %]

PostgreSQL Cluster

[% FOREACH cluster IN CLUSTERS.nsort('version') %] [% END %]
Cluster Port Data directory Archiving Full Backup Incr Backup
[% cluster.version %]/[% cluster.cluster %] [% cluster.port %] [% cluster.pgdata %]
[% INCLUDE footer.html %] ================================================ FILE: portal/template/support.html ================================================ [% INCLUDE header.html %]

The Elephant Shed by credativ

Source code

The Elephant Shed is available on GitHub. New releases of this software will be available on the GitHub project page.

A list of known issues and bugs is available on GitHub as well. Feel free to open new issues if you think you found a bug. Pull-requests are welcome as well.

Support

The Elephant Shed is an open source project by credativ. Our PostgreSQL Competence Center supports the Elephant Shed as well as other open source products. If you are interested in support please contact us.

License

Copyright © 2017-2019 credativ GmbH <info@credativ.de>

This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

[% INCLUDE footer.html %] ================================================ FILE: postgresql/elephant-shed.conf ================================================ # Default data directory. #data_directory = '/var/lib/postgresql/%v/%c' # Default directory for transaction logs #waldir = '/var/lib/postgresql/xlog/%v/%c/pg_xlog' # Options to pass to initdb. #initdb_options = '--data-checksums' # The following options are copied into the new cluster's postgresql.conf: # what IP address(es) to listen on listen_addresses = '*' # We use 90 % of checkpoint_timeout to write a checkpoint (default since v14) checkpoint_completion_target = 0.9 # Log to syslog as well log_destination = 'stderr,syslog' # Tag syslog ident with cluster name syslog_ident = 'postgres-%v-%c' # Add prefix to log lines log_line_prefix = '%%m [%%p] user=%u,db=%d,app=%a,client=%h ' # Use C.UTF-8 as lc_messages to ensure pgBadger is able to parse the log lc_messages = 'C.UTF-8' # Log slow statements log_min_duration_statement = 1s # Always log checkpoint information log_checkpoints = on # Log lock waits to debug locking errors log_lock_waits = on # Log temp files greater than 10 MB log_temp_files = 10MB # Log autovacuum processes longer then 3 minutes log_autovacuum_min_duration = 3min # Install lib for pg_stat_statements shared_preload_libraries = 'pg_stat_statements, auto_explain' # Backup #archive_command = '/usr/share/elephant-shed/pgbackrest-archivecommand --stanza=%v-%c archive-push %%p' archive_command = '/bin/true' archive_mode = on hot_standby = on max_wal_senders = 5 max_replication_slots = 5 wal_level = hot_standby ================================================ FILE: postgresql/es_ctlcluster ================================================ #!/bin/sh # This script configures PostgreSQL clusters for usage with Elephant Shed. 
# Usage: es_ctlcluster {enable-powa|enable-powa-addons} set -eu if [ -z "${2:-}" ]; then echo "Usage: $0 {enable-backup|enable-powa|enable-powa-addons}" exit 1 fi case $1 in *-*) VERSION="${1%%-*}" CLUSTER="${1#*-}" ACTION="$2" ;; */*) VERSION="${1%%/*}" CLUSTER="${1#*/}" ACTION="$2" ;; *) VERSION="$1" CLUSTER="$2" ACTION="$3" ;; esac pg_lsclusters -h "$VERSION/$CLUSTER" > /dev/null if ! pg_lsclusters -h "$VERSION/$CLUSTER" | grep -q online; then echo "Cluster $VERSION $CLUSTER is not running" exit 1 fi CONFTOOL="pg_conftool -s $VERSION $CLUSTER" enable_backup() { # check whether archiving is activated already archive_command=$($CONFTOOL show archive_command || :) case $archive_command in *true*) systemctl start pgbackrest-toggle-archiving@$VERSION-$CLUSTER ;; *pgbackrest*) echo "archive_command already configured" ;; esac systemctl enable pgbackrest@$VERSION-$CLUSTER.timer systemctl start pgbackrest@$VERSION-$CLUSTER.timer systemctl enable pgbackrest-incr@$VERSION-$CLUSTER.timer systemctl start pgbackrest-incr@$VERSION-$CLUSTER.timer echo "backup for cluster $VERSION-$CLUSTER enabled" } shared_preload () { local extension="$1" shared_preload_libraries=$($CONFTOOL show shared_preload_libraries || :) case $shared_preload_libraries in *$extension*) return 1 ;; # no restart required esac echo "Adding $extension to shared_preload_libraries ..." ( set -x $CONFTOOL set shared_preload_libraries "${shared_preload_libraries:+$shared_preload_libraries, }$extension" ) return 0 } enable_powa () { if [ -x /usr/bin/dpkg ]; then POWA_PKG="postgresql-$VERSION-powa" if ! dpkg -l "$POWA_PKG" | grep -q ^ii; then echo "Installing $POWA_PKG ..." apt-get install -y "$POWA_PKG" fi elif [ -x /usr/bin/rpm ]; then CONTRIB_PKG="postgresql$VERSION-contrib" if ! rpm -q "$CONTRIB_PKG" > /dev/null; then echo "Installing $CONTRIB_PKG ..." yum install -y "$CONTRIB_PKG" fi POWA_PKG="powa_$VERSION" if ! rpm -q "$POWA_PKG" > /dev/null; then echo "Installing $POWA_PKG ..." 
yum install -y "$POWA_PKG" fi fi shared_preload pg_stat_statements && restart=yes shared_preload powa && restart=yes if [ "${restart:-}" ]; then echo "Restarting cluster ..." pg_ctlcluster "$VERSION" "$CLUSTER" restart fi echo "Setting up powa user and database ..." su postgres < /dev/null; then echo "Installing $PKG ..." yum install -y "$PKG" fi done fi for EXTENSION in $EXTENSIONS; do if [ "$EXTENSION" = "pg-track-settings" -o "$EXTENSION" = "pg_track_settings" ]; then continue fi shared_preload $(echo $EXTENSION | tr '-' '_') && restart=yes done if [ "${restart:-}" ]; then echo "Restarting cluster ..." pg_ctlcluster "$VERSION" "$CLUSTER" restart fi for EXTENSION in $EXTENSIONS; do su postgres <new; $json->canonical(1); $json->indent(1); # list all clusters foreach my $version (get_versions()) { foreach my $cluster (get_version_clusters($version)) { my %info = cluster_info($version, $cluster); my $port = get_cluster_port($version, $cluster); $servers->{"$version/$cluster"} = { 'host' => 'localhost', 'port' => $port, 'database' => 'powa', 'query' => {'client_encoding' => 'utf8'}, }; } } # retrieve cookie and modes from existing config file if(open(my $fh, "<", $powaconf)) { # cookie_secret="xx" while (<$fh>) { $cookie = $1 if /^cookie_secret="(\S+)"/; } close $fh; ($mode, $user, $group) = (stat $powaconf)[2, 4, 5]; } # if not found, generate a new one if (not defined $cookie) { $cookie = `dd if=/dev/urandom bs=1k count=1 2>/dev/null | sha1sum | cut -d ' ' -f 1`; chomp $cookie; ($mode, $user, $group) = qw(0644 0 0); } # write output, preserving old mode and owner my ($fh, $filename) = tempfile("powa-web.conf.XXXXXX", DIR => "/etc", UNLINK => 1); my $js = $json->encode($servers); print $fh "servers=$js\nurl_prefix=\"/powa\"\ncookie_secret=\"$cookie\"\n" or die "$filename: $!"; close $fh or die "$filename: $!"; chmod $mode, $filename or die "$filename: $!"; chown $user, $group, $filename or die "$filename: $!"; rename $filename, $powaconf or die "$powaconf: $!"; # 
restart powa-web system('systemctl restart powa-web') ================================================ FILE: prometheus/elephant-shed-prometheus ================================================ # Set go garbage collector to clean up more frequently GOGC=40 GOMAXPROCS=2 # Set the command-line arguments to pass to the server. ARGS="--config.file /etc/prometheus/elephant-shed-prometheus.yml \ --web.external-url http://127.0.0.1:9090/prometheus/ \ --web.listen-address 127.0.0.1:9090 \ --storage.tsdb.retention=90d" # Prometheus supports the following options: # --config.file="/etc/prometheus/prometheus.yml" # Prometheus configuration file path. # --web.listen-address="0.0.0.0:9090" # Address to listen on for UI, API, and telemetry. # --web.read-timeout=5m Maximum duration before timing out read of the # request, and closing idle connections. # --web.max-connections=512 Maximum number of simultaneous connections. # --web.external-url= The URL under which Prometheus is externally # reachable (for example, if Prometheus is served # via a reverse proxy). Used for generating # relative and absolute links back to Prometheus # itself. If the URL has a path portion, it will # be used to prefix all HTTP endpoints served by # Prometheus. If omitted, relevant URL components # will be derived automatically. # --web.route-prefix= Prefix for the internal routes of web endpoints. # Defaults to path of --web.external-url. # --web.local-assets="/usr/share/prometheus/web/" # Path to static asset/templates directory. # --web.user-assets= Path to static asset directory, available at # /user. # --web.enable-lifecycle Enable shutdown and reload via HTTP request. # --web.enable-admin-api Enables API endpoints for admin control actions. # --web.console.templates="/etc/prometheus/consoles" # Path to the console template directory, # available at /consoles. # --web.console.libraries="/etc/prometheus/console_libraries" # Path to the console library directory. 
# --storage.tsdb.path="/var/lib/prometheus/metrics2/" # Base path for metrics storage. # --storage.tsdb.min-block-duration=2h # Minimum duration of a data block before being # persisted. # --storage.tsdb.max-block-duration= # Maximum duration compacted blocks may span. # (Defaults to 10% of the retention period) # --storage.tsdb.retention=15d # How long to retain samples in the storage. # --storage.tsdb.no-lockfile # Do not create lockfile in data directory. # --alertmanager.notification-queue-capacity=10000 # The capacity of the queue for pending alert # manager notifications. # --alertmanager.timeout=10s # Timeout for sending alerts to Alertmanager. # --query.lookback-delta=5m The delta difference allowed for retrieving # metrics during expression evaluations. # --query.timeout=2m Maximum time a query may take before being # aborted. # --query.max-concurrency=20 # Maximum number of queries executed concurrently. # --log.level=info Only log messages with the given severity or # above. One of: [debug, info, warn, error] ================================================ FILE: prometheus/elephant-shed-prometheus.conf ================================================ [Service] EnvironmentFile=/etc/default/elephant-shed-prometheus ================================================ FILE: prometheus/elephant-shed-prometheus.yml ================================================ # The Elephant Shed's Prometheus config global: scrape_interval: 30s # Attach these labels to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). 
external_labels: # monitor: 'example' scrape_configs: # scrape data from prometheus itself - job_name: prometheus scrape_interval: 300s metrics_path: /prometheus/metrics static_configs: - targets: ['localhost:9090'] # scrape data from node_exporter - file_sd_configs: - files: - /etc/prometheus/node_exporter/*.yml job_name: node relabel_configs: - source_labels: - __address__ target_label: __param_target - regex: (.+) replacement: ${1}; source_labels: - instance target_label: __tmp_instance - regex: ([^:;]+)((:[0-9]+)?|;(.*)) replacement: ${1} separator: '' source_labels: - __tmp_instance - __address__ target_label: instance # scrape data from sql_exporter - file_sd_configs: - files: - /etc/prometheus/sql_exporter/*.yml job_name: sql relabel_configs: - regex: (.+) replacement: ${1}; source_labels: - instance target_label: __tmp_instance - regex: ([^:;]+)((:[0-9]+)?|;(.*)) replacement: ${1} separator: '' source_labels: - __tmp_instance - __address__ target_label: instance ================================================ FILE: rpm/56-authnz_external.conf ================================================ # Load authnz_external for elephant-shed-portal authentication # (This file is part of elephant-shed-portal) LoadModule authnz_external_module modules/mod_authnz_external.so ================================================ FILE: rpm/README ================================================ == CentOS/RHEL 7 package installation instructions == elephant-shed depends on EPEL and PGDG software repositories. You need to enable them first, before installing elephant-shed packages. The following steps should be performed as root (if not noted otherwise). * EPEL $ yum install epel-release * PGDG Repository packages can be retrieved from https://yum.postgresql.org/repopackages.php Choose either CentOS or RHEL repository packages, depending on your target platform. 
Here for example PostgreSQL 10 repository packages for CentOS 7: $ yum install https://download.postgresql.org/pub/repos/yum/10/redhat/rhel-7-x86_64/pgdg-centos10-10-2.noarch.rpm * elephant-shed package repository Install the repository package from https://packages.credativ.com: $ yum install https://packages.credativ.com/public/postgresql/yum/credativ-repo-1-1.noarch.rpm * RPM installation steps Update the yum cache $ yum makecache Install elephant-shed $ yum install elephant-shed ================================================ FILE: rpm/elephant-shed.spec ================================================ Name: elephant-shed Version: %{package_version} Release: %{package_release}%{?dist} BuildArch: noarch Summary: PostgreSQL dashboard Packager: credativ GmbH License: GPLv3+ URL: https://packages.debian.org/sid/%{name} Source0: http://ftp.debian.org/debian/pool/main/p/%{name}/%{name}_%{version}.tar.xz #BuildRequires: python-sphinx Requires: elephant-shed-cockpit Requires: elephant-shed-grafana Requires: elephant-shed-pgbackrest Requires: elephant-shed-pgbadger Requires: elephant-shed-portal Requires: elephant-shed-postgresql Requires: elephant-shed-powa Requires: elephant-shed-prometheus Requires: elephant-shed-prometheus-node-exporter Requires: elephant-shed-prometheus-sql-exporter Requires: elephant-shed-shellinabox Requires: elephant-shed-tmate %description The Elephant Shed is a web-based PostgreSQL management front-end. This meta package depends on all Elephant Shed components. %package -n elephant-shed-portal Summary: PostgreSQL dashboard -- web interface Requires: shadow-utils Requires: httpd Requires: mod_ssl Requires: mod_authnz_external Requires: pwauth Requires: mod_authz_unixgroup Requires: perl-Template-Toolkit #Requires: libcgi-pm-perl %description -n elephant-shed-portal The Elephant Shed is a web-based PostgreSQL management front-end. This package provides the web interface. 
%post -n elephant-shed-portal groupadd --system elephant-shed || : systemctl enable httpd systemctl restart httpd %package -n elephant-shed-postgresql Requires: shadow-utils Requires: postgresql-common >= 183 Summary: PostgreSQL dashboard -- PostgreSQL integration %description -n elephant-shed-postgresql The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with PostgreSQL. %package -n elephant-shed-pgbadger Requires: pgbadger >= 9 Requires: postgresql-common Summary: PostgreSQL dashboard -- pgBadger integration %description -n elephant-shed-pgbadger The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with pgBadger. %post -n elephant-shed-pgbadger install -d -o postgres -g postgres /var/lib/pgbadger systemctl daemon-reload systemctl enable pgbadger.timer systemctl start pgbadger.timer systemctl start pgbadger %package -n elephant-shed-pgbackrest Requires: pgbackrest Requires: postgresql-common Summary: PostgreSQL dashboard -- pgBackRest integration %description -n elephant-shed-pgbackrest The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with pgBackRest. %post -n elephant-shed-pgbackrest systemctl daemon-reload install -d -o postgres -g postgres /var/lib/pgbackrest /var/www/html/pgbackrest chown postgres: /etc/pgbackrest.conf %package -n elephant-shed-powa Requires: powa_14-web Summary: PostgreSQL dashboard -- powa %description -n elephant-shed-powa The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with powa %post -n elephant-shed-powa update-powa-web-config systemctl enable powa-web-14 systemctl start powa-web-14 %package -n elephant-shed-grafana Requires: curl Requires: grafana >= 5 Summary: PostgreSQL dashboard -- Grafana integration %description -n elephant-shed-grafana The Elephant Shed is a web-based PostgreSQL management front-end. . 
This package provides the integration with Grafana. %post -n elephant-shed-grafana systemctl daemon-reload systemctl enable grafana-server.service systemctl start grafana-server.service %package -n elephant-shed-prometheus Requires: prometheus2 Summary: PostgreSQL dashboard -- Prometheus integration %description -n elephant-shed-prometheus The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with Prometheus. %post -n elephant-shed-prometheus systemctl daemon-reload systemctl enable prometheus systemctl start prometheus %package -n elephant-shed-prometheus-node-exporter Requires: node_exporter >= 0.16.0 Summary: PostgreSQL dashboard -- Node exporter integration %description -n elephant-shed-prometheus-node-exporter The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with the Prometheus node exporter. %post -n elephant-shed-prometheus-node-exporter systemctl daemon-reload systemctl enable node_exporter systemctl start node_exporter %package -n elephant-shed-prometheus-sql-exporter Requires: perl-YAML Requires: postgresql-common Requires: sql_exporter Summary: PostgreSQL dashboard -- SQL exporter integration %description -n elephant-shed-prometheus-sql-exporter The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with the Prometheus SQL exporter. %post -n elephant-shed-prometheus-sql-exporter systemctl daemon-reload systemctl enable prometheus-sql-exporter systemctl start prometheus-sql-exporter systemctl enable prometheus-sql-exporter-restart.timer systemctl start prometheus-sql-exporter-restart.timer %package -n elephant-shed-cockpit Requires: cockpit Requires: cockpit-packagekit Summary: PostgreSQL dashboard -- cockpit integration %description -n elephant-shed-cockpit The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with cockpit. 
%post -n elephant-shed-cockpit systemctl daemon-reload # create /run/cockpit systemd-tmpfiles --create # allow cockpit to use port 10090 semanage port -a -t websm_port_t -p tcp 10090 # allow apache to connect to any port setsebool -P httpd_can_network_connect true systemctl enable cockpit.socket systemctl start cockpit.socket %package -n elephant-shed-shellinabox Requires: shellinabox Summary: PostgreSQL dashboard -- shellinabox integration %description -n elephant-shed-shellinabox The Elephant Shed is a web-based PostgreSQL management front-end. . This meta package provides the integration with shellinabox. %post -n elephant-shed-shellinabox systemctl daemon-reload systemctl enable shellinaboxd systemctl start shellinaboxd %package -n elephant-shed-tmate Requires: tmate Summary: PostgreSQL dashboard -- tmate integration %description -n elephant-shed-tmate The Elephant Shed is a web-based PostgreSQL management front-end. . This package provides the integration with tmate. %prep # unpack tarball, ignoring the name of the top level directory inside %setup -c mv */* . %build #make %install rm -rf %{buildroot} # install in subpackages using the Debian files for inst in debian/*.install; do pkg=$(basename $inst .install) [ "$pkg" = "elephant-shed-omnidb" ] && continue # not supported on RH yet echo "### Reading $pkg files list from $inst ###" while read file dir; do case $file in portal/cgi-bin) dir="var/www" ;; portal/elephant-shed.conf) dir="etc/httpd/conf.d" ;; esac mkdir -p %{buildroot}/$dir cp -r $file %{buildroot}/$dir echo "/$dir/${file##*/}" >> files-$pkg done < $inst done # update httpd paths for CentOS sed -i -e 's!SSLCertificateFile.*!SSLCertificateFile /etc/pki/tls/certs/localhost.crt!' \ -e 's!SSLCertificateKeyFile.*!SSLCertificateKeyFile /etc/pki/tls/private/localhost.key!' \ -e 's!ErrorLog.*!ErrorLog /var/log/httpd/error_log!' \ -e 's!CustomLog.*!CustomLog /var/log/httpd/access_log combined!' \ -e 's!Header edit.*Content-Security-Policy.*!#&!' 
\ -e '/Header unset Content-Security-Policy/s/#//' \ -e 's!DefineExternalAuth.*!DefineExternalAuth pwauth pipe /usr/bin/pwauth!' \ %{buildroot}/etc/httpd/conf.d/elephant-shed.conf # load authnz_external (it doesn't do that by itself) mkdir -p %{buildroot}/etc/httpd/conf.modules.d cp rpm/56-authnz_external.conf %{buildroot}/etc/httpd/conf.modules.d echo /etc/httpd/conf.modules.d/56-authnz_external.conf >> files-elephant-shed-portal # PostgreSQL integration # CentOS 7 does not have C.UTF-8 sed -i -e 's/C\.UTF-8/en_US.utf8/g' \ %{buildroot}/etc/postgresql-common/createcluster.d/elephant-shed.conf # prometheus2.rpm uses a different variable for extra arguments # preserve storage.tsdb.path from /etc/default/prometheus sed -i -e 's!^ARGS="!PROMETHEUS_OPTS="--storage.tsdb.path=/var/lib/prometheus/data !' %{buildroot}/etc/default/elephant-shed-prometheus # node exporter service is named differently on CentOS, and uses a different variable for extra arguments mv %{buildroot}/etc/systemd/system/prometheus-node-exporter.service.d %{buildroot}/etc/systemd/system/node_exporter.service.d sed -i -e 's!prometheus-node-exporter.service.d!node_exporter.service.d!' files-elephant-shed-prometheus-node-exporter sed -i -e 's!^ARGS=!NODE_EXPORTER_OPTS=!' 
%{buildroot}/etc/default/elephant-shed-prometheus-node-exporter %files -n elephant-shed-portal -f files-elephant-shed-portal %files -n elephant-shed-postgresql -f files-elephant-shed-postgresql %files -n elephant-shed-pgbadger -f files-elephant-shed-pgbadger %files -n elephant-shed-pgbackrest -f files-elephant-shed-pgbackrest %files -n elephant-shed-powa -f files-elephant-shed-powa %files -n elephant-shed-grafana -f files-elephant-shed-grafana %files -n elephant-shed-prometheus -f files-elephant-shed-prometheus %files -n elephant-shed-prometheus-node-exporter -f files-elephant-shed-prometheus-node-exporter %files -n elephant-shed-prometheus-sql-exporter -f files-elephant-shed-prometheus-sql-exporter %files -n elephant-shed-cockpit -f files-elephant-shed-cockpit %files -n elephant-shed-shellinabox %files -n elephant-shed-tmate -f files-elephant-shed-tmate %files -n elephant-shed ================================================ FILE: rpm/tmate.spec ================================================ Name: tmate Version: 2.2.1 Release: 1 License: BSD Summary: Instant Terminal Sharing BuildArch: x86_64 Source: https://github.com/tmate-io/tmate/archive/%{version}.tar.gz BuildRequires: openssl-devel zlib-devel ruby ncurses-devel libssh-devel msgpack-devel libevent-devel Requires: openssl zlib ncurses libssh msgpack libevent %description Tmate is a fork of tmux. It provides an instant pairing solution. %prep %setup -n tmate-%{version} %build ./autogen.sh ./configure --prefix=/usr make %install make install DESTDIR=%buildroot %clean rm -rf %buildroot %files %defattr(-,root,root,-) %{_bindir}/%{name} %{_mandir}/man1/%{name}.1.gz %changelog * Thu Oct 18 2018 Bernd Helmle 2.2.1-1 - Upstream release 2.2.1 ================================================ FILE: sql-exporter/SAMPLE.yml ================================================ # Files here named *.yml will be used by update-prometheus-sql-exporter-config # to update the prometheus-sql-exporter config in /var/run/postgresql/. 
# Queries with scope 'cluster' are executed once per cluster (via the # 'postgres' database). Scope 'database' queries are executed in each database. # Example: #- name: "application_hits" # help: "Web hits in our application" # scope: database # or 'cluster' # min_version: 9.0 # max_version: 9.6 # # alternative: version: 10 # run only on this version (regexp) # cluster: main # run only on this cluster (regexp) # database: appdb # run only on this database (regexp) # interval: 1h # only run the query every $interval (default: run synchronously) # labels: # - "username" # values: # - "count" # query: | # SELECT username, count(*) FROM applog GROUP BY username ================================================ FILE: sql-exporter/activity.yml ================================================ # queries run once per cluster (via the 'postgres' database) - name: "running_queries" help: "number of running queries" scope: cluster labels: - "datname" - "usename" values: - "count" query: >- SELECT datname::text, usename::text, COUNT(*)::float AS count FROM pg_stat_activity WHERE NOT datname ~ '^template(0|1)$' AND usename IS NOT NULL GROUP BY datname, usename - name: "pg_stat_activity" help: "running backends by database and state" scope: cluster labels: - "datname" - "state" values: - "count" - "max_tx_duration" query: >- SELECT pg_database.datname::text, states.state::text, COALESCE(count, 0) as count, COALESCE(max_tx_duration, 0) as max_tx_duration FROM (VALUES ('active'), ('waiting'), ('idle'), ('idle in transaction'), ('idle in transaction (aborted)'), ('fastpath function call'), ('disabled')) AS states(state) CROSS JOIN pg_database LEFT JOIN (SELECT datname, CASE WHEN state = 'active' AND wait_event_type = 'Lock' THEN 'waiting' ELSE state END AS state, count(*) AS count, MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration FROM pg_stat_activity WHERE backend_type = 'client backend' AND pid != pg_backend_pid() GROUP BY 1, 2) AS act ON states.state = 
act.state AND pg_database.datname = act.datname WHERE NOT pg_database.datname ~ '^template(0|1)$' ================================================ FILE: sql-exporter/elephant-shed.conf ================================================ [Unit] # forget default config file location ConditionPathExists= # wait for PostgreSQL to start up After=postgresql.service # If no PostgreSQL cluster exists, we don't start. ConditionPathExistsGlob=/etc/postgresql/*/*/postgresql.conf [Service] # before starting the sql exporter, generate config (as root) PermissionsStartOnly=true ExecStartPre=/usr/bin/update-prometheus-sql-exporter-config /etc/prometheus-sql-exporter /var/run/postgresql/prometheus-sql-exporter.yml Environment="ARGS=-config.file /var/run/postgresql/prometheus-sql-exporter.yml" Environment="GOMAXPROCS=1" ================================================ FILE: sql-exporter/io.yml ================================================ # queries run once per cluster (via the 'postgres' database) - name: "pg_stat_io" help: "I/O statistics" scope: cluster min_version: 16 type: "counter" labels: - "backend_type" - "object" - "context" values: - "reads" - "read_time" - "writes" - "write_time" - "writebacks" - "writeback_time" - "extends" - "extend_time" - "hits" - "evictions" - "reuses" - "fsyncs" - "fsync_time" query: >- SELECT backend_type::text, object::text, context::text, COALESCE(reads::float, 0) AS reads, COALESCE(read_time::float, 0) AS read_time, COALESCE(writes::float, 0) AS writes, COALESCE(write_time::float, 0) AS write_time, COALESCE(writebacks::float, 0) AS writebacks, COALESCE(writeback_time::float, 0) AS writeback_time, COALESCE(extends::float, 0) AS extends, COALESCE(extend_time::float, 0) AS extend_time, COALESCE(hits::float, 0) AS hits, COALESCE(evictions::float, 0) AS evictions, COALESCE(reuses::float, 0) AS reuses, COALESCE(fsyncs::float, 0) AS fsyncs, COALESCE(fsync_time::float, 0) AS fsync_time FROM pg_stat_io WHERE backend_type NOT IN ('autovacuum launcher', 
'startup', 'standalone backend'); ================================================ FILE: sql-exporter/prometheus-sql-exporter-restart.service ================================================ [Unit] Description=Restart prometheus-sql-exporter to update config based on clusters and databases present on the system [Service] Type=oneshot ExecStart=/bin/systemctl try-restart prometheus-sql-exporter.service [Install] # when enabled, start along with postgresql WantedBy=postgresql.service ================================================ FILE: sql-exporter/prometheus-sql-exporter-restart.timer ================================================ [Unit] Description=Restart prometheus-sql-exporter every 10 minutes to update config based on clusters and databases present on the system [Timer] OnCalendar=*:0/10 [Install] WantedBy=multi-user.target ================================================ FILE: sql-exporter/queries.yml ================================================ # queries run once per cluster (via the 'postgres' database) - name: "server" help: "Server version, role and start time" scope: cluster labels: - "server_version" values: - "server_start_time" - "server_is_primary" - "server_is_replica" query: >- SELECT split_part(current_setting('server_version'), ' ', 1) AS server_version, extract(epoch FROM pg_postmaster_start_time())::float AS server_start_time, CASE WHEN pg_is_in_recovery() THEN 0 ELSE 1 END AS server_is_primary, CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE 1 END AS server_is_replica - name: "settings" help: "PostgreSQL settings" scope: cluster min_version: 9.5 max_version: 10 labels: - "settings" values: - "data_checksums" - "max_connections" - "autovacuum_freeze_max_age" - "superuser_reserved_connections" - "max_wal_senders" - "max_prepared_transactions" - "max_worker_processes" - "random_page_cost" - "seq_page_cost" - "checkpoint_timeout" - "work_mem" - "maintenance_work_mem" - "shared_buffers" - "effective_cache_size" - "max_wal_size" query: >- 
SELECT current_setting('data_checksums')::bool::int::float AS data_checksums, current_setting('max_connections')::float AS max_connections, current_setting('autovacuum_freeze_max_age')::float AS autovacuum_freeze_max_age, current_setting('superuser_reserved_connections')::float AS superuser_reserved_connections, current_setting('max_wal_senders')::float AS max_wal_senders, current_setting('max_prepared_transactions')::float AS max_prepared_transactions, current_setting('max_worker_processes')::float AS max_worker_processes, current_setting('random_page_cost')::float AS random_page_cost, current_setting('seq_page_cost')::float AS seq_page_cost, (SELECT setting FROM pg_settings WHERE name = 'checkpoint_timeout') AS checkpoint_timeout, pg_size_bytes(current_setting('work_mem'))::float AS work_mem, pg_size_bytes(current_setting('maintenance_work_mem'))::float AS maintenance_work_mem, pg_size_bytes(current_setting('shared_buffers'))::float AS shared_buffers, pg_size_bytes(current_setting('effective_cache_size'))::float AS effective_cache_size, pg_size_bytes(current_setting('max_wal_size'))::float AS max_wal_size - name: "settings" help: "PostgreSQL settings" scope: cluster min_version: 11 labels: - "settings" values: - "data_checksums" - "jit" - "max_connections" - "autovacuum_freeze_max_age" - "superuser_reserved_connections" - "max_wal_senders" - "max_prepared_transactions" - "max_worker_processes" - "random_page_cost" - "seq_page_cost" - "checkpoint_timeout" - "work_mem" - "maintenance_work_mem" - "shared_buffers" - "effective_cache_size" - "max_wal_size" query: >- SELECT current_setting('data_checksums')::bool::int::float AS data_checksums, current_setting('jit')::bool::int::float AS jit, current_setting('max_connections')::float AS max_connections, current_setting('autovacuum_freeze_max_age')::float AS autovacuum_freeze_max_age, current_setting('superuser_reserved_connections')::float AS superuser_reserved_connections, current_setting('max_wal_senders')::float AS 
max_wal_senders, current_setting('max_prepared_transactions')::float AS max_prepared_transactions, current_setting('max_worker_processes')::float AS max_worker_processes, current_setting('random_page_cost')::float AS random_page_cost, current_setting('seq_page_cost')::float AS seq_page_cost, (SELECT setting FROM pg_settings WHERE name = 'checkpoint_timeout') AS checkpoint_timeout, pg_size_bytes(current_setting('work_mem'))::float AS work_mem, pg_size_bytes(current_setting('maintenance_work_mem'))::float AS maintenance_work_mem, pg_size_bytes(current_setting('shared_buffers'))::float AS shared_buffers, pg_size_bytes(current_setting('effective_cache_size'))::float AS effective_cache_size, pg_size_bytes(current_setting('max_wal_size'))::float AS max_wal_size - name: "pg_locks" help: "locks held" scope: cluster labels: - "datname" - "mode" values: - "count" query: >- SELECT datname, t.mode, count(l.mode) FROM pg_database d CROSS JOIN (VALUES ('AccessShareLock'), ('RowShareLock'), ('RowExclusiveLock'), ('ShareUpdateExclusiveLock'), ('ShareLock'), ('ShareRowExclusiveLock'), ('ExclusiveLock'), ('AccessExclusiveLock')) t(mode) LEFT JOIN pg_locks l ON d.oid = l.database AND t.mode = l.mode AND l.pid <> pg_backend_pid() WHERE datname !~ '^template(0|1)$' GROUP BY 1, 2 - name: "pg_stat_database" help: "database statistics" scope: cluster min_version: 9.2 max_version: 11 type: "counter" labels: - "datname" values: - "numbackends" - "xact_commit" - "xact_rollback" - "blks_read" - "blks_hit" - "tup_returned" - "tup_fetched" - "tup_inserted" - "tup_updated" - "tup_deleted" - "conflicts" - "temp_files" - "temp_bytes" - "deadlocks" - "blk_read_time" - "blk_write_time" - "freeze_age" - "dbsize" query: >- SELECT s.datname::text, numbackends::float, xact_commit::float, xact_rollback::float, blks_read::float, blks_hit::float, tup_returned::float, tup_fetched::float, tup_inserted::float, tup_updated::float, tup_deleted::float, conflicts::float, temp_files::float, temp_bytes::float, 
deadlocks::float, blk_read_time, blk_write_time, age(d.datfrozenxid) AS freeze_age, pg_database_size(s.datname)::float AS dbsize FROM pg_stat_database s LEFT JOIN pg_database d ON d.datname = s.datname WHERE NOT s.datname ~ '^template(0|1)$' - name: "pg_stat_database" help: "database statistics" scope: cluster min_version: 12 type: "counter" labels: - "datname" values: - "numbackends" - "xact_commit" - "xact_rollback" - "blks_read" - "blks_hit" - "tup_returned" - "tup_fetched" - "tup_inserted" - "tup_updated" - "tup_deleted" - "conflicts" - "temp_files" - "temp_bytes" - "deadlocks" - "blk_read_time" - "blk_write_time" - "freeze_age" - "checksum_failures" - "dbsize" query: >- SELECT s.datname::text, numbackends::float, xact_commit::float, xact_rollback::float, blks_read::float, blks_hit::float, tup_returned::float, tup_fetched::float, tup_inserted::float, tup_updated::float, tup_deleted::float, conflicts::float, temp_files::float, temp_bytes::float, deadlocks::float, blk_read_time, blk_write_time, age(d.datfrozenxid) AS freeze_age, coalesce(checksum_failures::float, 0) AS checksum_failures, pg_database_size(s.datname)::float AS dbsize FROM pg_stat_database s LEFT JOIN pg_database d ON d.datname = s.datname WHERE NOT s.datname ~ '^template(0|1)$' - name: "pg_stat_statements" help: "statement statistics" scope: cluster min_version: 9.2 max_version: 12 labels: - "usename" - "datname" - "queryid" - "query" values: - "calls" - "total_time" - "rows" - "shared_blks_hit" - "shared_blks_read" - "shared_blks_dirtied" - "shared_blks_written" - "local_blks_hit" - "local_blks_read" - "local_blks_dirtied" - "local_blks_written" - "temp_blks_read" - "temp_blks_written" query: >- WITH w_pg_stat_statements AS ( SELECT * FROM pg_stat_statements) (SELECT usename::text, datname::text, queryid::text, substr(regexp_replace(query, E'[\\n\\r]+', ' ', 'g' ), 1, 1024) AS query, calls, total_time, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, 
local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written FROM w_pg_stat_statements pss JOIN pg_database pd ON pss.dbid = pd.oid JOIN pg_user pu ON pss.userid = pu.usesysid ORDER BY pss.total_time DESC LIMIT 25) UNION (SELECT usename::text, datname::text, queryid::text, substr(regexp_replace(query, E'[\\n\\r]+', ' ', 'g' ), 1, 1024) AS query, calls, total_time, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written FROM w_pg_stat_statements pss2 JOIN pg_database pd2 ON pss2.dbid = pd2.oid JOIN pg_user pu2 ON pss2.userid = pu2.usesysid ORDER BY calls DESC LIMIT 25) - name: "pg_stat_statements" help: "statement statistics" scope: cluster min_version: 13 labels: - "usename" - "datname" - "queryid" - "query" values: - "calls" - "total_time" - "total_plan_time" - "total_exec_time" - "rows" - "shared_blks_hit" - "shared_blks_read" - "shared_blks_dirtied" - "shared_blks_written" - "local_blks_hit" - "local_blks_read" - "local_blks_dirtied" - "local_blks_written" - "temp_blks_read" - "temp_blks_written" query: >- WITH w_pg_stat_statements AS ( SELECT * FROM pg_stat_statements) (SELECT usename::text, datname::text, queryid::text, substr(regexp_replace(query, E'[\\n\\r]+', ' ', 'g' ), 1, 1024) AS query, calls, total_plan_time + total_exec_time as total_time, total_plan_time, total_exec_time, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written FROM w_pg_stat_statements pss JOIN pg_database pd ON pss.dbid = pd.oid JOIN pg_user pu ON pss.userid = pu.usesysid ORDER BY 6 DESC LIMIT 25) UNION (SELECT usename::text, datname::text, queryid::text, substr(regexp_replace(query, E'[\\n\\r]+', ' ', 'g' ), 1, 1024) AS query, calls, total_plan_time + total_exec_time as 
total_time, total_plan_time, total_exec_time, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written FROM w_pg_stat_statements pss2 JOIN pg_database pd2 ON pss2.dbid = pd2.oid JOIN pg_user pu2 ON pss2.userid = pu2.usesysid ORDER BY calls DESC LIMIT 25)

# Current transaction id of the cluster. On a standby (in recovery)
# txid_current() cannot be called, so 0 is reported instead.
# The result column is aliased explicitly: an unaliased CASE expression is
# named "case" by PostgreSQL, which does not match the declared value name
# "txid_current" (all other queries alias their columns to the value names).
- name: "txid"
  help: "current txid"
  scope: cluster
  values:
    - "txid_current"
  query: SELECT CASE WHEN pg_is_in_recovery() THEN 0 ELSE txid_current() END AS txid_current

- name: "prepared_transactions" help: "prepared transactions" scope: cluster labels: - "datname" values: - "count" query: >- SELECT datname::text, COUNT(transaction) AS count FROM pg_database d LEFT JOIN pg_prepared_xacts x ON d.datname = x.database WHERE NOT d.datname ~ '^template(0|1)$' GROUP BY datname # queries run for each database (except template0/template1) - name: "pg_stat_user_tables" help: "table statistics" scope: database labels: - "datname" - "schemaname" - "relname" values: - "seq_scan" - "seq_tup_read" - "idx_scan" - "idx_tup_fetch" - "n_tup_ins" - "n_tup_upd" - "n_tup_del" - "n_tup_hot_upd" - "n_live_tup" - "n_dead_tup" - "vacuum_count" - "autovacuum_count" - "analyze_count" - "autoanalyze_count" query: >- SELECT current_database()::text AS datname, COALESCE(schemaname::text, 'null') AS schemaname, COALESCE(relname::text, 'null') AS relname, COALESCE(seq_scan, 0)::float AS seq_scan, COALESCE(seq_tup_read, 0)::float AS seq_tup_read, COALESCE(idx_scan, 0)::float AS idx_scan, COALESCE(idx_tup_fetch, 0)::float AS idx_tup_fetch, COALESCE(n_tup_ins, 0)::float AS n_tup_ins, COALESCE(n_tup_upd, 0)::float AS n_tup_upd, COALESCE(n_tup_del, 0)::float AS n_tup_del, COALESCE(n_tup_hot_upd, 0)::float AS n_tup_hot_upd, COALESCE(n_live_tup, 0)::float AS n_live_tup, COALESCE(n_dead_tup, 0)::float AS n_dead_tup, COALESCE(vacuum_count, 0)::float AS vacuum_count, COALESCE(autovacuum_count, 0)::float AS autovacuum_count, 
COALESCE(analyze_count, 0)::float AS analyze_count, COALESCE(autoanalyze_count, 0)::float AS autoanalyze_count FROM pg_stat_user_tables FULL JOIN (VALUES(0)) filler(i) ON TRUE ORDER BY n_live_tup DESC LIMIT 1000 - name: "pg_statio_user_tables" help: "IO statistics" scope: database labels: - "datname" - "schemaname" - "relname" values: - "heap_blks_read" - "heap_blks_hit" - "idx_blks_read" - "idx_blks_hit" query: >- SELECT current_database()::text AS datname, COALESCE(schemaname::text, 'null') AS schemaname, COALESCE(relname::text, 'null') AS relname, COALESCE(heap_blks_read::float, 0) AS heap_blks_read, COALESCE(heap_blks_hit::float, 0) AS heap_blks_hit, COALESCE(idx_blks_read::float, 0) AS idx_blks_read, COALESCE(idx_blks_hit::float, 0) AS idx_blks_hit FROM pg_statio_user_tables FULL JOIN (VALUES(0)) filler(i) ON TRUE ORDER BY 6 DESC LIMIT 1000 - name: "BufferAccess" help: "buffer access statistics" scope: database labels: - "datname" - "schemaname" - "relname" values: - "reads" query: >- SELECT current_database()::text AS datname, COALESCE(schemaname::text, 'null') AS schemaname, COALESCE(relname::text, 'null') AS relname, SUM(COALESCE(heap_blks_read, 0) + COALESCE(heap_blks_hit, 0) + COALESCE(idx_blks_hit, 0) + COALESCE(idx_blks_read, 0) + COALESCE(toast_blks_hit, 0) + COALESCE(toast_blks_read, 0) + COALESCE(tidx_blks_hit, 0) + COALESCE(tidx_blks_read, 0)) * 8192::bigint as reads FROM pg_statio_user_tables FULL JOIN (VALUES(0)) filler(i) ON TRUE GROUP BY 1, 2, 3 ORDER BY 3 DESC,2,1 LIMIT 1000 - name: "Maintenancecounters" help: "table maintenance job counters" scope: database labels: - "datname" values: - "vacuum_count" - "autovacuum_count" - "analyze_count" - "autoanalyze_count" query: >- SELECT current_database()::text AS datname, COALESCE(SUM(vacuum_count), 0) vacuum_count, COALESCE(SUM(autovacuum_count), 0) autovacuum_count, COALESCE(SUM(analyze_count), 0) analyze_count, COALESCE(SUM(autoanalyze_count), 0) autoanalyze_count FROM pg_stat_user_tables ORDER BY 
1,2,3 LIMIT 1000 ================================================ FILE: sql-exporter/replication.yml ================================================ # queries run once per cluster (via the 'postgres' database) - name: "archive_ready" help: "number of WAL files waiting to be archived" scope: cluster min_version: 10 values: - "archive_ready" query: SELECT COUNT(*) AS archive_ready FROM pg_ls_dir('pg_wal/archive_status') WHERE pg_ls_dir ~ '^[0-9a-fA-F]{24}\.ready$' - name: "pg_stat_archiver" help: "archiver statistics" scope: cluster values: - "archived_count" - "failed_count" query: SELECT archived_count, failed_count FROM pg_stat_archiver - name: "pg_stat_replication" help: "replication statistics" scope: cluster min_version: 10 labels: - "application_name" - "pid" values: - "send_lag_bytes" - "flush_lag_bytes" - "replay_lag_bytes" query: >- SELECT COALESCE(application_name, '')::text AS application_name, COALESCE(pid, 0)::text AS pid, COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() ELSE pg_current_wal_lsn() END, sent_lsn), 0) AS send_lag_bytes, COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() ELSE pg_current_wal_lsn() END, flush_lsn), 0) AS flush_lag_bytes, COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() ELSE pg_current_wal_lsn() END, replay_lsn), 0) AS replay_lag_bytes FROM pg_stat_replication FULL JOIN (VALUES(0)) filler(i) ON TRUE ================================================ FILE: sql-exporter/update-prometheus-sql-exporter-config ================================================
#!/usr/bin/perl

# Generate the prometheus-sql-exporter configuration for all running
# PostgreSQL clusters on this host.
#
# Usage: update-prometheus-sql-exporter-config QUERIES_DIRECTORY OUTPUT_YAML
#
# All *.yml files in QUERIES_DIRECTORY are read as lists of query
# definitions. For every running cluster (and every non-template database in
# it) the matching queries are turned into exporter jobs, and the resulting
# job list is written to OUTPUT_YAML.

use strict;
use warnings;
use PgCommon;
use Storable qw(dclone);
use YAML;

# Percent-encode a string so it can be embedded as a single URL path segment.
# Everything except RFC 3986 "unreserved" characters is encoded byte-wise.
sub uri_escape_component($)
{
    my ($string) = @_;
    # work on bytes, so non-ASCII names become UTF-8 percent-escapes
    utf8::encode($string) if utf8::is_utf8($string);
    $string =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;
    return $string;
}

# From the list of queries, remove all that do not satisfy the
# scope/version/cluster/database constraints.
#
# Arguments: array ref of query hashes, scope ('cluster' or 'database'),
# cluster version, cluster name, and optionally the database name.
# Returns an array ref of deep copies of the matching queries, with the
# filter-only keys (scope, min_version, max_version, version, cluster,
# database) removed.
# Dies if a query has a scope other than 'cluster' or 'database'.
sub filter_queries($$$$;$)
{
    my ($queries, $scope, $version, $cluster, $database) = @_;
    my @result;

    # cluster-scope calls pass no database; match against the empty string
    # instead of undef to avoid "uninitialized value" warnings
    my $dbname = $database // '';

    foreach my $query (@$queries) {
        # queries without explicit scope are per-database queries
        $query->{scope} //= 'database';
        die "scope '$query->{scope}' must be either 'cluster' or 'database'"
            unless ($query->{scope} =~ /^(cluster|database)$/);

        next if $query->{scope} ne $scope;
        next if $query->{min_version} and $version < $query->{min_version};
        next if $query->{max_version} and $version > $query->{max_version};
        # version, cluster and database constraints are regular expressions
        next if $query->{version} and $version !~ /$query->{version}/;
        next if $query->{cluster} and $cluster !~ /$query->{cluster}/;
        next if $query->{database} and $dbname !~ /$query->{database}/;

        # copy the query so the filter keys can be stripped without
        # touching the shared input list
        my $q = dclone($query);
        delete $q->{scope};
        delete $q->{min_version};
        delete $q->{max_version};
        delete $q->{version};
        delete $q->{cluster};
        delete $q->{database};
        push @result, $q;
    }

    return \@result;
}

# Generate jobs from the given queries. One job is created for all the queries
# with null interval, and one job is created per query with non-null interval.
#
# Arguments: array ref of jobs to append to, array ref of queries, and a hash
# ref with the settings (connections, name) shared by the generated jobs.
sub push_queries_as_jobs($$$)
{
    my ($jobs, $queries, $job_settings) = @_;
    my @instant;

    foreach my $query (@$queries) {
        if (!$query->{interval}) {
            push @instant, $query;
        } else {
            # Generate separate job for query with non-null interval
            my $job_settings_query = dclone($job_settings);
            $job_settings_query->{interval} = $query->{interval};
            $job_settings_query->{name} .= '/' . $query->{name};
            my $q = dclone($query);
            delete $q->{interval};
            $job_settings_query->{queries} = [$q];
            push @$jobs, $job_settings_query;
        }
    }

    if (@instant) {
        my $instant_job = dclone($job_settings);
        # Debian switched to upstream sql-exporter which does not have synchronous
        # jobs yet, so interval cannot be 0 for instant, or else sql-exporter would
        # run the queries non-stop. Our default Prometheus scrape interval is 300s,
        # so set this to 150s for now.
        $instant_job->{interval} = '150s';
        $instant_job->{queries} = \@instant;
        push @$jobs, $instant_job;
    }
}

my $queries_directory = $ARGV[0] // die "No directory for *.yml query files specified";
my $output_yaml = $ARGV[1] // die "No output yaml file specified";

# load all *.yml files from input directory and collect the contained query lists
my $queries = [];
foreach my $yml (glob "$queries_directory/*.yml") {
    my $q;
    eval { $q = YAML::LoadFile($yml); };
    die "Error loading $yml: $@" if ($@);
    next if (ref($q) eq ''); # file is empty
    die "$yml is not a yaml list:" . ref($q) unless (ref($q) eq 'ARRAY');
    push @$queries, @$q;
}

# walk all clusters and databases and produce jobs for them
my $jobs = [];
foreach my $version (get_versions()) {
    foreach my $cluster (get_version_clusters($version)) {
        my %info = cluster_info($version, $cluster);
        next unless $info{running}; # cluster is down, skip it
        my $owner = (getpwuid $info{owneruid})[0] // die "Could not determine owner name of cluster $version $cluster";
        my $socket = get_cluster_socketdir($version, $cluster);

        # jobs for cluster-wide queries
        push_queries_as_jobs($jobs,
            filter_queries($queries, 'cluster', $version, $cluster),
            {
                connections => [ "postgres://$owner\@:$info{port}/postgres?sslmode=disable&host=$socket" ],
                name => "$version/$cluster",
            });

        # jobs for per-database queries
        my @cluster_databases = get_cluster_databases($version, $cluster);
        foreach my $database (grep { $_ and $_ !~ /^template[01]$/ } @cluster_databases) {
            # database names may contain characters that are special in URLs
            # (space, '/', '?', '#', ...), so escape the name in the
            # connection URL; the job name keeps the plain database name
            my $database_escaped = uri_escape_component($database);
            push_queries_as_jobs($jobs,
                filter_queries($queries, 'database', $version, $cluster, $database),
                {
                    connections => [ "postgres://$owner\@:$info{port}/$database_escaped?sslmode=disable&host=$socket" ],
                    name => "$version/$cluster/$database",
                });
        }

        # create pg_stat_statements in "postgres" database if missing
        next if $info{recovery}; # cluster is in recovery mode, skip it
        # This is best effort: a failure is reported but must not prevent
        # the configuration from being written.
        if (open my $psql, "|-", "su -c 'psql -q -h $socket -p $info{port} postgres' $owner") {
            print $psql "DO \$\$DECLARE ext name; BEGIN SELECT INTO ext extname FROM pg_extension WHERE extname = 'pg_stat_statements'; IF NOT FOUND THEN RAISE NOTICE 'Creating pg_stat_statements extension in cluster $version/$cluster'; CREATE EXTENSION pg_stat_statements; END IF; END\$\$ LANGUAGE plpgsql;\n";
            close $psql
                or warn "psql failed while checking pg_stat_statements in cluster $version/$cluster (status $?)\n";
        } else {
            warn "Could not run psql for cluster $version/$cluster: $!\n";
        }
    }
}

# write output yml
YAML::DumpFile($output_yaml, { jobs => $jobs });
================================================ FILE: sql-exporter/wal.yml ================================================ # queries run once per cluster (via the 'postgres' database) - name: "checkpoints" help: "requested/timed checkpoints and checkpoint buffers" scope: cluster values: - "timed" - "requested" - "buffers" max_version: 16 query: >- SELECT pg_stat_get_bgwriter_timed_checkpoints() timed, pg_stat_get_bgwriter_requested_checkpoints() requested, pg_stat_get_bgwriter_buf_written_checkpoints() buffers - name: "checkpoints" help: "requested/timed checkpoints and checkpoint buffers" scope: cluster values: - "timed" - "requested" - "buffers" min_version: 17 query: >- SELECT pg_stat_get_checkpointer_num_timed() timed, pg_stat_get_checkpointer_num_requested() requested, pg_stat_get_checkpointer_buffers_written() buffers - name: "LastCheckpointDistance" help: "distance to the last checkpoint" scope: cluster min_version: 10 values: - "distance" query: SELECT pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() ELSE pg_current_wal_lsn() END, checkpoint_lsn) "distance" FROM pg_control_checkpoint() - name: "waldistance" help: "amount of WAL written" scope: cluster min_version: 10 values: - "location" query: SELECT pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() ELSE pg_current_wal_lsn() END, '0/0') "location" - name: "pg_stat_wal" help: "WAL statistics" scope: cluster min_version: 14 max_version: 17 values: - "wal_records" - "wal_fpi" - "wal_bytes" - "wal_buffers_full" - "wal_write" - "wal_sync" - "wal_write_time" - "wal_sync_time" query: >- SELECT wal_records::float, wal_fpi::float, wal_bytes::float, 
wal_buffers_full::float, wal_write::float, wal_sync::float, wal_write_time::float, wal_sync_time::float FROM pg_stat_wal; - name: "pg_stat_wal" help: "WAL statistics" scope: cluster min_version: 18 values: - "wal_records" - "wal_fpi" - "wal_bytes" - "wal_buffers_full" - "wal_write" - "wal_sync" - "wal_write_time" - "wal_sync_time" query: >- WITH io AS (SELECT coalesce(SUM(io.writes), 0) AS wal_write, coalesce(SUM(io.fsyncs), 0) AS wal_sync,coalesce(SUM(io.write_time), 0) AS wal_write_time, coalesce(SUM(io.fsync_time), 0) AS wal_sync_time FROM pg_stat_io io WHERE io.object = 'wal') SELECT w.wal_records::float, w.wal_fpi::float, w.wal_bytes::float, w.wal_buffers_full::float, io.wal_write::float, io.wal_sync::float, io.wal_write_time::float, io.wal_sync_time::float FROM pg_stat_wal w, io; ================================================ FILE: tmate/tmate.conf ================================================ # credativ tmate.conf # These settings configure tmate to use tmate.credativ.com set -g tmate-server-host "tmate.credativ.com" set -g tmate-server-port 10022 # tmate >= 2.4: set -g tmate-server-rsa-fingerprint "SHA256:a6ZNpEpVHviZ14uIb1qpJi1TOO4p+RWnn/1fPHM75Sg" # tmate <= 2.2: #set -g tmate-server-rsa-fingerprint "d2:cb:4b:24:28:cf:32:54:00:43:15:fa:db:ca:79:67" #set -g tmate-server-ecdsa-fingerprint "0e:84:4e:0d:2f:76:11:d9:5c:00:ea:6a:d1:97:c0:e3" set -g tmate-identity "" # Can be specified to use a different SSH key. 
#set -g pane-border-fg black #set -g pane-active-border-fg brightred # make tmux display things in 256 colors set -g default-terminal "screen-256color" # set scrollback history set -g history-limit 10000 ## Status bar design ## status line set -g status-bg default set -g status-fg brightred set -g status-interval 2 # messaging set -g message-fg white set -g message-bg brightred set -g message-command-fg blue set -g message-command-bg black # show host name and IP address on left side of status bar set -g status-left-length 70 set -g status-left "tmate.credativ.com #[fg=green]#h " ================================================ FILE: vagrant/.gitignore ================================================ .vagrant *.retry fact_cache/ *.box ================================================ FILE: vagrant/Vagrantfile ================================================ # -*- mode: ruby -*- # vi: set ft=ruby : # All Vagrant configuration is done below. The "2" in Vagrant.configure # configures the configuration version (we support older styles for # backwards compatibility). Please don't change it unless you know what # you're doing. Vagrant.configure("2") do |config| # The most common configuration options are documented and commented below. # For a complete reference, please see the online documentation at # https://docs.vagrantup.com. # Every Vagrant development environment requires a box. You can search for # boxes at https://atlas.hashicorp.com/search. config.vm.box = "debian/bookworm64" # Disable automatic box update checking. If you disable this, then # boxes will only be checked for updates when the user runs # `vagrant box outdated`. This is not recommended. # config.vm.box_check_update = false # Create a forwarded port mapping which allows access to a specific port # within the machine from a port on the host machine. In the example below, # accessing "localhost:8080" will access port 80 on the guest machine. 
config.vm.network "forwarded_port", guest: 80, host: 8080 config.vm.network "forwarded_port", guest: 443, host: 4433 config.vm.network "forwarded_port", guest: 3000, host: 8730 config.vm.network "forwarded_port", guest: 5432, host: 8732 config.vm.network "forwarded_port", guest: 9090, host: 8790 config.vm.network "forwarded_port", guest: 9100, host: 8791 config.vm.network "forwarded_port", guest: 4200, host: 4200 config.vm.network "forwarded_port", guest: 10090, host: 10090 # Create a private network, which allows host-only access to the machine # using a specific IP. # config.vm.network "private_network", ip: "192.168.33.10" # Create a public network, which generally matched to bridged network. # Bridged networks make the machine appear as another physical device on # your network. # config.vm.network "public_network" # Share an additional folder to the guest VM. The first argument is # the path on the host to the actual folder. The second argument is # the path on the guest to mount the folder. And the optional third # argument is a set of non-required options. config.vm.synced_folder "../", "/elephant-shed", type: "rsync", disabled: true config.vm.synced_folder "./", "/vagrant", type: "rsync", disabled: true # Provider-specific configuration so you can fine-tune various # backing providers for Vagrant. These expose provider-specific options. # Example for VirtualBox: # config.vm.provider "virtualbox" do |vb| # Display the VirtualBox GUI when booting the machine #vb.gui = true # Customize the amount of memory on the VM: vb.memory = "4096" vb.cpus = 6 end config.vm.provider "libvirt" do |lv| # Customize the amount of memory on the VM: lv.memory = "4096" lv.cpus = 2 end # # View the documentation for the provider you are using for more # information on available options. # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies # such as FTP and Heroku are also available. 
See the documentation at # https://docs.vagrantup.com/v2/push/atlas.html for more information. # config.push.define "atlas" do |push| # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" # end # Enable provisioning with a shell script. Additional provisioners such as # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the # documentation for more information about their specific syntax and use. # config.vm.provision "shell", inline: <<-SHELL # ./elephant-shed/provision.sh # SHELL config.vm.provision "ansible" do |ansible| ansible.playbook = "elephant-shed.yml" ansible.groups = { #"dev" => ["default"], "ansible" => ["default"], } end end ================================================ FILE: vagrant/Vagrantfile.template ================================================ # -*- mode: ruby -*- # vi: set ft=ruby : Vagrant.configure("2") do |config| # The most common configuration options are documented and commented below. # For a complete reference, please see the online documentation at # https://docs.vagrantup.com. # Create a forwarded port mapping which allows access to a specific port # within the machine from a port on the host machine. config.vm.network "forwarded_port", guest: 443, host: 4433 config.vm.network "forwarded_port", guest: 5432, host: 55432 # Share an additional folder to the guest VM. The first argument is # the path on the host to the actual folder. The second argument is # the path on the guest to mount the folder. And the optional third # argument is a set of non-required options. config.vm.synced_folder "./", "/vagrant", type: "rsync", disabled: true # Provider-specific configuration so you can fine-tune various # backing providers for Vagrant. These expose provider-specific options. 
# Example for VirtualBox: # config.vm.provider "virtualbox" do |vb| # Display the VirtualBox GUI when booting the machine #vb.gui = true # Customize the amount of memory on the VM: vb.memory = "4096" vb.cpus = 4 end config.vm.provider "libvirt" do |lv| # Customize the amount of memory on the VM: lv.memory = "4096" lv.cpus = 4 end config.vm.post_up_message = <<-EOM Welcome to the Elephant Shed! https://elephant-shed.io/ Default user: admin Default pass: admin The web interface is at https://localhost:4433/ PostgreSQL is listening on port 55432: psql -h localhost -p 55432 -U admin EOM end ================================================ FILE: vagrant/ansible.cfg ================================================ [defaults] inventory = inventory nocows = True interpreter_python=/usr/bin/python3 ansible_python_interpreter=/usr/bin/python3 gathering = smart fact_caching = jsonfile fact_caching_connection = fact_cache # Fix Failed to set permissions on the temporary files allow_world_readable_tmpfiles = True [ssh_connection] pipelining = True ================================================ FILE: vagrant/elephant-shed.yml ================================================ #!/usr/bin/ansible-playbook # Copyright © 2017-2019 credativ GmbH # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
---
# Main playbook: provisions a complete Elephant Shed host.
# Order matters: repositories are configured before packages are installed,
# and /etc is snapshotted with etckeeper before and after the run.
- name: elephant-shed
  hosts: all
  become: true

  handlers:
    # import_tasks (static) keeps every handler addressable by its own name
    # in `notify:`; the bare `include` action is deprecated and was removed
    # in ansible-core 2.16.
    - import_tasks: handlers.yml

  tasks:
    - name: Update package cache
      apt:
        update_cache: yes

    - name: Install pre dependencies
      package:
        name: '{{ item }}'
        state: present
      with_items:
        - apt-transport-https
        - gpg
        - etckeeper
        - lvm2

    # Update etckeeper
    - name: Record changes found in /etc as etckeeper commit
      shell: "if etckeeper unclean; then etckeeper commit 'Changes FOUND by elephant-shed.yml (ansible)'; fi"
      changed_when: False

    # `| bool` also accepts string values such as "True" passed via -e,
    # which a plain `== true` comparison would treat as false.
    - name: Partitioning
      import_tasks: tasks/partitioning.yml
      tags: partitioning
      when: partitioning | bool

    - name: Setup local repository
      import_tasks: tasks/repository-local.yml
      tags: repository-local
      when: use_local_repo | bool

    - name: Remove local repository
      apt_repository:
        repo: '{{ local_repo }}'
        filename: 'local'
        state: absent
      when: not (use_local_repo | bool)

    - import_tasks: tasks/grafana.yml
      tags: packages

    - import_tasks: tasks/packages.yml
      tags: packages

    - import_tasks: tasks/config-system.yml
      tags: config

    # The firewall is skipped when the host reports lxc virtualization.
    - import_tasks: tasks/ferm.yml
      tags:
        - ferm
        - firewall
      when: ansible_virtualization_type != "lxc"

    - import_tasks: tasks/apache2.yml
      tags: apache

    - import_tasks: tasks/postgresql.yml
      tags: postgresql

    - import_tasks: tasks/config-backup.yml
      tags:
        - config
        - backup

    # Update etckeeper
    - name: Record changes done in /etc as etckeeper commit
      shell: "if etckeeper unclean; then etckeeper commit 'Changes DONE by elephant-shed.yml (ansible)'; fi"
      changed_when: False

================================================ FILE: vagrant/files/git/gitconfig ================================================ # This is Git's per-user configuration file.
[user] # Please adapt and uncomment the following lines: # name = root # email = root@debian ================================================ FILE: vagrant/files/grafana-key.10458545.asc ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- mQGNBGTnhmkBDADUE+SzjRRyitIm1siGxiHlIlnn6KO4C4GfEuV+PNzqxvwYO+1r mcKlGDU0ugo8ohXruAOC77Kwc4keVGNU89BeHvrYbIftz/yxEneuPsCbGnbDMIyC k44UOetRtV9/59Gj5YjNqnsZCr+e5D/JfrHUJTTwKLv88A9eHKxskrlZr7Un7j3i Ef3NChlOh2Zk9Wfk8IhAqMMTferU4iTIhQk+5fanShtXIuzBaxU3lkzFSG7VuAH4 CBLPWitKRMn5oqXUE0FZbRYL/6Qz0Gt6YCJsZbaQ3Am7FCwWCp9+ZHbR9yU+bkK0 Dts4PNx4Wr9CktHIvbypT4Lk2oJEPWjcCJQHqpPQZXbnclXRlK5Ea0NVpaQdGK+v JS4HGxFFjSkvTKAZYgwOk93qlpFeDML3TuSgWxuw4NIDitvewudnaWzfl9tDIoVS Bb16nwJ8bMDzovC/RBE14rRKYtMLmBsRzGYHWd0NnX+FitAS9uURHuFxghv9GFPh eTaXvc4glM94HBUAEQEAAbQmR3JhZmFuYSBMYWJzIDxlbmdpbmVlcmluZ0BncmFm YW5hLmNvbT6JAdQEEwEKAD4CGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AWIQS1 Oud7rbYwpoMEYAWWP6J3EEWFRQUCaKhvPQUJB4NP1AAKCRCWP6J3EEWFRUjOC/9Y dWOWJLJVKzLx8uv5YVzebyw15HevhKahbznJX5fHnE8irjkiPFltVEZ4T37s5afR GBEJnR1UFd80s7jzwbuoZh/zEB3jN8q50g64AznuzDa0PWKzaY7Tgkssx3+hs6TS vIwV4z8T7f56lDudeHxHXx+htRnZ3ebKNPCJS7+G12GF6W3C3znpdjgvhVUB0uxd +42V0fRqk2GLNZeKS9988fi5dYRAy9Ozwced7ByCFjde9FBgUtrH3mG1/ibzLEh0 4k02nYjc8mrH32t4UCWpxQEJ1vZA2vT2HN3/cH/4uyFdyU6OHkMyMbz6lmeXe71d F5hOB4+/RP6Ndyj7ViRNDbm70NRBaFne/+YOJvmMfJTCh7YbF5qEn1ihGkJJ0ohE u2IB+EGEhyiDm8SIsj1uMw7n17iIPNtbsU5GgnmLtfguP/WbwKV2UeuxTpiOeYb6 blDwRlh48uHMlA5HBW+487Jktw3iPj1IKhdtAC9CU3xAvzDcseMbgmM6Xj2bSQG5 AY0EZOeGaQEMALNIFUricEIwtZiX7vSDjwxobbqPKqzdek8x3ud0CyYlrbGHy0k+ FDEXstjJQQ1s9rjJSu3sv5wyg9GDAUH3nzO976n/ZZvKPti3p2XU2UFx5gYkaaFV D56yYxqGY0YU5ft6BG+RUz3iEPg3UBUzt0sCIYnG9+CsDqGOnRYIIa46fu2/H9Vu 8JvvSq9xbsK9CfoQDkIcoQOixPuI4P7eHtswCeYR/1LUTWEnYQWsBCf57cEpzR6t 7mlQnzQo9z4i/kp4S0ybDB77wnn+isMADOS+/VpXO+M7Zj5tpfJ6PkKch3SGXdUy 3zht8luFOYpJr2lVzp7n3NwB4zW08RptTzTgFAaW/NH2JjYI+rDvQm4jNs08Dtsp nm4OQvBA9Df/6qwMEOZ9i10ixqk+55UpQFJ3nf4uKlSUM7bKXXVcD/odq804Y/K4 
y3csE059YVIyaPexEvYSYlHE2odJWRg2Q1VehmrOSC8Qps3xpU7dTHXD74ZpaYbr haViRS5v/lCsiwARAQABiQG8BBgBCgAmAhsMFiEEtTrne622MKaDBGAFlj+idxBF hUUFAmiobzkFCQeDT9AACgkQlj+idxBFhUVsmQwA0PA/zd7NqtnZ/Z8857gp2Wq2 /e4EX8nRjsW2ZlrZfbU5oMQv9OZZ4z1UjIKEUV+TnCwXEKXTMJomdekQSSayVVx/ u5w+0YM8gRuQGrG8hW0GRR8sHIeuwBFlyQrlwxUwXvDOPDYyieETjaQqMucupIKo IPm3CjFySvfizvSWUVSWBnGmQfpv6OiGYawvwfewcQHUdLMgWN3lYlzGQJL4+OMm 7XcB8VNTa586Q00fmjDfktHYvGpmhqr3gsd4gS3AjTk0zI65qXBRJkdqVnwUrMUD 8TcxXYNXf90mhR0NWkLmp6kBYiW8+QY6ndMmRVpodg1A87qgMYaZUAAlxCS4XKTU r+/YMDYOWgLN6i4UeYG/3/hsnAEHm5ITojfh6cLfdlhjohFTnD0IYw3AsNJXRzKB 1g5FTBKLLLIdXgS/3rWV1qjAd3drQVIMCku6HKl/vT4ftrBHeSyV7eLwOYbe3/bw 8VMx+lmMheD8/qJMia1om0iBBRSXRjY//f+Lllqm =TH3J -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: vagrant/files/postgresql/ACCC4CF8.asc ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- mQINBE6XR8IBEACVdDKT2HEH1IyHzXkb4nIWAY7echjRxo7MTcj4vbXAyBKOfjja UrBEJWHN6fjKJXOYWXHLIYg0hOGeW9qcSiaa1/rYIbOzjfGfhE4x0Y+NJHS1db0V G6GUj3qXaeyqIJGS2z7m0Thy4Lgr/LpZlZ78Nf1fliSzBlMo1sV7PpP/7zUO+aA4 bKa8Rio3weMXQOZgclzgeSdqtwKnyKTQdXY5MkH1QXyFIk1nTfWwyqpJjHlgtwMi c2cxjqG5nnV9rIYlTTjYG6RBglq0SmzF/raBnF4Lwjxq4qRqvRllBXdFu5+2pMfC IZ10HPRdqDCTN60DUix+BTzBUT30NzaLhZbOMT5RvQtvTVgWpeIn20i2NrPWNCUh hj490dKDLpK/v+A5/i8zPvN4c6MkDHi1FZfaoz3863dylUBR3Ip26oM0hHXf4/2U A/oA4pCl2W0hc4aNtozjKHkVjRx5Q8/hVYu+39csFWxo6YSB/KgIEw+0W8DiTII3 RQj/OlD68ZDmGLyQPiJvaEtY9fDrcSpI0Esm0i4sjkNbuuh0Cvwwwqo5EF1zfkVj Tqz2REYQGMJGc5LUbIpk5sMHo1HWV038TWxlDRwtOdzw08zQA6BeWe9FOokRPeR2 AqhyaJJwOZJodKZ76S+LDwFkTLzEKnYPCzkoRwLrEdNt1M7wQBThnC5z6wARAQAB tBxQb3N0Z3JlU1FMIERlYmlhbiBSZXBvc2l0b3J5iQJOBBMBCAA4AhsDBQsJCAcD BRUKCQgLBRYCAwEAAh4BAheAFiEEuXsK/KoaR/BE8kSgf8x9RqzMTPgFAlhtCD8A CgkQf8x9RqzMTPgECxAAk8uL+dwveTv6eH21tIHcltt8U3Ofajdo+D/ayO53LiYO xi27kdHD0zvFMUWXLGxQtWyeqqDRvDagfWglHucIcaLxoxNwL8+e+9hVFIEskQAY kVToBCKMXTQDLarz8/J030Pmcv3ihbwB+jhnykMuyyNmht4kq0CNgnlcMCdVz0d3 
z/09puryIHJrD+A8y3TD4RM74snQuwc9u5bsckvRtRJKbP3GX5JaFZAqUyZNRJRJ Tn2OQRBhCpxhlZ2afkAPFIq2aVnEt/Ie6tmeRCzsW3lOxEH2K7MQSfSu/kRz7ELf Cz3NJHj7rMzC+76Rhsas60t9CjmvMuGONEpctijDWONLCuch3Pdj6XpC+MVxpgBy 2VUdkunb48YhXNW0jgFGM/BFRj+dMQOUbY8PjJjsmVV0joDruWATQG/M4C7O8iU0 B7o6yVv4m8LDEN9CiR6r7H17m4xZseT3f+0QpMe7iQjz6XxTUFRQxXqzmNnloA1T 7VjwPqIIzkj/u0V8nICG/ktLzp1OsCFatWXh7LbU+hwYl6gsFH/mFDqVxJ3+DKQi vyf1NatzEwl62foVjGUSpvh3ymtmtUQ4JUkNDsXiRBWczaiGSuzD9Qi0ONdkAX3b ewqmN4TfE+XIpCPxxHXwGq9Rv1IFjOdCX0iG436GHyTLC1tTUIKF5xV4Y0+cXIOI RgQQEQgABgUCTpdI7gAKCRDFr3dKWFELWqaPAKD1TtT5c3sZz92Fj97KYmqbNQZP +ACfSC6+hfvlj4GxmUjp1aepoVTo3weJAhwEEAEIAAYFAk6XSQsACgkQTFprqxLS p64F8Q//cCcutwrH50UoRFejg0EIZav6LUKejC6kpLeubbEtuaIH3r2zMblPGc4i +eMQKo/PqyQrceRXeNNlqO6/exHozYi2meudxa6IudhwJIOn1MQykJbNMSC2sGUp 1W5M1N5EYgt4hy+qhlfnD66LR4G+9t5FscTJSy84SdiOuqgCOpQmPkVRm1HX5X1+ dmnzMOCk5LHHQuiacV0qeGO7JcBCVEIDr+uhU1H2u5GPFNHm5u15n25tOxVivb94 xg6NDjouECBH7cCVuW79YcExH/0X3/9G45rjdHlKPH1OIUJiiX47OTxdG3dAbB4Q fnViRJhjehFscFvYWSqXo3pgWqUsEvv9qJac2ZEMSz9x2mj0ekWxuM6/hGWxJdB+ +985rIelPmc7VRAXOjIxWknrXnPCZAMlPlDLu6+vZ5BhFX0Be3y38f7GNCxFkJzl hWZ4Cj3WojMj+0DaC1eKTj3rJ7OJlt9S9xnO7OOPEUTGyzgNIDAyCiu8F4huLPaT ape6RupxOMHZeoCVlqx3ouWctelB2oNXcxxiQ/8y+21aHfD4n/CiIFwDvIQjl7dg mT3u5Lr6yxuosR3QJx1P6rP5ZrDTP9khT30t+HZCbvs5Pq+v/9m6XDmi+NlU7Zuh Ehy97tL3uBDgoL4b/5BpFL5U9nruPlQzGq1P9jj40dxAaDAX/WKJAj0EEwEIACcC GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlB5KywFCQPDFt8ACgkQf8x9RqzM TPhuCQ//QAjRSAOCQ02qmUAikT+mTB6baOAakkYq6uHbEO7qPZkv4E/M+HPIJ4wd nBNeSQjfvdNcZBA/x0hr5EMcBneKKPDj4hJ0panOIRQmNSTThQw9OU351gm3YQct AMPRUu1fTJAL/AuZUQf9ESmhyVtWNlH/56HBfYjE4iVeaRkkNLJyX3vkWdJSMwC/ LO3Lw/0M3R8itDsm74F8w4xOdSQ52nSRFRh7PunFtREl+QzQ3EA/WB4AIj3VohIG kWDfPFCzV3cyZQiEnjAe9gG5pHsXHUWQsDFZ12t784JgkGyO5wT26pzTiuApWM3k /9V+o3HJSgH5hn7wuTi3TelEFwP1fNzI5iUUtZdtxbFOfWMnZAypEhaLmXNkg4zD kH44r0ss9fR0DAgUav1a25UnbOn4PgIEQy2fgHKHwRpCy20d6oCSlmgyWsR40EPP YvtGq49A2aK6ibXmdvvFT+Ts8Z+q2SkFpoYFX20mR2nsF0fbt1lfH65P64dukxeR 
GteWIeNakDD40bAAOH8+OaoTGVBJ2ACJfLVNM53PEoftavAwUYMrR910qvwYfd/4 6rh46g1Frr9SFMKYE9uvIJIgDsQB3QBp71houU4H55M5GD8XURYs+bfiQpJG1p7e B8e5jZx1SagNWc4XwL2FzQ9svrkbg1Y+359buUiP7T6QXX2zY++JAj0EEwEIACcC GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlEqbZUFCQg2wEEACgkQf8x9RqzM TPhFMQ//WxAfKMdpSIA9oIC/yPD/dJpY/+DyouOljpE6MucMy/ArBECjFTBwi/j9 NYM4ynAk34IkhuNexc1i9/05f5RM6+riLCLgAOsADDbHD4miZzoSxiVr6GQ3YXMb OGld9kV9Sy6mGNjcUov7iFcf5Hy5w3AjPfKuR9zXswyfzIU1YXObiiZT38l55pp/ BSgvGVQsvbNjsff5CbEKXS7q3xW+WzN0QWF6YsfNVhFjRGj8hKtHvwKcA02wwjLe LXVTm6915ZUKhZXUFc0vM4Pj4EgNswH8Ojw9AJaKWJIZmLyW+aP+wpu6YwVCicxB Y59CzBO2pPJDfKFQzUtrErk9irXeuCCLesDyirxJhv8o0JAvmnMAKOLhNFUrSQ2m +3EnF7zhfz70gHW+EG8X8mL/EN3/dUM09j6TVrjtw43RLxBzwMDeariFF9yC+5bL tnGgxjsB9Ik6GV5v34/NEEGf1qBiAzFmDVFRZlrNDkq6gmpvGnA5hUWNr+y0i01L jGyaLSWHYjgw2UEQOqcUtTFK9MNzbZze4mVaHMEz9/aMfX25R6qbiNqCChveIm8m Yr5Ds2zdZx+G5bAKdzX7nx2IUAxFQJEE94VLSp3npAaTWv3sHr7dR8tSyUJ9poDw gw4W9BIcnAM7zvFYbLF5FNggg/26njHCCN70sHt8zGxKQINMc6SJAj0EEwEIACcC GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlLpFRkFCQ6EJy0ACgkQf8x9RqzM TPjOZA//Zp0e25pcvle7cLc0YuFr9pBv2JIkLzPm83nkcwKmxaWayUIG4Sv6pH6h m8+S/CHQij/yFCX+o3ngMw2J9HBUvafZ4bnbI0RGJ70GsAwraQ0VlkIfg7GUw3Tz voGYO42rZTru9S0K/6nFP6D1HUu+U+AsJONLeb6oypQgInfXQExPZyliUnHdipei 4WR1YFW6sjSkZT/5C3J1wkAvPl5lvOVthI9Zs6bZlJLZwusKxU0UM4Btgu1Sf3nn JcHmzisixwS9PMHE+AgPWIGSec/N27a0KmTTvImV6K6nEjXJey0K2+EYJuIBsYUN orOGBwDFIhfRk9qGlpgt0KRyguV+AP5qvgry95IrYtrOuE7307SidEbSnvO5ezNe mE7gT9Z1tM7IMPfmoKph4BfpNoH7aXiQh1Wo+ChdP92hZUtQrY2Nm13cmkxYjQ4Z gMWfYMC+DA/GooSgZM5i6hYqyyfAuUD9kwRN6BqTbuAUAp+hCWYeN4D88sLYpFh3 paDYNKJ+Gf7Yyi6gThcV956RUFDH3ys5Dk0vDL9NiWwdebWfRFbzoRM3dyGP889a OyLzS3mh6nHzZrNGhW73kslSQek8tjKrB+56hXOnb4HaElTZGDvD5wmrrhN94kby Gtz3cydIohvNO9d90+29h0eGEDYti7j7maHkBKUAwlcPvMg5m3Y= =DA1T -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: vagrant/files/postgresql/pg_hba.conf ================================================ # PostgreSQL Client Authentication Configuration File # 
=================================================== # # Refer to the "Client Authentication" section in the PostgreSQL # documentation for a complete description of this file. A short # synopsis follows. # # This file controls: which hosts are allowed to connect, how clients # are authenticated, which PostgreSQL user names they can use, which # databases they can access. Records take one of these forms: # # local DATABASE USER METHOD [OPTIONS] # host DATABASE USER ADDRESS METHOD [OPTIONS] # hostssl DATABASE USER ADDRESS METHOD [OPTIONS] # hostnossl DATABASE USER ADDRESS METHOD [OPTIONS] # # (The uppercase items must be replaced by actual values.) # # The first field is the connection type: "local" is a Unix-domain # socket, "host" is either a plain or SSL-encrypted TCP/IP socket, # "hostssl" is an SSL-encrypted TCP/IP socket, and "hostnossl" is a # plain TCP/IP socket. # # DATABASE can be "all", "sameuser", "samerole", "replication", a # database name, or a comma-separated list thereof. The "all" # keyword does not match "replication". Access to replication # must be enabled in a separate record (see example below). # # USER can be "all", a user name, a group name prefixed with "+", or a # comma-separated list thereof. In both the DATABASE and USER fields # you can also write a file name prefixed with "@" to include names # from a separate file. # # ADDRESS specifies the set of hosts the record matches. It can be a # host name, or it is made up of an IP address and a CIDR mask that is # an integer (between 0 and 32 (IPv4) or 128 (IPv6) inclusive) that # specifies the number of significant bits in the mask. A host name # that starts with a dot (.) matches a suffix of the actual host name. # Alternatively, you can write an IP address and netmask in separate # columns to specify the set of hosts. 
Instead of a CIDR-address, you # can write "samehost" to match any of the server's own IP addresses, # or "samenet" to match any address in any subnet that the server is # directly connected to. # # METHOD can be "trust", "reject", "md5", "password", "gss", "sspi", # "ident", "peer", "pam", "ldap", "radius" or "cert". Note that # "password" sends passwords in clear text; "md5" is preferred since # it sends encrypted passwords. # # OPTIONS are a set of options for the authentication in the format # NAME=VALUE. The available options depend on the different # authentication methods -- refer to the "Client Authentication" # section in the documentation for a list of which options are # available for which authentication methods. # # Database and user names containing spaces, commas, quotes and other # special characters must be quoted. Quoting one of the keywords # "all", "sameuser", "samerole" or "replication" makes the name lose # its special character, and just match a database or username with # that name. # # This file is read on server startup and when the postmaster receives # a SIGHUP signal. If you edit the file on a running system, you have # to SIGHUP the postmaster for the changes to take effect. You can # use "pg_ctl reload" to do that. # Put your actual configuration here # ---------------------------------- # # If you want to allow non-local connections, you need to add more # "host" records. In that case you will also need to make PostgreSQL # listen on a non-local interface via the listen_addresses # configuration parameter, or via the -i or -h command line switches. # DO NOT DISABLE! # If you change this first entry you will need to make sure that the # database superuser can access the database using some other method. # Noninteractive access to all databases is required during automatic # maintenance (custom daily cronjobs, replication, and similar tasks). 
# # Database administrative login by Unix domain socket local all postgres peer # TYPE DATABASE USER ADDRESS METHOD # "local" is for Unix domain socket connections only local all all peer # IPv4 local connections: host all all 127.0.0.1/32 pam # IPv6 local connections: host all all ::1/128 pam host all all 0.0.0.0/0 pam host all all ::/0 pam # Allow replication connections from localhost, by a user with the # replication privilege. local replication all peer host replication postgres 127.0.0.1/32 md5 host replication postgres ::1/128 md5 ================================================ FILE: vagrant/group_vars/all ================================================ --- # Partitioning partitioning: False creat_test_loops: False vg_data_devices: /dev/sda1, /dev/sdb1 vg_data: vg_data lv_wal: lv_wal lv_wal_size: 10g lv_data: lv_data lv_data_size: 100%FREE vg_backup_devices: /dev/sdc1, /dev/sdd1 vg_backup: vg_backup lv_backup: lv_backup lv_backup_size: 100%FREE datadir_base: /mnt/pgdata waldir_base: /mnt/wal backupdir_base: /mnt/backup mount_fstype: ext4 mount_opts: noatime,nodev,noexec,nobarrier # System overcommit_ratio: 90 version: 17 clustername: main confdir: /etc/postgresql/{{ version }}/{{ clustername }} datadir: '{{ datadir_base }}/{{ version }}/{{ clustername }}' waldir: '{{ waldir_base }}/{{ version }}/{{ clustername }}' admin_user: 'admin' admin_pass: '$6$smIbQ.WUs$pOjJEGx4nk3tBGJtebUXAToQsT/DTM8BESdD/pZ4SUxfvn.q8B15YR5o0rib1QMCJicUdtj1FYAYWnZRIZ6XS1' # admin local_repo: 'deb [trusted=yes] file:/srv/repository ./' pgdg_repo: 'pgdg' # 'pgdg' or 'pgdg-testing' use_local_repo: False sql_exporter_version: '0.2.0.1' locale: - de_DE.UTF-8 UTF-8 - en_US.UTF-8 UTF-8 nofiles_hard: 131072 nofiles_soft: 65536 # Backup pgbackrest_backupdir: '{{ backupdir_base }}/pgbackrest' pgbackrest_retention_full: 4 pgbackrest_compress_level: 3 pgbackrest_spool_path: '{{ backupdir_base }}/pgbackrest_spool' pgbackrest_process_max: 2 pgbackrest_archive_async: 'y' 
pgbackrest_archive_queue_max: 1099511627776 pgbackrest_archive_push_process_max: 10 # PostgreSQL Tuning max_connections: 100 superuser_reserved_connections: 8 shared_buffers: 512MB temp_buffers: 64MB work_mem: 64MB maintenance_work_mem: 1GB vacuum_cost_limit: 400 wal_compression: off wal_log_hints: on checkpoint_timeout: 15min max_wal_size: 4GB min_wal_size: 160MB checkpoint_completion_target: 0.8 random_page_cost: 0.4 effective_cache_size: 5GB autovacuum_max_workers: 5 autovacuum_naptime: 1min max_worker_processes: 8 max_parallel_workers_per_gather: 2 track_io_timing: on ================================================ FILE: vagrant/group_vars/dev ================================================ --- use_local_repo: True # PostgreSQL Tuning max_connections: 100 shared_buffers: 1GB work_mem: 32MB maintenance_work_mem: 1GB vacuum_cost_limit: 400 max_wal_size: 1GB min_wal_size: 64MB effective_cache_size: 4GB ================================================ FILE: vagrant/group_vars/openpower ================================================ --- use_local_repo: False # Partitioning partitioning: True vg_data_devices: /dev/sda lv_wal_size: 50g vg_backup_devices: /dev/md0 # PostgreSQL Tuning max_connections: 800 superuser_reserved_connections: 16 shared_buffers: 16GB work_mem: 256MB maintenance_work_mem: 8GB vacuum_cost_limit: 800 max_wal_size: 25GB min_wal_size: 160MB checkpoint_completion_target: 0.8 random_page_cost: 2 effective_cache_size: 256GB autovacuum_max_workers: 6 autovacuum_naptime: 30s max_worker_processes: 16 max_parallel_workers_per_gather: 4 ================================================ FILE: vagrant/group_vars/test ================================================ --- # The goal of this ansible configuration is to test all components. # However, no partitioning tests are done. 
use_local_repo: False # PostgreSQL Tuning max_connections: 100 shared_buffers: 1GB work_mem: 32MB maintenance_work_mem: 1GB vacuum_cost_limit: 400 max_wal_size: 1GB min_wal_size: 64MB effective_cache_size: 4GB ================================================ FILE: vagrant/group_vars/test-with-disks ================================================ --- # The goal of this ansible configuration is to test all components, # including partitioning. use_local_repo: True creat_test_loops: True # Partitioning partitioning: True vg_data_devices: /dev/loop0 lv_wal_size: 400m vg_backup_devices: /dev/loop1 # PostgreSQL Tuning max_connections: 100 shared_buffers: 1GB work_mem: 32MB maintenance_work_mem: 1GB vacuum_cost_limit: 400 max_wal_size: 1GB min_wal_size: 64MB effective_cache_size: 4GB ================================================ FILE: vagrant/group_vars/x86-big ================================================ --- use_local_repo: False # Partitioning partitioning: True vg_data_devices: /dev/sdc lv_wal_size: 50g vg_backup_devices: /dev/md0 # PostgreSQL Tuning max_connections: 800 superuser_reserved_connections: 16 shared_buffers: 16GB work_mem: 256MB maintenance_work_mem: 8GB vacuum_cost_limit: 800 max_wal_size: 25GB min_wal_size: 160MB checkpoint_completion_target: 0.8 random_page_cost: 2 effective_cache_size: 256GB autovacuum_max_workers: 6 autovacuum_naptime: 30s max_worker_processes: 16 max_parallel_workers_per_gather: 4 ================================================ FILE: vagrant/handlers.yml ================================================ - name: apt-get update apt: update_cache: true - name: update grub2 command: update-grub # on Debian - name: apt-ftparchive shell: 'apt-ftparchive packages . 
> Packages' args: chdir: /srv/repository notify: apt-get update

# Service handlers, notified by name from the task files.

- name: restart postgresql
  service:
    name: postgresql
    state: restarted

- name: reload postgresql
  service:
    name: postgresql
    state: reloaded

- name: restart apache
  service:
    name: apache2
    state: restarted

- name: restart grafana
  service:
    name: grafana-server
    state: restarted

- name: restart prometheus
  service:
    name: prometheus
    state: restarted

- name: restart prometheus-node-exporter
  service:
    name: prometheus-node-exporter
    state: restarted

- name: restart prometheus-sql-exporter
  service:
    name: prometheus-sql-exporter
    state: restarted

- name: restart pgbadger-report
  service:
    name: pgbadger-report
    state: restarted

- name: restart pgbadger-report timer
  service:
    name: pgbadger-report.timer
    state: restarted

- name: restart update prometheus sql exporter timer
  service:
    name: update-prometheus-sql-exporter-config.timer
    state: restarted

- name: restart cockpit
  service:
    name: cockpit
    state: restarted

# The service module only accepts reloaded/restarted/started/stopped;
# the former value "reload" made this handler fail when notified.
- name: reload cockpit
  service:
    name: cockpit
    state: reloaded

- name: systemctl daemon-reload
  command: systemctl daemon-reload
  become: true

- name: restart ferm
  service:
    name: ferm
    state: restarted

# Also enables the unit so sysstat keeps collecting after a reboot.
- name: restart sysstat
  service:
    name: sysstat
    state: restarted
    enabled: true

================================================ FILE: vagrant/inventory ================================================ [vm] #elephant-shed.dev.credativ.lan ansible_host=172.26.251.138 ansible_user=debian ================================================ FILE: vagrant/openstack/Vagrantfile ================================================ require 'vagrant-openstack-plugin' Vagrant.configure('2') do |config| config.vm.box = "sharpie/dummy" config.ssh.username = 'debian' config.ssh.private_key_path = 'app_dbteam.pem' config.vm.synced_folder "./", "/vagrant", type: "rsync", disabled: true config.vm.provider "openstack" do |os| # Connection parameter os.endpoint = ENV['OS_AUTH_URL']+"/tokens" os.username = ENV['OS_USERNAME'] os.api_key =
ENV['OS_PASSWORD'] os.tenant = ENV['OS_TENANT_NAME'] os.region = ENV['OS_REGION_NAME'] # VM parameter os.server_name = "elephant-shed-test" os.flavor = "m1.large" os.image = "Debian 9 -- 20170119" os.keypair_name = "app_dbteam" os.ssh_username = "debian" os.public_network_name = "public" os.networks = %w(DEV) end config.vm.provision "ansible" do |ansible| ansible.playbook = "../elephant-shed.yml" ansible.groups = { "dev" => ["default"], "ansible" => ["default"], } end end ================================================ FILE: vagrant/tasks/apache2.yml ================================================ - name: Configure apache2 to use less resources template: src: templates/apache2/mpm_event.conf.j2 dest: /etc/apache2/mods-available/mpm_event.conf owner: root group: root mode: 0644 notify: restart apache ================================================ FILE: vagrant/tasks/config-backup.yml ================================================ --- - name: Set pgBackrest global repo-path ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'repo1-path' value: '{{ pgbackrest_backupdir }}' no_extra_spaces: true - name: Set pgBackrest global retension policy ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'repo1-retention-full' value: '{{ pgbackrest_retention_full }}' no_extra_spaces: true - name: Set pgBackrest compression level ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'compress-level' value: '{{ pgbackrest_compress_level }}' no_extra_spaces: true - name: Set pgBackrest spool path ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'spool-path' value: '{{ pgbackrest_spool_path }}' no_extra_spaces: true - name: Set pgBackrest process-max ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'process-max' value: '{{ pgbackrest_process_max }}' no_extra_spaces: true - name: Set pgBackrest async archiving ini_file: dest: /etc/pgbackrest.conf 
state: present section: 'global' option: 'archive-async' value: '{{ pgbackrest_archive_async }}' no_extra_spaces: true - name: Set pgBackrest max async queue ini_file: dest: /etc/pgbackrest.conf state: present section: 'global' option: 'archive-queue-max' value: '{{ pgbackrest_archive_queue_max }}' no_extra_spaces: true - name: Set pgBackrest archive-push process-max ini_file: dest: /etc/pgbackrest.conf state: present section: "global:archive-push" option: 'process-max' value: '{{ pgbackrest_archive_push_process_max }}' no_extra_spaces: true ================================================ FILE: vagrant/tasks/config-system.yml ================================================
# BASIC TUNING
- name: Generate ssh keys for user root
  user:
    name: root
    generate_ssh_key: yes

- name: Create admin user
  user:
    name: '{{ admin_user }}'
    # password is 'admin'
    password: '{{ admin_pass }}'
    # on_create: an existing account keeps whatever password it already has
    update_password: on_create
    comment: 'Default Admin User,,,'
    shell: /bin/bash
    groups: sudo,elephant-shed
    generate_ssh_key: yes

# Set locale
# The result is registered under its own name: registering it as `locale`
# would replace the `locale` list variable with this task's result for the
# remainder of the play.
- name: Set locale
  lineinfile:
    dest: /etc/locale.gen
    line: "{{ item }}"
  with_items: "{{ locale }}"
  register: locale_gen_result

# Regenerate locales only when /etc/locale.gen was actually modified.
- name: Gen locale
  command: locale-gen
  when: locale_gen_result is changed

# Kernel configuration
# The sysctl and grub tasks are skipped when the host reports lxc virtualization.
- name: Set vm.overcommit_ratio
  sysctl:
    name: vm.overcommit_ratio
    value: "{{ overcommit_ratio }}"
  when: ansible_virtualization_type != "lxc"

- name: Set vm.overcommit_memory=2, don't overcommit
  sysctl:
    name: vm.overcommit_memory
    value: 2
  when: ansible_virtualization_type != "lxc"

- name: Turn off memory zone reclaim
  sysctl:
    name: vm.zone_reclaim_mode
    value: 0
  when: ansible_virtualization_type != "lxc"

- name: Edit grub defaults
  lineinfile:
    dest: /etc/default/grub
    state: present
    regexp: GRUB_TIMEOUT
    line: GRUB_TIMEOUT=1
  when: ansible_virtualization_type != "lxc"
  notify:
    - update grub2

# force: no leaves an already existing /root/.gitconfig untouched.
- name: Silence etckeeper git messages
  copy:
    src: files/git/gitconfig
    dest: /root/.gitconfig
    owner: root
    group: root
    mode: 0644
    force: no

- name: Create /etc/security/limits.d/
file: path: /etc/security/limits.d owner: root mode: 0755 state: directory - name: Set max open files for postgres user template: dest: /etc/security/limits.d/postgres.conf src: templates/postgres/postgres-limits.conf.j2 - name: Set max open files for service postgresql@.service lineinfile: dest: /lib/systemd/system/postgresql@.service state: present regexp: 'LimitNOFILE' line: 'LimitNOFILE={{ nofiles_hard }}' insertafter: '^\[Service\]' notify: - systemctl daemon-reload - name: Enable sysstat metrics capturing lineinfile: dest: /etc/default/sysstat state: present regexp: ENABLED line: ENABLED="true" notify: - restart sysstat ================================================ FILE: vagrant/tasks/ferm.yml ================================================ - name: Create ferm.d file: path: /etc/ferm/ferm.d state: directory owner: root group: adm mode: 0751 - name: Configure ferm.conf template: src: templates/ferm/ferm.conf.j2 dest: /etc/ferm/ferm.conf owner: root group: root mode: 0644 notify: restart ferm - name: Configure ferm.d/elephant-shed.conf template: src: templates/ferm/elephant-shed.conf.j2 dest: /etc/ferm/ferm.d/elephant-shed.conf owner: root group: root mode: 0644 notify: restart ferm ================================================ FILE: vagrant/tasks/grafana.yml ================================================ --- - name: Get Grafana key apt_key: data: "{{ lookup('file', 'grafana-key.10458545.asc') }}" id: 10458545 state: present - name: Setup Grafana repository apt_repository: repo: "deb https://apt.grafana.com stable main" filename: 'grafana' state: present - name: Update package cache apt: update_cache: yes - name: Install Grafana apt: name: grafana state: present notify: restart grafana ================================================ FILE: vagrant/tasks/packages.yml ================================================ # pgapt - name: Install pgdg Apt key apt_key: data: "{{ lookup('file', 'postgresql/ACCC4CF8.asc') }}" id: ACCC4CF8 state: present - name: 
Setup pgdg repository apt_repository: repo: "deb http://apt.postgresql.org/pub/repos/apt/ {{ ansible_distribution_release }}-{{ pgdg_repo | default('pgdg') }} main" filename: 'pgdg' update_cache: false # We want to add the debian security repository, if not already # present. This is required if some 3rd party package depends on # versions that are not included in bookworm main but in # bookworm/updates main. We use lineinfile instead of apt_repository # to not add it if its already configured in # /etc/apt/sources.list. apt_repository would add a new entry within # /etc/apt/sources.list.d. - name: Setup debian security repository if not already present. lineinfile: dest: /etc/apt/sources.list regexp: "^deb\ .*security.debian.org.*" line: "deb http://security.debian.org/debian-security {{ ansible_distribution_release }}/updates main contrib" when: ansible_distribution_release == "buster" - name: Setup debian security repository if not already present. lineinfile: dest: /etc/apt/sources.list regexp: "^deb\ .*security.debian.org.*" line: "deb http://security.debian.org/debian-security {{ ansible_distribution_release }}-security main contrib" when: ansible_distribution_release != "buster" and ansible_distribution != "Ubuntu" # apt - name: Apt Dist-Upgrade apt: update_cache: yes upgrade: dist # packages - name: Install base packages package: name: "{{ item }}" state: present with_items: - tree - vim - screen - tmux - htop - unzip - w3m - openssh-server - ncdu - ssl-cert - net-tools - ferm - wget - sysstat # elephant shed # separate tasks, so we can see the progress better - name: Install elephant-shed-omnidb package: state=present name=elephant-shed-omnidb when: 'ansible_distribution_release != "stretch"' - name: Install elephant-shed-portal package: state=present name=elephant-shed-portal - name: Install elephant-shed-cockpit package: state=present name=elephant-shed-cockpit - name: Install elephant-shed-prometheus package: state=present name=elephant-shed-prometheus 
- name: Install elephant-shed-grafana
  package: state=present name=elephant-shed-grafana

- name: Install elephant-shed-pgbackrest
  package: state=present name=elephant-shed-pgbackrest

- name: Install elephant-shed-pgbadger
  package: state=present name=elephant-shed-pgbadger

- name: Install elephant-shed-tmate
  package: state=present name=elephant-shed-tmate

- name: Install elephant-shed-powa
  package: state=present name=elephant-shed-powa

- name: Install elephant-shed
  package: state=present name=elephant-shed

================================================
FILE: vagrant/tasks/partitioning.yml
================================================
---
# Builds the LVM layout (data + backup volume groups, WAL/data/backup logical
# volumes), formats the volumes and mounts them. The two loop-device tasks
# only run when creat_test_loops is true.

- name: Create loop devices for testing
  shell: |
    dd if=/dev/zero of=/tmp/loop0 bs=1024 count=1000000
    dd if=/dev/zero of=/tmp/loop1 bs=1024 count=1000000
  args:
    creates: /tmp/loop1
  when: creat_test_loops == True

# NOTE(review): `creates: /dev/loop1` skips this task whenever the device
# node already exists, which may be the case even if nothing is attached to
# it yet -- verify on the target image.
- name: Mount loop devices for testing
  shell: |
    losetup /dev/loop0 /tmp/loop0
    losetup /dev/loop1 /tmp/loop1
  args:
    creates: /dev/loop1
  when: creat_test_loops == True

- name: Create Data VG
  lvg:
    vg: '{{ vg_data }}'
    pvs: '{{ vg_data_devices }}'

- name: Create Backup VG
  lvg:
    vg: '{{ vg_backup }}'
    pvs: '{{ vg_backup_devices }}'

# shrink: no -- never reduce an existing volume when the configured size is
# smaller than the current one.
- name: Setup WAL LV
  lvol:
    vg: '{{ vg_data }}'
    lv: '{{ lv_wal }}'
    size: '{{ lv_wal_size }}'
    shrink: no

- name: Setup Data LV
  lvol:
    vg: '{{ vg_data }}'
    lv: '{{ lv_data }}'
    size: '{{ lv_data_size }}'
    shrink: no

- name: Setup Backup LV
  lvol:
    vg: '{{ vg_backup }}'
    lv: '{{ lv_backup }}'
    size: '{{ lv_backup_size }}'
    shrink: no

- name: Format Data Volume
  filesystem:
    fstype: '{{ mount_fstype }}'
    dev: /dev/{{ vg_data }}/{{ lv_data }}

- name: Format WAL Volume
  filesystem:
    fstype: '{{ mount_fstype }}'
    dev: /dev/{{ vg_data }}/{{ lv_wal }}

- name: Format Backup Volume
  filesystem:
    fstype: '{{ mount_fstype }}'
    dev: /dev/{{ vg_backup }}/{{ lv_backup }}

- name: Mount Data Volume
  mount:
    name: '{{ datadir_base }}' # path
    src: /dev/{{ vg_data }}/{{ lv_data }}
    fstype: '{{ mount_fstype }}'
    opts: '{{ mount_opts }}'
    state: mounted

- name: Mount WAL Volume
  mount:
    name: '{{ waldir_base }}' # path
    src: /dev/{{ vg_data }}/{{ lv_wal }}
    fstype: '{{ mount_fstype }}'
    opts: '{{ mount_opts }}'
    state: mounted

- name: Mount Backup Volume
  mount:
    name: '{{ backupdir_base }}' # path
    src: /dev/{{ vg_backup }}/{{ lv_backup }}
    fstype: '{{ mount_fstype }}'
    opts: '{{ mount_opts }}'
    state: mounted

================================================
FILE: vagrant/tasks/postgresql.yml
================================================
# PostgreSQL
- name: Install postgresql-common
  package:
    name: "{{ item }}"
    state: present
  with_items:
    - postgresql-common

# Must happen before the server packages are installed, otherwise a main
# cluster would be created with the default settings.
- name: Disable auto cluster creation
  lineinfile:
    dest: /etc/postgresql-common/createcluster.conf
    state: present
    regexp: '^.?create_main_cluster'
    line: 'create_main_cluster = false'

- name: Install elephant-shed-postgresql.deb
  package:
    name: elephant-shed-postgresql
    state: present

- name: Install PostgreSQL packages
  package:
    name: "{{ item }}"
    state: present
  with_items:
    - postgresql-{{ version }}
    - postgresql-contrib-{{ version }}
    - postgresql-{{ version }}-unit
    - check-postgres
    - python3-psycopg2 # for postgresql_user

- name: Create data volume
  file:
    path: '{{ datadir_base }}'
    owner: postgres
    group: postgres
    mode: 0700
    state: directory
    recurse: true

- name: Create WAL volume
  file:
    path: '{{ waldir_base }}'
    owner: postgres
    group: postgres
    mode: 0700
    state: directory
    recurse: true

- name: Create backup volume
  file:
    path: '{{ backupdir_base }}'
    owner: postgres
    group: postgres
    mode: 0700
    state: directory
    recurse: true

- name: Create backup dir
  file:
    path: '{{ pgbackrest_backupdir }}'
    owner: postgres
    group: postgres
    mode: 0700
    state: directory
    recurse: true

- name: Create backup spool dir
  file:
    path: '{{ pgbackrest_spool_path }}'
    owner: postgres
    group: postgres
    mode: 0700
    state: directory
    recurse: true

- name: Change data_directory for new clusters
  lineinfile:
    dest: /etc/postgresql-common/createcluster.d/elephant-shed.conf
    state: present
    regexp: '^.?data_directory'
    line: "data_directory = '{{ datadir_base }}/%v/%c'"
    insertafter: '^# Default data directory'

- name: Config waldir for new clusters
  lineinfile:
    dest: /etc/postgresql-common/createcluster.d/elephant-shed.conf
    state: present
    regexp: '^.?waldir'
    line: "waldir = '{{ waldir_base }}/%v/%c'"
    insertafter: '^# Default directory for transaction logs'

- name: Deploy performance tuning configuration for postgresql-common
  template:
    dest: /etc/postgresql-common/createcluster.d/performance-tuning.conf
    src: templates/postgresql-common/performance-tuning.conf.j2

# `creates` makes this a no-op once the cluster's postgresql.conf exists.
- name: Create {{version}}/main cluster
  shell: "pg_createcluster {{ version }} main --start"
  args:
    creates: /etc/postgresql/{{version}}/main/postgresql.conf
  notify: restart prometheus-sql-exporter

- name: Deploy pg_hba.conf
  copy:
    dest: "{{ confdir }}/pg_hba.conf"
    src: files/postgresql/pg_hba.conf
    owner: postgres
    group: postgres
    mode: 0600
  notify:
    - reload postgresql

- name: Start postgresql@{{version}}-main
  service:
    name: postgresql@{{version}}-main
    state: started

- name: Process all pending handler actions now
  meta: flush_handlers

- name: 'Create {{ admin_user }} user in PostgreSQL'
  become_user: postgres
  become_method: sudo
  postgresql_user:
    name: '{{ admin_user }}'
    role_attr_flags: LOGIN,SUPERUSER
  ignore_errors: True # Fails on read-only standby

- name: Generate ssh keys for user postgres
  user:
    name: postgres
    generate_ssh_key: yes

================================================
FILE: vagrant/tasks/repository-local.yml
================================================
# Publishes locally built elephant-shed*all.deb packages through an apt
# repository in /srv/repository and reinstalls them when they changed.
- name: Install apt-utils
  package:
    name: apt-utils
    state: present

- name: Create directory /srv/repository
  file:
    path: /srv/repository
    state: directory
    mode: 0775
  notify: apt-ftparchive

# `|| :` keeps the lookup from failing when no package has been built.
- name: Copy elephant-shed*all.deb
  copy:
    src: '../../{{item}}'
    dest: '/srv/repository/{{item}}'
  with_lines: 'cd ../.. && ls elephant-shed*all.deb || :'
  notify: apt-ftparchive
  register: localpackages

- meta: flush_handlers

- name: Setup local repository
  apt_repository:
    repo: '{{ local_repo }}'
    filename: 'local'

# Reinstall packages when changed (the version number doesn't change, so upgrade doesn't work)
# DEBIAN_FRONTEND is set through `environment` so that it reaches the apt-get
# process started by xargs; as a prefix of the pipeline it would only apply
# to the first command (dpkg -l).
- name: Reinstall elephant-shed-*all.deb
  shell: |
    dpkg -l elephant-shed\* | grep ^ii | awk '{ print $2 }' | xargs -r apt-get install --reinstall -y --force-yes
  environment:
    DEBIAN_FRONTEND: noninteractive
  when: localpackages is changed

================================================
FILE: vagrant/templates/apache2/mpm_event.conf.j2
================================================
# event MPM
# StartServers: initial number of server processes to start
# MinSpareThreads: minimum number of worker threads which are kept spare
# MaxSpareThreads: maximum number of worker threads which are kept spare
# ThreadsPerChild: constant number of worker threads in each server process
# MaxRequestWorkers: maximum number of worker threads
# MaxConnectionsPerChild: maximum number of requests a server process serves
StartServers 1
MinSpareThreads 2
MaxSpareThreads 4
ThreadLimit 64
ThreadsPerChild 16
MaxRequestWorkers 128
MaxConnectionsPerChild 0

# vim: syntax=apache ts=4 sw=4 sts=4 sr noet

================================================
FILE: vagrant/templates/ferm/elephant-shed.conf.j2
================================================
chain INPUT {
    # allow access to the web interface
    proto tcp dport (http https) ACCEPT;

    # allow access to the first 8 PostgreSQL clusters
    proto tcp dport (5432:5439) ACCEPT;
}

================================================
FILE: vagrant/templates/ferm/ferm.conf.j2
================================================
# -*- shell-script -*-
#
# Configuration file for ferm(1).
#

# Base rule set, applied to both IPv4 and IPv6. INPUT and FORWARD drop by
# default; additional rules are pulled in from ferm.d/ at the end.
domain (ip ip6) {
    table filter {
        chain INPUT {
            policy DROP;

            # connection tracking
            mod state state INVALID DROP;
            mod state state (ESTABLISHED RELATED) ACCEPT;

            # allow local packet
            interface lo ACCEPT;

            # respond to ping
            # ICMP for IPv6 is a different protocol (ipv6-icmp); "proto icmp"
            # never matches it, so with policy DROP the ip6 domain would lose
            # ping as well as neighbour discovery.
            @if @eq($DOMAIN, ip) {
                proto icmp ACCEPT;
            } @else {
                proto ipv6-icmp ACCEPT;
            }

            # allow IPsec
            proto udp dport 500 ACCEPT;
            @if @eq($DOMAIN, ip) {
                proto (esp ah) ACCEPT;
            } @else {
                proto (esp) ACCEPT;
            }

            # allow SSH connections
            proto tcp dport ssh ACCEPT;
        }
        chain OUTPUT {
            policy ACCEPT;

            # connection tracking
            #mod state state INVALID DROP;
            mod state state (ESTABLISHED RELATED) ACCEPT;
        }
        chain FORWARD {
            policy DROP;

            # connection tracking
            mod state state INVALID DROP;
            mod state state (ESTABLISHED RELATED) ACCEPT;
        }
    }
}

@include ferm.d/;

================================================
FILE: vagrant/templates/postgres/postgres-limits.conf.j2
================================================
postgres hard nofile {{ nofiles_hard }}
postgres soft nofile {{ nofiles_soft }}

================================================
FILE: vagrant/templates/postgresql-common/performance-tuning.conf.j2
================================================
#------------------------------------------------------------------------------
# CONNECTIONS AND AUTHENTICATION
#------------------------------------------------------------------------------

max_connections = {{ max_connections }}
superuser_reserved_connections = {{ superuser_reserved_connections }}

#------------------------------------------------------------------------------
# RESOURCE USAGE (except WAL)
#------------------------------------------------------------------------------

shared_buffers = {{ shared_buffers }}
temp_buffers = {{ temp_buffers }}
work_mem = {{ work_mem }}
maintenance_work_mem = {{ maintenance_work_mem }}
vacuum_cost_limit = {{ vacuum_cost_limit }}
max_worker_processes = {{ max_worker_processes }}
max_parallel_workers_per_gather = {{ max_parallel_workers_per_gather }}

#------------------------------------------------------------------------------
# WRITE AHEAD LOG
#------------------------------------------------------------------------------

wal_compression = {{ wal_compression }}
wal_log_hints = {{ wal_log_hints }}

# - Checkpoints -

checkpoint_timeout = {{ checkpoint_timeout }}
max_wal_size = {{ max_wal_size }}
min_wal_size = {{ min_wal_size }}
checkpoint_completion_target = {{ checkpoint_completion_target }}

#------------------------------------------------------------------------------
# QUERY TUNING
#------------------------------------------------------------------------------

# - Planner Cost Constants -

random_page_cost = {{ random_page_cost }}
effective_cache_size = {{ effective_cache_size }}

#------------------------------------------------------------------------------
# RUNTIME STATISTICS
#------------------------------------------------------------------------------

track_activities = on
track_counts = on
track_io_timing = {{ track_io_timing }}

#------------------------------------------------------------------------------
# AUTOVACUUM PARAMETERS
#------------------------------------------------------------------------------

autovacuum_max_workers = {{ autovacuum_max_workers }}
autovacuum_naptime = {{ autovacuum_naptime }}

================================================
FILE: vagrant/vagrantpackage.sh
================================================
#!/bin/sh

# Rebuild the Vagrant box: drop any previous box file, then package the
# current VM using Vagrantfile.template as the embedded Vagrantfile.
rm -vf elephant-shed.box
vagrant package --vagrantfile Vagrantfile.template --output elephant-shed.box