Repository: zerok/celery-prometheus-exporter Branch: master Commit: c55ec446670d Files: 25 Total size: 34.2 KB Directory structure: gitextract_kcrsm1yz/ ├── .coveragerc ├── .dockerignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── Dockerfile-celery3 ├── Dockerfile-celery4 ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── celery_prometheus_exporter.py ├── celeryapp.py ├── docker-compose.yml ├── docker-entrypoint.sh ├── requirements/ │ ├── base.txt │ ├── celery3.txt │ ├── celery4.txt │ ├── promclient030.txt │ ├── promclient050.txt │ └── test.txt ├── setup.py ├── test/ │ ├── celery_test_utils.py │ └── test_unit.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .coveragerc ================================================ [run] source = celery_prometheus_exporter [report] fail_under = 100 show_missing = True [paths] source = celery_prometheus_exporter ================================================ FILE: .dockerignore ================================================ *.img ================================================ FILE: .gitignore ================================================ *.img /dist /build /*.egg-info *.pyc __pycache__ .coverage .tox/ .cache/ ================================================ FILE: .travis.yml ================================================ sudo: false language: python python: - "2.7" - "3.4" - "3.5" - "3.6" install: pip install tox-travis tox script: tox ================================================ FILE: CONTRIBUTING.md ================================================ The initial release of celery-prometheus-exporter was intended as a minimal solution that would cover what I personally needed at my own projects. That being said, you might need completely different kinds of metrics being exposed. 
If you do, please feel free to create tickets and pull requests 🙂 As such, the more details you can provide in your tickets the better. I will try to look into each issue but please note that I might not be available all the time and that timezones exist. Please be patient 😊 ================================================ FILE: Dockerfile-celery3 ================================================ FROM python:3.6-alpine MAINTAINER Horst Gutmann RUN mkdir -p /app/requirements ADD requirements/* /app/requirements/ WORKDIR /app ENV PYTHONUNBUFFERED 1 RUN pip install -r requirements/promclient050.txt -r requirements/celery3.txt ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] CMD [] EXPOSE 8888 ================================================ FILE: Dockerfile-celery4 ================================================ FROM python:3.6-alpine MAINTAINER Horst Gutmann RUN mkdir -p /app/requirements ADD requirements/* /app/requirements/ WORKDIR /app ENV PYTHONUNBUFFERED 1 RUN pip install -r requirements/promclient050.txt -r requirements/celery4.txt ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] CMD [] EXPOSE 8888 ================================================ FILE: LICENSE.txt ================================================ MIT License Copyright (c) 2016, Horst Gutmann Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MANIFEST.in ================================================ include README.rst celery_prometheus_exporter.py ================================================ FILE: Makefile ================================================ all: celery_exporter-celery3.img celery_exporter-celery4.img celery_exporter-celery3.img: celery_prometheus_exporter.py Dockerfile-celery3 requirements/* docker build -f Dockerfile-celery3 -t celery_exporter:1-celery3 . docker save -o $@ celery_exporter:1-celery3 celery_exporter-celery4.img: celery_prometheus_exporter.py Dockerfile-celery4 requirements/* docker build -f Dockerfile-celery4 -t celery_exporter:1-celery4 . docker save -o $@ celery_exporter:1-celery4 .PHONY: clean all clean: rm -rf celery_exporter.img *.egg-info build dist publish: all docker tag celery_exporter:1-celery3 zerok/celery_exporter:1-celery3 docker tag celery_exporter:1-celery3 zerok/celery_exporter:1.3.0-celery3 docker tag celery_exporter:1-celery4 zerok/celery_exporter:1-celery4 docker tag celery_exporter:1-celery4 zerok/celery_exporter:1.3.0-celery4 docker push zerok/celery_exporter:1-celery4 docker push zerok/celery_exporter:1.3.0-celery4 docker push zerok/celery_exporter:1-celery3 docker push zerok/celery_exporter:1.3.0-celery3 ================================================ FILE: README.rst ================================================ ========================== celery-prometheus-exporter ========================== .. 
admonition:: info Sadly, for the last couple of months at the time of writing this (Sept 2019) I couldn't find the time to maintain this package anymore. I therefore decided to archive it. If you find this code useful, please fork it! A big "THANK YOU" goes to everyone who contributed to this project over the years! .. image:: https://img.shields.io/docker/automated/zerok/celery-prometheus-exporter.svg?maxAge=2592000 :target: https://hub.docker.com/r/zerok/celery-prometheus-exporter/ celery-prometheus-exporter is a little exporter for Celery related metrics in order to get picked up by Prometheus. As with other exporters like mongodb\_exporter or node\_exporter this has been implemented as a standalone-service to make reuse easier across different frameworks. So far it provides access to the following metrics: * ``celery_tasks`` exposes the number of tasks currently known to the queue grouped by ``state`` (RECEIVED, STARTED, ...). * ``celery_tasks_by_name`` exposes the number of tasks currently known to the queue grouped by ``name`` and ``state``. * ``celery_workers`` exposes the number of currently probably alive workers * ``celery_task_latency`` exposes a histogram of task latency, i.e. the time until tasks are picked up by a worker * ``celery_tasks_runtime_seconds`` tracks the number of seconds tasks take until completed as histogram How to use ========== There are multiple ways to install this. The obvious one is using ``pip install celery-prometheus-exporter`` and then using the ``celery-prometheus-exporter`` command:: $ celery-prometheus-exporter Starting HTTPD on 0.0.0.0:8888 This package only depends on Celery directly, so you will have to install whatever other dependencies you will need for it to speak with your broker 🙂 Celery workers have to be configured to send task-related events: http://docs.celeryproject.org/en/latest/userguide/configuration.html#worker-send-task-events. 
Running ``celery-prometheus-exporter`` with the ``--enable-events`` argument will periodically enable events on the workers. This is useful because it allows running celery workers with events disabled, until ``celery-prometheus-exporter`` is deployed, at which time events get enabled on the workers. Alternatively, you can use the bundled Makefile and Dockerfile to generate a Docker image. By default, the HTTPD will listen at ``0.0.0.0:8888``. If you want the HTTPD to listen on another address or port, use the ``--addr`` option or the environment variable ``DEFAULT_ADDR``. By default, this will expect the broker to be available through ``redis://redis:6379/0``, although you can change this via the environment variable ``BROKER_URL``. If you're using AMQP or something else other than Redis, take a look at the Celery documentation and install the additional requirements 😊 Also use the ``--broker`` option to specify a different broker URL. If you need to pass additional options to your broker's transport use the ``--transport-options`` option. It tries to read a dict from a JSON object. E.g. to set your master name when using Redis Sentinel for broker discovery: ``--transport-options '{"master_name": "mymaster"}'`` Use ``--tz`` to specify the timezone the Celery app is using. Otherwise the system's local time will be used. By default, buckets for histograms are the same as default ones in the prometheus client: https://github.com/prometheus/client_python#histogram. It means they are intended to cover typical web/rpc requests from milliseconds to seconds, so you may want to customize them. It can be done via environment variable ``RUNTIME_HISTOGRAM_BUCKETS`` for tasks runtime and via environment variable ``LATENCY_HISTOGRAM_BUCKETS`` for tasks latency. Buckets should be passed as a list of float values separated by a comma. E.g. ``".005, .05, 0.1, 1.0, 2.5"``. 
Use ``--queue-list`` to specify the list of queues whose length will be monitored (automatic discovery of queues isn't supported right now, see Limitations below). You can use the ``QUEUE_LIST`` environment variable as well. If you then look at the exposed metrics, you should see something like this:: $ http get http://localhost:8888/metrics | grep celery_ # HELP celery_workers Number of alive workers # TYPE celery_workers gauge celery_workers 1.0 # HELP celery_tasks Number of tasks per state # TYPE celery_tasks gauge celery_tasks{state="RECEIVED"} 3.0 celery_tasks{state="PENDING"} 0.0 celery_tasks{state="STARTED"} 1.0 celery_tasks{state="RETRY"} 2.0 celery_tasks{state="FAILURE"} 1.0 celery_tasks{state="REVOKED"} 0.0 celery_tasks{state="SUCCESS"} 8.0 # HELP celery_tasks_by_name Number of tasks per state and name # TYPE celery_tasks_by_name gauge celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RECEIVED"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="PENDING"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="STARTED"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RETRY"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="FAILURE"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="REVOKED"} 0.0 celery_tasks_by_name{name="my_app.tasks.calculate_something",state="SUCCESS"} 1.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RECEIVED"} 3.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="PENDING"} 0.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="STARTED"} 1.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RETRY"} 2.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="FAILURE"} 1.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="REVOKED"} 0.0 celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="SUCCESS"} 7.0 # HELP celery_task_latency 
Seconds between a task is received and started. # TYPE celery_task_latency histogram celery_task_latency_bucket{le="0.005"} 2.0 celery_task_latency_bucket{le="0.01"} 3.0 celery_task_latency_bucket{le="0.025"} 4.0 celery_task_latency_bucket{le="0.05"} 4.0 celery_task_latency_bucket{le="0.075"} 5.0 celery_task_latency_bucket{le="0.1"} 5.0 celery_task_latency_bucket{le="0.25"} 5.0 celery_task_latency_bucket{le="0.5"} 5.0 celery_task_latency_bucket{le="0.75"} 5.0 celery_task_latency_bucket{le="1.0"} 5.0 celery_task_latency_bucket{le="2.5"} 8.0 celery_task_latency_bucket{le="5.0"} 11.0 celery_task_latency_bucket{le="7.5"} 11.0 celery_task_latency_bucket{le="10.0"} 11.0 celery_task_latency_bucket{le="+Inf"} 11.0 celery_task_latency_count 11.0 celery_task_latency_sum 16.478713035583496 celery_queue_length{queue_name="queue1"} 35.0 celery_queue_length{queue_name="queue2"} 0.0 Limitations =========== * Among tons of other features celery-prometheus-exporter doesn't support stats for multiple queues. As far as I can tell, only the routing key is exposed through the events API which might be enough to figure out the final queue, though. * This has only been tested with Redis so far. * At this point, you should specify the queues that will be monitored using an environment variable or an arg (`--queue-list`). 
================================================ FILE: celery_prometheus_exporter.py ================================================ from __future__ import print_function import argparse import celery import celery.states import celery.events import collections from itertools import chain import logging import prometheus_client import signal import sys import threading import time import json import os from celery.utils.objects import FallbackContext import amqp.exceptions __VERSION__ = (1, 2, 0, 'final', 0) def decode_buckets(buckets_list): return [float(x) for x in buckets_list.split(',')] def get_histogram_buckets_from_evn(env_name): if env_name in os.environ: buckets = decode_buckets(os.environ.get(env_name)) else: if hasattr(prometheus_client.Histogram, 'DEFAULT_BUCKETS'): # pragma: no cover buckets = prometheus_client.Histogram.DEFAULT_BUCKETS else: # pragma: no cover # For prometheus-client < 0.3.0 we cannot easily access # the default buckets: buckets = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, float('inf')) return buckets DEFAULT_BROKER = os.environ.get('BROKER_URL', 'redis://redis:6379/0') DEFAULT_ADDR = os.environ.get('DEFAULT_ADDR', '0.0.0.0:8888') DEFAULT_MAX_TASKS_IN_MEMORY = int(os.environ.get('DEFAULT_MAX_TASKS_IN_MEMORY', '10000')) RUNTIME_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('RUNTIME_HISTOGRAM_BUCKETS') LATENCY_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('LATENCY_HISTOGRAM_BUCKETS') DEFAULT_QUEUE_LIST = os.environ.get('QUEUE_LIST', []) LOG_FORMAT = '[%(asctime)s] %(name)s:%(levelname)s: %(message)s' TASKS = prometheus_client.Gauge( 'celery_tasks', 'Number of tasks per state', ['state']) TASKS_NAME = prometheus_client.Gauge( 'celery_tasks_by_name', 'Number of tasks per state and name', ['state', 'name']) TASKS_RUNTIME = prometheus_client.Histogram( 'celery_tasks_runtime_seconds', 'Task runtime (seconds)', ['name'], buckets=RUNTIME_HISTOGRAM_BUCKETS) WORKERS = prometheus_client.Gauge( 
'celery_workers', 'Number of alive workers') LATENCY = prometheus_client.Histogram( 'celery_task_latency', 'Seconds between a task is received and started.', buckets=LATENCY_HISTOGRAM_BUCKETS) QUEUE_LENGTH = prometheus_client.Gauge( 'celery_queue_length', 'Number of tasks in the queue.', ['queue_name'] ) class MonitorThread(threading.Thread): """ MonitorThread is the thread that will collect the data that is later exposed from Celery using its eventing system. """ def __init__(self, app=None, *args, **kwargs): self._app = app self.log = logging.getLogger('monitor') self.log.info('Setting up monitor...') max_tasks_in_memory = kwargs.pop('max_tasks_in_memory', DEFAULT_MAX_TASKS_IN_MEMORY) self._state = self._app.events.State( max_tasks_in_memory=max_tasks_in_memory) self._known_states = set() self._known_states_names = set() self._tasks_started = dict() super(MonitorThread, self).__init__(*args, **kwargs) def run(self): # pragma: no cover self._monitor() def _process_event(self, evt): # Events might come in in parallel. Celery already has a lock # that deals with this exact situation so we'll use that for now. 
with self._state._mutex: if celery.events.group_from(evt['type']) == 'task': evt_state = evt['type'][5:] try: # Celery 4 state = celery.events.state.TASK_EVENT_TO_STATE[evt_state] except AttributeError: # pragma: no cover # Celery 3 task = celery.events.state.Task() task.event(evt_state) state = task.state if state == celery.states.STARTED: self._observe_latency(evt) self._collect_tasks(evt, state) def _observe_latency(self, evt): try: prev_evt = self._state.tasks[evt['uuid']] except KeyError: # pragma: no cover pass else: # ignore latency if it is a retry if prev_evt.state == celery.states.RECEIVED: LATENCY.observe( evt['local_received'] - prev_evt.local_received) def _collect_tasks(self, evt, state): if state in celery.states.READY_STATES: self._incr_ready_task(evt, state) else: # add event to list of in-progress tasks self._state._event(evt) self._collect_unready_tasks() def _incr_ready_task(self, evt, state): TASKS.labels(state=state).inc() try: # remove event from list of in-progress tasks event = self._state.tasks.pop(evt['uuid']) TASKS_NAME.labels(state=state, name=event.name).inc() if 'runtime' in evt: TASKS_RUNTIME.labels(name=event.name) \ .observe(evt['runtime']) except (KeyError, AttributeError): # pragma: no cover pass def _collect_unready_tasks(self): # count unready tasks by state cnt = collections.Counter(t.state for t in self._state.tasks.values()) self._known_states.update(cnt.elements()) for task_state in self._known_states: TASKS.labels(state=task_state).set(cnt[task_state]) # count unready tasks by state and name cnt = collections.Counter( (t.state, t.name) for t in self._state.tasks.values() if t.name) self._known_states_names.update(cnt.elements()) for task_state in self._known_states_names: TASKS_NAME.labels( state=task_state[0], name=task_state[1], ).set(cnt[task_state]) def _monitor(self): # pragma: no cover while True: try: self.log.info('Connecting to broker...') with self._app.connection() as conn: recv = self._app.events.Receiver(conn, 
handlers={ '*': self._process_event, }) setup_metrics(self._app) recv.capture(limit=None, timeout=None, wakeup=True) self.log.info("Connected to broker") except Exception: self.log.exception("Queue connection failed") setup_metrics(self._app) time.sleep(5) class WorkerMonitoringThread(threading.Thread): celery_ping_timeout_seconds = 5 periodicity_seconds = 5 def __init__(self, app=None, *args, **kwargs): self._app = app self.log = logging.getLogger('workers-monitor') super(WorkerMonitoringThread, self).__init__(*args, **kwargs) def run(self): # pragma: no cover while True: self.update_workers_count() time.sleep(self.periodicity_seconds) def update_workers_count(self): try: WORKERS.set(len(self._app.control.ping( timeout=self.celery_ping_timeout_seconds))) except Exception: # pragma: no cover self.log.exception("Error while pinging workers") class EnableEventsThread(threading.Thread): periodicity_seconds = 5 def __init__(self, app=None, *args, **kwargs): # pragma: no cover self._app = app self.log = logging.getLogger('enable-events') super(EnableEventsThread, self).__init__(*args, **kwargs) def run(self): # pragma: no cover while True: try: self.enable_events() except Exception: self.log.exception("Error while trying to enable events") time.sleep(self.periodicity_seconds) def enable_events(self): self._app.control.enable_events() class QueueLengthMonitoringThread(threading.Thread): periodicity_seconds = 30 def __init__(self, app, queue_list): # type: (celery.Celery, [str]) -> None self.celery_app = app self.queue_list = queue_list self.connection = self.celery_app.connection_or_acquire() if isinstance(self.connection, FallbackContext): self.connection = self.connection.fallback() super(QueueLengthMonitoringThread, self).__init__() def measure_queues_length(self): for queue in self.queue_list: try: length = self.connection.default_channel.queue_declare(queue=queue, passive=True).message_count except (amqp.exceptions.ChannelError,) as e: logging.warning("Queue Not 
Found: {}. Setting its value to zero. Error: {}".format(queue, str(e))) length = 0 self.set_queue_length(queue, length) def set_queue_length(self, queue, length): QUEUE_LENGTH.labels(queue).set(length) def run(self): # pragma: no cover while True: self.measure_queues_length() time.sleep(self.periodicity_seconds) def setup_metrics(app): """ This initializes the available metrics with default values so that even before the first event is received, data can be exposed. """ WORKERS.set(0) logging.info('Setting up metrics, trying to connect to broker...') try: registered_tasks = app.control.inspect().registered_tasks().values() except Exception: # pragma: no cover for metric in TASKS.collect(): for sample in metric.samples: TASKS.labels(**sample[1]).set(0) for metric in TASKS_NAME.collect(): for sample in metric.samples: TASKS_NAME.labels(**sample[1]).set(0) else: for state in celery.states.ALL_STATES: TASKS.labels(state=state).set(0) for task_name in set(chain.from_iterable(registered_tasks)): TASKS_NAME.labels(state=state, name=task_name).set(0) def start_httpd(addr): # pragma: no cover """ Starts the exposing HTTPD using the addr provided in a separate thread. """ host, port = addr.split(':') logging.info('Starting HTTPD on {}:{}'.format(host, port)) prometheus_client.start_http_server(int(port), host) def shutdown(signum, frame): # pragma: no cover """ Shutdown is called if the process receives a TERM signal. This way we try to prevent an ugly stacktrace being rendered to the user on a normal shutdown. """ logging.info("Shutting down") sys.exit(0) def main(): # pragma: no cover parser = argparse.ArgumentParser() parser.add_argument( '--broker', dest='broker', default=DEFAULT_BROKER, help="URL to the Celery broker. 
Defaults to {}".format(DEFAULT_BROKER)) parser.add_argument( '--transport-options', dest='transport_options', help=("JSON object with additional options passed to the underlying " "transport.")) parser.add_argument( '--addr', dest='addr', default=DEFAULT_ADDR, help="Address the HTTPD should listen on. Defaults to {}".format( DEFAULT_ADDR)) parser.add_argument( '--enable-events', action='store_true', help="Periodically enable Celery events") parser.add_argument( '--tz', dest='tz', help="Timezone used by the celery app.") parser.add_argument( '--verbose', action='store_true', default=False, help="Enable verbose logging") parser.add_argument( '--max_tasks_in_memory', dest='max_tasks_in_memory', default=DEFAULT_MAX_TASKS_IN_MEMORY, type=int, help="Tasks cache size. Defaults to {}".format( DEFAULT_MAX_TASKS_IN_MEMORY)) parser.add_argument( '--queue-list', dest='queue_list', default=DEFAULT_QUEUE_LIST, nargs='+', help="Queue List. Will be checked for its length." ) parser.add_argument( '--version', action='version', version='.'.join([str(x) for x in __VERSION__])) opts = parser.parse_args() if opts.verbose: logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT) else: logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) signal.signal(signal.SIGINT, shutdown) signal.signal(signal.SIGTERM, shutdown) if opts.tz: os.environ['TZ'] = opts.tz time.tzset() logging.info('Setting up celery for {}'.format(opts.broker)) app = celery.Celery(broker=opts.broker) if opts.transport_options: try: transport_options = json.loads(opts.transport_options) except ValueError: print("Error parsing broker transport options from JSON '{}'" .format(opts.transport_options), file=sys.stderr) sys.exit(1) else: app.conf.broker_transport_options = transport_options setup_metrics(app) t = MonitorThread(app=app, max_tasks_in_memory=opts.max_tasks_in_memory) t.daemon = True t.start() w = WorkerMonitoringThread(app=app) w.daemon = True w.start() if opts.queue_list: if type(opts.queue_list) == str: 
queue_list = opts.queue_list.split(',') else: queue_list = opts.queue_list q = QueueLengthMonitoringThread(app=app, queue_list=queue_list) q.daemon = True q.start() e = None if opts.enable_events: e = EnableEventsThread(app=app) e.daemon = True e.start() start_httpd(opts.addr) t.join() w.join() if e is not None: e.join() if __name__ == '__main__': # pragma: no cover main() ================================================ FILE: celeryapp.py ================================================ from celery import Celery from kombu import Queue, Exchange import os import time BROKER_URL = os.getenv("BROKER_URL") RESULT_BACKEND_URL = os.getenv("RESULT_BACKEND_URL", None) celery_app = Celery( broker=BROKER_URL, ) if RESULT_BACKEND_URL: celery_app.conf.update(backend=RESULT_BACKEND_URL) celery_app.conf.update( CELERY_DEFAULT_QUEUE="queue1", CELERY_QUEUES=( Queue('queue1', exchange=Exchange('queue1', type='direct'), routing_key='queue1'), Queue('queue2', exchange=Exchange('queue2', type='direct'), routing_key='queue2'), Queue('queue3', exchange=Exchange('queue3', type='direct'), routing_key='queue3'), ), CELERY_ROUTES={ 'task1': {'queue': 'queue1', 'routing_key': 'queue1'}, 'task2': {'queue': 'queue2', 'routing_key': 'queue2'}, 'task3': {'queue': 'queue3', 'routing_key': 'queue3'}, } ) @celery_app.task def task1(): time.sleep(20) @celery_app.task def task2(): time.sleep(20) @celery_app.task def task3(): time.sleep(20) ================================================ FILE: docker-compose.yml ================================================ version: '2' services: app: image: celery-exporter:3 build: context: . dockerfile: Dockerfile-celery3 user: "65534" volumes: - ./:/app environment: - BROKER_URL=amqp://rabbit entrypoint: celery -A celeryapp worker exporter: image: celery-exporter:3 build: context: . 
dockerfile: Dockerfile-celery3 volumes: - ./:/app environment: - BROKER_URL=amqp://rabbit - QUEUE_LIST=queue1,queue2,queue3 ports: - 8888:8888 cache: image: redis:alpine rabbit: image: rabbitmq:alpine ================================================ FILE: docker-entrypoint.sh ================================================ #!/bin/sh exec python /app/celery_prometheus_exporter.py $@ ================================================ FILE: requirements/base.txt ================================================ redis==2.10.6 ================================================ FILE: requirements/celery3.txt ================================================ -r base.txt celery==3.1.25 ================================================ FILE: requirements/celery4.txt ================================================ -r base.txt celery==4.2.0 kombu==4.3.0 ================================================ FILE: requirements/promclient030.txt ================================================ prometheus_client==0.3.0 ================================================ FILE: requirements/promclient050.txt ================================================ prometheus_client==0.5.0 ================================================ FILE: requirements/test.txt ================================================ -r base.txt pytest coverage ================================================ FILE: setup.py ================================================ import io from setuptools import setup long_description = "See https://github.com/zerok/celery-prometheus-exporter" with io.open('README.rst', encoding='utf-8') as fp: long_description = fp.read() setup( name='celery-prometheus-exporter', description="Simple Prometheus metrics exporter for Celery", long_description=long_description, version='1.7.0', author='Horst Gutmann', license='MIT', author_email='horst@zerokspot.com', url='https://github.com/zerok/celery-prometheus-exporter', classifiers=[ 'Development Status :: 3 - Alpha', 'Environment :: Console', 
'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3 :: Only', ], py_modules=[ 'celery_prometheus_exporter', ], install_requires=[ 'celery>=3', 'prometheus_client>=0.0.20', ], entry_points={ 'console_scripts': [ 'celery-prometheus-exporter = celery_prometheus_exporter:main', ], } ) ================================================ FILE: test/celery_test_utils.py ================================================ import celery import time from kombu import Queue, Exchange def get_celery_app(queue=None): app = celery.Celery(broker='memory://', backend='cache+memory://') if queue: app.conf.update( CELERY_DEFAULT_QUEUE=queue, CELERY_QUEUES=( Queue(queue, exchange=Exchange(queue, type='direct'), routing_key=queue), ), CELERY_ROUTES={ 'task1': {'queue': queue, 'routing_key': queue}, } ) return app class SampleTask(celery.Task): name = 'sample-task' def run(self, *args, **kwargs): time.sleep(10) ================================================ FILE: test/test_unit.py ================================================ from time import time import os import celery import celery.states import amqp.exceptions from celery.events import Event from celery.utils import uuid from prometheus_client import REGISTRY from unittest import TestCase try: from unittest.mock import patch except ImportError: from mock import patch from celery_prometheus_exporter import ( WorkerMonitoringThread, setup_metrics, MonitorThread, EnableEventsThread, TASKS, get_histogram_buckets_from_evn, QueueLengthMonitoringThread, QUEUE_LENGTH) from celery_test_utils import get_celery_app, SampleTask class TestBucketLoading(TestCase): def tearDown(self): if 'TEST_BUCKETS' in os.environ: del os.environ['TEST_BUCKETS'] def test_default_buckets(self): self.assertIsNotNone(get_histogram_buckets_from_evn('TEST_BUCKETS')) def test_from_env(self): os.environ['TEST_BUCKETS'] = '1,2,3' self.assertEqual([1.0, 2.0, 3.0], 
get_histogram_buckets_from_evn('TEST_BUCKETS')) class TestFallbackSetup(TestCase): def test_fallback(self): TASKS.labels(state='RUNNING').set(0) setup_metrics(None) class TestMockedCelery(TestCase): task = 'my_task' def setUp(self): self.app = get_celery_app() with patch('celery.task.control.inspect.registered_tasks') as tasks: tasks.return_value = {'worker1': [self.task]} setup_metrics(self.app) # reset metrics def test_initial_metric_values(self): self._assert_task_states(celery.states.ALL_STATES, 0) assert REGISTRY.get_sample_value('celery_workers') == 0 assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 def test_workers_count(self): assert REGISTRY.get_sample_value('celery_workers') == 0 with patch.object(self.app.control, 'ping') as mock_ping: w = WorkerMonitoringThread(app=self.app) mock_ping.return_value = [] w.update_workers_count() assert REGISTRY.get_sample_value('celery_workers') == 0 mock_ping.return_value = [0] # 1 worker w.update_workers_count() assert REGISTRY.get_sample_value('celery_workers') == 1 mock_ping.return_value = [0, 0] # 2 workers w.update_workers_count() assert REGISTRY.get_sample_value('celery_workers') == 2 mock_ping.return_value = [] w.update_workers_count() assert REGISTRY.get_sample_value('celery_workers') == 0 def test_tasks_events(self): task_uuid = uuid() hostname = 'myhost' local_received = time() latency_before_started = 123.45 runtime = 234.5 m = MonitorThread(app=self.app) self._assert_task_states(celery.states.ALL_STATES, 0) assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 m._process_event(Event( 'task-received', uuid=task_uuid, name=self.task, args='()', kwargs='{}', retries=0, eta=None, hostname=hostname, clock=0, local_received=local_received)) self._assert_all_states({celery.states.RECEIVED}) m._process_event(Event( 'task-started', uuid=task_uuid, 
hostname=hostname, clock=1, name=self.task, local_received=local_received + latency_before_started)) self._assert_all_states({celery.states.STARTED}) m._process_event(Event( 'task-succeeded', uuid=task_uuid, result='42', runtime=runtime, hostname=hostname, clock=2, local_received=local_received + latency_before_started + runtime)) self._assert_all_states({celery.states.SUCCESS}) assert REGISTRY.get_sample_value('celery_task_latency_count') == 1 self.assertAlmostEqual(REGISTRY.get_sample_value( 'celery_task_latency_sum'), latency_before_started) assert REGISTRY.get_sample_value( 'celery_tasks_runtime_seconds_count', labels=dict(name=self.task)) == 1 assert REGISTRY.get_sample_value( 'celery_tasks_runtime_seconds_sum', labels=dict(name=self.task)) == 234.5 def test_enable_events(self): with patch.object( self.app.control, 'enable_events') as mock_enable_events: e = EnableEventsThread(app=self.app) e.enable_events() mock_enable_events.assert_called_once_with() def test_can_measure_queue_length(self): celery_app = get_celery_app(queue='realqueue') sample_task = SampleTask() sample_task.app = celery_app monitoring_thread_instance = QueueLengthMonitoringThread(celery_app, queue_list=['realqueue']) sample_task.delay() monitoring_thread_instance.measure_queues_length() sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'realqueue'}) self.assertEqual(1.0, sample) def test_set_zero_on_queue_length_when_an_channel_layer_error_occurs_during_queue_read(self): instance = QueueLengthMonitoringThread(app=self.app, queue_list=['noqueue']) instance.measure_queues_length() sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'noqueue'}) self.assertEqual(0.0, sample) def _assert_task_states(self, states, cnt): for state in states: assert REGISTRY.get_sample_value( 'celery_tasks', labels=dict(state=state)) == cnt task_by_name_label = dict(state=state, name=self.task) assert REGISTRY.get_sample_value( 'celery_tasks_by_name', 
labels=task_by_name_label) == cnt def _assert_all_states(self, exclude): self._assert_task_states(celery.states.ALL_STATES - exclude, 0) self._assert_task_states(exclude, 1) def _setup_task_with_celery_and_queue_support(self, queue_name, task, celery_app): task.app = celery_app return task ================================================ FILE: tox.ini ================================================ [tox] envlist = py{27,34,35,36}-celery{3,4}-promclient{030,050}, lint [testenv] deps = -rrequirements/test.txt py27: mock promclient030: -rrequirements/promclient030.txt promclient050: -rrequirements/promclient050.txt celery3: -rrequirements/celery3.txt celery4: -rrequirements/celery4.txt commands = coverage run -m py.test -s -v {toxinidir}/test/ coverage report [testenv:lint] basepython = python3 deps = flake8>=3.3.0,<4 commands = flake8 --max-complexity 15 celery_prometheus_exporter.py test