Repository: deskmen/twisted-monitor Branch: master Commit: 5c9cfea59b89 Files: 18 Total size: 13.5 KB Directory structure: gitextract_ekwmj_tw/ ├── README.md ├── monitor_client/ │ ├── __init__.py │ ├── core/ │ │ ├── __init__.py │ │ └── heartbeat.py │ ├── main.py │ ├── plugins/ │ │ ├── __init__.py │ │ ├── cpu.py │ │ ├── load.py │ │ └── plugin_api.py │ └── runserver.py └── monitor_server/ ├── __init__.py ├── conf/ │ ├── __init__.py │ ├── hosts.py │ ├── services/ │ │ ├── __init__.py │ │ ├── generic.py │ │ └── linux.py │ └── templates.py └── runserver.tac ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ #twisted-monitor 此程序用于监控任何需要监控的服务 服务端程序可以分布部署在不同地区的服务器上,客户端连接的不同地域服务端程序 #服务端需要安装twisted pip install twisted #添加服务端监控主机: monitor_server/conf/hosts.py web_clusters.hosts,mysql_groups.hosts中添加监控客户端 web_clusters.hosts = ['192.168.1.11'] mysql_groups.hosts = ['192.168.1.11','192.168.1.12'] #启动服务端: cd monitor_server twistd -y runserver.tac #查看监控log: tail -f twistd.log #配置客户端: monitor_client/core/heartbeat.py self.host = '192.168.1.10' #启动客户端: cd monitor_client python runserver.py start #查看监控 tail -f runserver.log ================================================ FILE: monitor_client/__init__.py ================================================ ================================================ FILE: monitor_client/core/__init__.py ================================================ ================================================ FILE: monitor_client/core/heartbeat.py ================================================ #!/usr/bin/env python import socket import json class connection_server(object): def __init__(self): self.host = "192.168.1.10" self.port = 8007 def client_config(self): s = socket.socket(socket.AF_INET,socket.SOCK_STREAM) s.connect((self.host,self.port)) mark = s.recv(1024) sgin = json.loads(mark) s.close() return sgin def send_data(self,data): s = socket.socket(socket.AF_INET,socket.SOCK_STREAM) s.connect((self.host,self.port)) mark = s.send(data) s.close() ================================================ FILE: monitor_client/main.py ================================================ #!/usr/bin/env python import socket import json import time import threading from core.heartbeat import * from plugins import plugin_api class MonitorClient(object): def __init__(self): self.sgin = connection_server() self.host_config = self.sgin.client_config() print self.host_config def start(self): self.handle() def get_host_config(self): pass def handle(self): if self.host_config: while 1: for service,val in self.host_config.items(): if len(val) < 3: self.host_config[service].append(0) plugin_name,interval,last_run_timestrf = val now_time_strftime = time.time() last_time = now_time_strftime - last_run_timestrf if last_time < interval: next_run_time = interval - last_time print "service %s next run time %s"%(service,next_run_time) else: print "going to run the %s again"%service self.host_config[service][2] = time.time() t = threading.Thread(target=self.call_plugin,args=(service,plugin_name,interval)) t.start() time.sleep(1) else: print "cannot get host config" def call_plugin(self,service,plugin_name,interval): func = getattr(plugin_api,plugin_name) report_data = { 'service':service, 'data':func(), 'timestrf':time.time(), 'interval':interval } r_data = json.dumps(report_data) self.sgin.send_data(r_data) ================================================ FILE: monitor_client/plugins/__init__.py ================================================ ================================================ FILE: monitor_client/plugins/cpu.py ================================================ #!/usr/bin/env python import commands def monitor(): shell_command = "sar 1 3| grep 'Average:'" status,result = commands.getstatusoutput(shell_command) if status != 0: value_dic = {"status":status} else: value_dic = {} user,nice,system,iowait,steal,idle = result.split()[2:] value_dic = { 'user':user, 'nice':nice, 'system':system, 'iowait':iowait, 'steal':steal, 'idle':idle, 'status':status } return value_dic ================================================ FILE: monitor_client/plugins/load.py ================================================ #!/usr/bin/env python import commands def monitor(): shell_command = "uptime" status,result = commands.getstatusoutput(shell_command) if status != 0: value_dic = {"status":status} else: value_dic = {} uptime = result.split(',')[:-1][0] load1,load5,load15 = result.split('load average:')[1].split(',') value_dic = { 'uptime':uptime, 'load1':load1, 'load5':load5, 'load15':load15, 'status':status } return value_dic ================================================ FILE: monitor_client/plugins/plugin_api.py ================================================ #!/usr/bin/env python import cpu,load def get_cpu_status(): return cpu.monitor() def get_load_status(): return load.monitor() ================================================ FILE: monitor_client/runserver.py ================================================ #!/usr/bin/env python #coding:utf-8 import sys import os from main import MonitorClient def daemonize(stdin='/dev/null',stdout='/dev/null',stderr='/dev/null'): try: pid = os.fork() if pid > 0: sys.exit(0) except OSError,e: sys.stderr.write("fork #1 failed:(%d) %s\n"%(e.errno,e.strerror)) sys.exit(1) os.chdir("/") os.umask(0) os.setsid() try: pid = os.fork() if pid > 0: sys.exit(0) except OSError,e: sys.stderr.write("fork #2 failed:(%d) %s\n"%(e.errno,e.strerror)) sys.exit(1) for f in sys.stdout,sys.stderr:f.flush() si = file(stdin,'r') so = file(stdout,'a+') se = file(stderr,'a+',0) os.dup2(si.fileno(),sys.stdin.fileno()) os.dup2(so.fileno(),sys.stdout.fileno()) os.dup2(se.fileno(),sys.stderr.fileno()) def run(): sys.stdout.write('daemon started with pid %d\n'%os.getpid()) pid = os.getpid() with open(process_pid,"w") as f: f.write("%d"%pid) sys.stdout.flush() dk = MonitorClient() dk.start() if __name__ == '__main__': BASE_DIR = os.path.abspath(os.path.dirname(__file__)) process_pid = "%s/%s.pid"%(BASE_DIR,sys.argv[0].strip(".py")) process_log = "%s/%s.log"%(BASE_DIR,sys.argv[0].strip(".py")) if len(sys.argv) != 2: print "参数数目输入错误,ps:python %s start|stop|restart"%sys.argv[0] elif sys.argv[1] != "start" and sys.argv[1] != "stop" and sys.argv[1] != "restart": print "参数输入错误,ps:start|stop|restart" elif sys.argv[1] == "start": print "the process is start" daemonize('/dev/null',process_log,process_log) run() elif sys.argv[1] == "stop": with open(process_pid,"r") as f: pid = f.read() os.kill(int(pid),9) print "the process is stop" elif sys.argv[1] == "restart": try: with open(process_pid,"r") as f: pid = f.read() os.kill(int(pid),9) print "the process is stop" print "the process is start" daemonize('/dev/null',process_log,process_log) run() except OSError,e: print e print "the process is start" daemonize('/dev/null',process_log,process_log) run() except: print "error" ================================================ FILE: monitor_server/__init__.py ================================================ ================================================ FILE: monitor_server/conf/__init__.py ================================================ ================================================ FILE: monitor_server/conf/hosts.py ================================================ import templates web_clusters = templates.linuxgenerictemplate() web_clusters.hosts = [ '192.168.1.11', ] mysql_groups = templates.linux2() mysql_groups.hosts = [ '192.168.1.11', '192.168.1.12', ] monitor_group = [web_clusters,mysql_groups] def send_config(): host_config_dict = {} for group in monitor_group: for host in group.hosts: if host not in host_config_dict: host_config_dict[host] = {} for s in group.services: host_config_dict[host][s.name] = [s.plugin_name,s.interval] return host_config_dict def all_config(client): host_config_dict = {} for group in monitor_group: if client in group.hosts: for s in group.services: host_config_dict[s.name] = [s.triggers] return host_config_dict ================================================ FILE: monitor_server/conf/services/__init__.py ================================================ ================================================ FILE: monitor_server/conf/services/generic.py ================================================ class BaseService(object): def __init__(self): self.name = "Basename" self.interval = 300 self.plugin_name = "plugin_name" self.triggers = {} ================================================ FILE: monitor_server/conf/services/linux.py ================================================ from generic import BaseService class CPU(BaseService): def __init__(self): super(CPU,self).__init__() self.interval = 30 self.name = "linux_cpu" self.plugin_name = "get_cpu_status" self.triggers = { 'idle':{ 'func':'avg', 'last':10*60, 'count':1, 'operator':'lt', 'warning':40, 'critical':30, "data_type":float }, 'iowait':{ 'func':'hit', 'last':10*60, 'count':5, 'operator':'gt', 'warning':30, 'critical':40, 'data_type':float } } class LOAD(BaseService): def __init__(self): super(LOAD,self).__init__() self.interval = 30 self.name = "linux_load" self.plugin_name = "get_load_status" self.triggers = { 'load1':{ 'func':'hit', 'last':10*60, 'count':1, 'operator':'gt', 'warning':5, 'critical':10, "data_type":float }, 'load5':{ 'func':'hit', 'last':10*60, 'count':1, 'operator':'gt', 'warning':1, 'critical':10, 'data_type':float }, 'load15':{ 'func':'hit', 'last':10*60, 'count':1, 'operator':'gt', 'warning':5, 'critical':10, 'data_type':float } } class MEMORY(BaseService): def __init__(self): super(MEMORY,self).__init__() self.interval = 20 self.name = "linux_memory" self.plugin_name = "get_memory_status" self.triggers = { 'usage':{ 'func':'avg', 'last':5*60, 'count':1, 'operator':'gt', 'warning':80, 'critical':90, 'data_type':float } } class NETWORK(BaseService): def __init__(self): super(NETWORK,self).__init__() self.interval = 60 self.name = "linux_network" self.plugin_name = "get_network_status" self.triggers = { 'in':{ 'func':'hit', 'last':10*60, 'count':5, 'operator':'gt', 'warning':1024*1024*10, 'critical':1024*1024*15, 'data_type':float }, 'out':{ 'func':'hit', 'last':10*60, 'count':5, 'operator':'gt', 'warning':1024*1024*10, 'critical':1024*1024*15, 'data_type':float } } ================================================ FILE: monitor_server/conf/templates.py ================================================ from services import linux class BaseTemplate(object): def __init__(self): self.name = 'name' self.hosts = [] self.services = [] class linuxgenerictemplate(BaseTemplate): def __init__(self): super(linuxgenerictemplate,self).__init__() self.name = "linuxcommonservices" self.services = [ linux.CPU(), linux.LOAD(), ] self.services[0].interval = 60 class linux2(BaseTemplate): def __init__(self): super(linux2,self).__init__() self.name = "linux2" self.services = [ linux.CPU(), linux.NETWORK() ] ================================================ FILE: monitor_server/runserver.tac ================================================ #!/usr/bin/env python #coding:utf-8 from twisted.internet.protocol import Protocol from twisted.internet.protocol import Factory from twisted.internet import reactor from twisted.application import service,internet import time import operator import json from conf.hosts import * class Echo(Protocol): '''协议类实现用户的服务协议,例如 http,ftp,ssh 等''' def __init__(self, factory): self.factory = factory def connectionMade(self): '''连接建立时被回调的方法''' client = self.transport.getPeer().host hosts_template = send_config() print "%s 已连接"%client if client not in hosts_template: print "%s 没有加入主机监控组"%client self.transport.loseConnection() else: host_template = hosts_template[client] self.transport.write(json.dumps(host_template)) def connectionLost(self, reason): '''连接关闭时被回调的方法''' client = self.transport.getPeer().host print "%s 已断开"%client #self.factory.numProtocols = self.factory.numProtocols - 1 def dataReceived(self, data): '''接收数据的函数,当有数据到达时被回调''' client = self.transport.getPeer().host client_info = json.loads(data) client_service = client_info["service"] client_data = client_info["data"] client_timestrf = client_info["timestrf"] client_interval = client_info["interval"] client_config_all = all_config(client) client_config_info = client_config_all[client_service] client_config = client_config_info[0] if time.time() - client_timestrf < client_interval: if client_info["data"]["status"] == 0: print "%s service %s data valid"%(client,client_service) self.service_item_handle(client_config,client_data,client_service,client) else: print "%s service %s plugin error"%(client,client_service) else: expired_time = time.time() - client_timestrf - client_interval print "%s service %s data expired"%(client,client_service) def service_item_handle(self,client_config,client_data,client_service,client): for k,v in client_config.items(): print k,client_data[k] oper = v["operator"] warning_val = v["warning"] critical_val = v["critical"] oper_func = getattr(operator,oper) if v["data_type"] is float: item_data = float(client_data[k]) warning_res = oper_func(item_data,warning_val) critical_res = oper_func(item_data,critical_val) print "warning:%s critical:%s"%(warning_val,critical_val) print "warning:%s critical:%s"%(warning_res,critical_res) class EchoFactory(Factory): '''协议工厂类,当客户端建立连接的时候,创建协议对象,协议对象与客户端连接一一对应''' def buildProtocol(self, addr): return Echo(self) if __name__ == '__builtin__': # 创建监听端口 application = service.Application("echo") echoService = internet.TCPServer(8007,EchoFactory()) echoService.setServiceParent(application)