[
  {
    "path": "README.md",
    "content": "# twisted-monitor\n\n此程序用于监控任何需要监控的服务。\n服务端程序可以分布部署在不同地区的服务器上,客户端可以连接不同地域的服务端程序。\n\n## 服务端需要安装twisted\n\n```\npip install twisted\n```\n\n## 添加服务端监控主机\n\n在 monitor_server/conf/hosts.py 的 web_clusters.hosts,mysql_groups.hosts 中添加监控客户端:\n\n```python\nweb_clusters.hosts = ['192.168.1.11']\nmysql_groups.hosts = ['192.168.1.11','192.168.1.12']\n```\n\n## 启动服务端\n\n```\ncd monitor_server\ntwistd -y runserver.tac\n```\n\n## 查看服务端监控log\n\n```\ntail -f twistd.log\n```\n\n## 配置客户端\n\n编辑 monitor_client/core/heartbeat.py,将 self.host 设置为服务端地址:\n\n```python\nself.host = '192.168.1.10'\n```\n\n## 启动客户端\n\n```\ncd monitor_client\npython runserver.py start\n```\n\n## 查看客户端监控log\n\n```\ntail -f runserver.log\n```\n"
  },
  {
    "path": "monitor_client/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_client/core/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_client/core/heartbeat.py",
    "content": "#!/usr/bin/env python\n\nimport socket\nimport json\n\n\n\nclass connection_server(object):\n    def __init__(self):\n        self.host = \"192.168.1.10\"\n        self.port = 8007\n    def client_config(self):\n        s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)\n        s.connect((self.host,self.port))\n        mark = s.recv(1024)\n        sgin = json.loads(mark)\n        s.close()\n        return sgin\n    def send_data(self,data):\n        s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)\n        s.connect((self.host,self.port))\n        mark = s.send(data)\n        s.close()\n\n"
  },
  {
    "path": "monitor_client/main.py",
    "content": "#!/usr/bin/env python\n\nimport socket\nimport json\nimport time\nimport threading\nfrom core.heartbeat import * \nfrom plugins import plugin_api\n\n\nclass MonitorClient(object):\n    def __init__(self):\n        self.sgin = connection_server()\n        self.host_config = self.sgin.client_config()\n        print self.host_config\n    def start(self):\n        self.handle() \n    def get_host_config(self):\n        pass\n    def handle(self):\n        if self.host_config:\n            while 1:\n                for service,val in self.host_config.items():\n                    if len(val) < 3:\n                        self.host_config[service].append(0)\n                    plugin_name,interval,last_run_timestrf = val\n                    now_time_strftime = time.time()\n                    last_time = now_time_strftime - last_run_timestrf \n                    if last_time < interval:\n                        next_run_time = interval - last_time \n                        print \"service %s next run time %s\"%(service,next_run_time)\n                    else:\n                        print \"going to run the %s again\"%service\n                        self.host_config[service][2] = time.time() \n                        t = threading.Thread(target=self.call_plugin,args=(service,plugin_name,interval))\n                        t.start()\n                time.sleep(1)\n        else:\n            print \"cannot get host config\"\n    def call_plugin(self,service,plugin_name,interval):\n        func = getattr(plugin_api,plugin_name)\n        report_data = {\n            'service':service,\n            'data':func(),\n            'timestrf':time.time(),\n            'interval':interval\n        }\n        r_data = json.dumps(report_data)\n        self.sgin.send_data(r_data)\n"
  },
  {
    "path": "monitor_client/plugins/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_client/plugins/cpu.py",
    "content": "#!/usr/bin/env python\n\nimport commands\n\n\ndef monitor():\n    shell_command = \"sar 1 3| grep 'Average:'\"\n    status,result = commands.getstatusoutput(shell_command)\n    if status != 0:\n        value_dic = {\"status\":status}\n    else:\n        value_dic = {}\n        user,nice,system,iowait,steal,idle = result.split()[2:]\n        value_dic = {\n            'user':user,\n            'nice':nice,\n            'system':system,\n            'iowait':iowait,\n            'steal':steal,\n            'idle':idle,\n            'status':status\n        }\n    return value_dic\n"
  },
  {
    "path": "monitor_client/plugins/load.py",
    "content": "#!/usr/bin/env python\n\nimport commands\n\n\ndef monitor():\n    shell_command = \"uptime\"\n    status,result = commands.getstatusoutput(shell_command)\n    if status != 0:\n        value_dic = {\"status\":status}\n    else:\n        value_dic = {}\n        uptime = result.split(',')[:-1][0]\n        load1,load5,load15 = result.split('load average:')[1].split(',')\n        value_dic = {\n            'uptime':uptime,\n            'load1':load1,\n            'load5':load5,\n            'load15':load15,\n            'status':status\n        }\n    return value_dic\n"
  },
  {
    "path": "monitor_client/plugins/plugin_api.py",
    "content": "#!/usr/bin/env python\n\nimport cpu,load\n\n\ndef get_cpu_status():\n    return cpu.monitor()\ndef get_load_status():\n    return load.monitor()\n\n\n"
  },
  {
    "path": "monitor_client/runserver.py",
    "content": "#!/usr/bin/env python\n#coding:utf-8\n\nimport sys\nimport os\nfrom main import MonitorClient\n\ndef daemonize(stdin='/dev/null',stdout='/dev/null',stderr='/dev/null'):\n    try:\n        pid = os.fork()\n        if pid > 0:\n            sys.exit(0)\n    except OSError,e:\n        sys.stderr.write(\"fork #1 failed:(%d) %s\\n\"%(e.errno,e.strerror))\n        sys.exit(1)\n\n    os.chdir(\"/\")\n    os.umask(0)\n    os.setsid()\n    try:\n        pid = os.fork()\n        if pid > 0:\n            sys.exit(0)\n    except OSError,e:\n        sys.stderr.write(\"fork #2 failed:(%d) %s\\n\"%(e.errno,e.strerror))\n        sys.exit(1)\n    for f in sys.stdout,sys.stderr:f.flush()\n    si = file(stdin,'r')\n    so = file(stdout,'a+')\n    se = file(stderr,'a+',0)\n    os.dup2(si.fileno(),sys.stdin.fileno())\n    os.dup2(so.fileno(),sys.stdout.fileno())\n    os.dup2(se.fileno(),sys.stderr.fileno())\n\n\ndef run():\n    sys.stdout.write('daemon started with pid %d\\n'%os.getpid())\n    pid = os.getpid()\n    with open(process_pid,\"w\") as f:\n        f.write(\"%d\"%pid)\n    sys.stdout.flush()\n    dk = MonitorClient()\n    dk.start()\n\n\n\nif __name__ == '__main__':\n    BASE_DIR = os.path.abspath(os.path.dirname(__file__))\n    process_pid = \"%s/%s.pid\"%(BASE_DIR,sys.argv[0].strip(\".py\"))\n    process_log = \"%s/%s.log\"%(BASE_DIR,sys.argv[0].strip(\".py\"))\n    if len(sys.argv) != 2:\n        print \"参数数目输入错误,ps:python %s start|stop|restart\"%sys.argv[0]\n    elif sys.argv[1] != \"start\" and sys.argv[1] != \"stop\" and sys.argv[1] != \"restart\":\n        print \"参数输入错误,ps:start|stop|restart\"\n    elif sys.argv[1] == \"start\":\n        print \"the process is start\"\n        daemonize('/dev/null',process_log,process_log)\n        run()\n    elif sys.argv[1] == \"stop\":\n        with open(process_pid,\"r\") as f:\n            pid = f.read()\n        os.kill(int(pid),9)\n        print \"the process is stop\"\n    elif sys.argv[1] == \"restart\":\n     
   try:\n            with open(process_pid,\"r\") as f:\n                pid = f.read()\n                os.kill(int(pid),9)\n            print \"the process is stop\"\n            print \"the process is start\"\n            daemonize('/dev/null',process_log,process_log)\n            run()\n        except OSError,e:\n            print e\n            print \"the process is start\"\n            daemonize('/dev/null',process_log,process_log)\n            run()\n        except:\n            print \"error\"\n\n"
  },
  {
    "path": "monitor_server/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_server/conf/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_server/conf/hosts.py",
    "content": "\nimport templates\n\nweb_clusters = templates.linuxgenerictemplate()\n\nweb_clusters.hosts = [\n\t\t'192.168.1.11',\n\t\t]\n\n\n\nmysql_groups = templates.linux2()\n\nmysql_groups.hosts = [\n\t\t'192.168.1.11',\n\t\t'192.168.1.12',\n\t\t]\t\t\n\nmonitor_group = [web_clusters,mysql_groups]\n\n\ndef send_config():\n\thost_config_dict = {}\n\tfor group in monitor_group:\n\t\tfor host in group.hosts:\n\t\t\tif host not in host_config_dict:\n\t\t\t\thost_config_dict[host] = {}\n\t\t\tfor s in group.services:\n\t\t\t\thost_config_dict[host][s.name] = [s.plugin_name,s.interval]\n\treturn host_config_dict\n\ndef all_config(client): \n\thost_config_dict = {}\n\tfor group in monitor_group:\n\t\tif client in group.hosts:\n\t\t\tfor s in group.services:\n\t\t\t\thost_config_dict[s.name] = [s.triggers]\n\treturn host_config_dict\n\n"
  },
  {
    "path": "monitor_server/conf/services/__init__.py",
    "content": ""
  },
  {
    "path": "monitor_server/conf/services/generic.py",
    "content": "\nclass BaseService(object):\n\tdef __init__(self):\n\t\tself.name = \"Basename\"\n\t\tself.interval = 300\n\t\tself.plugin_name = \"plugin_name\"\n\t\tself.triggers = {}\n"
  },
  {
    "path": "monitor_server/conf/services/linux.py",
    "content": "from generic import BaseService\n\nclass CPU(BaseService):\n\tdef __init__(self):\n\t\tsuper(CPU,self).__init__()\n\t\tself.interval = 30\n\t\tself.name = \"linux_cpu\"\n\t\tself.plugin_name = \"get_cpu_status\"\n\t\tself.triggers = {\n\t\t\t'idle':{\n\t\t\t\t'func':'avg',\n\t\t\t\t'last':10*60,\n\t\t\t\t'count':1,\n\t\t\t\t'operator':'lt',\n\t\t\t\t'warning':40,\n\t\t\t\t'critical':30,\n\t\t\t\t\"data_type\":float\n\t\t\t},\n\t\t\t'iowait':{\n\t\t\t\t'func':'hit',\n\t\t\t\t'last':10*60,\n\t\t\t\t'count':5,\n\t\t\t\t'operator':'gt',\n\t\t\t\t'warning':30,\n\t\t\t\t'critical':40,\n\t\t\t\t'data_type':float\n\t\t\t}\n\t\t}\n\nclass LOAD(BaseService):\n        def __init__(self):\n                super(LOAD,self).__init__()\n                self.interval = 30\n                self.name = \"linux_load\"\n                self.plugin_name = \"get_load_status\"\n                self.triggers = {\n                        'load1':{\n                                'func':'hit',\n                                'last':10*60,\n                                'count':1,\n                                'operator':'gt',\n                                'warning':5,\n                                'critical':10,\n                                \"data_type\":float\n                        },\n                        'load5':{\n                                'func':'hit',\n                                'last':10*60,\n                                'count':1,\n                                'operator':'gt',\n                                'warning':1,\n                                'critical':10,\n                                'data_type':float\n                        },\n\t\t\t'load15':{\n                                'func':'hit',\n                                'last':10*60,\n                                'count':1,\n                                'operator':'gt',\n                                'warning':5,\n                                
'critical':10,\n                                'data_type':float\n                        } \n\t\t\t\n                }\n\n\nclass MEMORY(BaseService):\n\tdef __init__(self):\n        \tsuper(MEMORY,self).__init__()\n        \tself.interval = 20\n        \tself.name = \"linux_memory\"\n        \tself.plugin_name = \"get_memory_status\"\n        \tself.triggers = {\n        \t\t'usage':{\n        \t\t\t'func':'avg',\n        \t\t\t'last':5*60,\n\t\t\t\t'count':1,\n        \t\t\t'operator':'gt',\n        \t\t\t'warning':80,\n        \t\t\t'critical':90,\n\t\t\t\t'data_type':float\n        \t\t}\n        \t}\n\n\nclass NETWORK(BaseService):\n\tdef __init__(self):\n        \tsuper(NETWORK,self).__init__()\n        \tself.interval = 60 \n        \tself.name = \"linux_network\"\n        \tself.plugin_name = \"get_network_status\"\n        \tself.triggers = {\n        \t\t'in':{\n        \t\t\t'func':'hit',\n        \t\t\t'last':10*60,\n\t\t\t\t'count':5,\n        \t\t\t'operator':'gt',\n        \t\t\t'warning':1024*1024*10,\n        \t\t\t'critical':1024*1024*15,\n\t\t\t\t'data_type':float\t\n        \t\t},\n        \t\t'out':{\n        \t\t\t'func':'hit',\n        \t\t\t'last':10*60,\n\t\t\t\t'count':5,\n        \t\t\t'operator':'gt',\n        \t\t\t'warning':1024*1024*10,\n        \t\t\t'critical':1024*1024*15,\n\t\t\t\t'data_type':float\n        \t\t}\n        \t}\n"
  },
  {
    "path": "monitor_server/conf/templates.py",
    "content": "\nfrom services import linux\n\n\nclass BaseTemplate(object):\n\tdef __init__(self):\n\t\tself.name = 'name'\n\t\tself.hosts = []\n\t\tself.services = []\n\n\nclass linuxgenerictemplate(BaseTemplate):\n\tdef __init__(self):\n\t\tsuper(linuxgenerictemplate,self).__init__()\n\t\tself.name = \"linuxcommonservices\"\n\t\tself.services = [\n\t\t\tlinux.CPU(),\n\t\t\tlinux.LOAD(),\n\t\t]\n\n\t\tself.services[0].interval = 60\n\t\nclass linux2(BaseTemplate):\n\tdef __init__(self):\n\t\tsuper(linux2,self).__init__()\n\t\tself.name = \"linux2\"\n\t\tself.services = [\n\t\t\tlinux.CPU(),\n\t\t\tlinux.NETWORK()\n\t\t]\n"
  },
  {
    "path": "monitor_server/runserver.tac",
    "content": "#!/usr/bin/env python\n#coding:utf-8\n\nfrom twisted.internet.protocol import Protocol\nfrom twisted.internet.protocol import Factory\nfrom twisted.internet import reactor\nfrom twisted.application import service,internet\nimport time\nimport operator\nimport json\nfrom conf.hosts import * \n\nclass Echo(Protocol):\n        '''协议类实现用户的服务协议，例如 http,ftp,ssh 等'''\n        def __init__(self, factory):\n                self.factory = factory\n\n        def connectionMade(self):\n                '''连接建立时被回调的方法'''\n\t\tclient = self.transport.getPeer().host\n\t\thosts_template = send_config() \n\t\tprint \"%s 已连接\"%client\n\t\tif client not in hosts_template:\n\t\t\tprint  \"%s 没有加入主机监控组\"%client\n\t\t\tself.transport.loseConnection()\n\t\telse:\n\t\t\thost_template = hosts_template[client]\n\t\t\tself.transport.write(json.dumps(host_template))\n\t\t\n\n        def connectionLost(self, reason):\n                '''连接关闭时被回调的方法'''\n\t\tclient = self.transport.getPeer().host\n\t\tprint \"%s 已断开\"%client\n                #self.factory.numProtocols = self.factory.numProtocols - 1\n\n        def dataReceived(self, data):\n                '''接收数据的函数，当有数据到达时被回调'''\n\t\tclient = self.transport.getPeer().host\n\t\tclient_info = json.loads(data)\n\t\tclient_service = client_info[\"service\"]\n\t\tclient_data = client_info[\"data\"]\n\t\tclient_timestrf = client_info[\"timestrf\"]\n\t\tclient_interval = client_info[\"interval\"]\n\t\tclient_config_all = all_config(client)\n\t\t\n\t\tclient_config_info = client_config_all[client_service]\n\t\tclient_config = client_config_info[0]\n\t\tif time.time() - client_timestrf < client_interval:\n\t\t\tif client_info[\"data\"][\"status\"] == 0:\n\t\t\t\tprint \"%s service %s data valid\"%(client,client_service)\n\t\t\t\tself.service_item_handle(client_config,client_data,client_service,client)\n\t\t\telse:\n\t\t\t\tprint \"%s service %s plugin error\"%(client,client_service)\n\t\telse:\n\t\t\texpired_time = time.time() - 
client_timestrf - client_interval\n\t\t\tprint \"%s service %s data expired\"%(client,client_service)\n\tdef service_item_handle(self,client_config,client_data,client_service,client):\n\t\tfor k,v in client_config.items():\n\t\t\tprint k,client_data[k]\n\t\t\toper = v[\"operator\"]\n\t\t\twarning_val = v[\"warning\"]\n\t\t\tcritical_val = v[\"critical\"]\n\t\t\toper_func = getattr(operator,oper)\n\n\t\t\tif v[\"data_type\"] is float:\n\t\t\t\titem_data = float(client_data[k])\n\t\t\t\twarning_res = oper_func(item_data,warning_val)\n\t\t\t\tcritical_res = oper_func(item_data,critical_val)\t\n\t\t\t\tprint \"warning:%s critical:%s\"%(warning_val,critical_val)\n\t\t\t\tprint \"warning:%s critical:%s\"%(warning_res,critical_res)\n\nclass EchoFactory(Factory):\n        '''协议工厂类，当客户端建立连接的时候，创建协议对象，协议对象与客户端连接一一对应'''\n        def buildProtocol(self, addr):\n                return Echo(self)\n\nif __name__ == '__builtin__':\n        # 创建监听端口\n\t\n\tapplication = service.Application(\"echo\")\n\techoService = internet.TCPServer(8007,EchoFactory())\n\techoService.setServiceParent(application) \n"
  }
]