[
  {
    "path": "README.md",
    "content": "# ProxyPool\n爬取代理IP并进行测速，筛选出高速可用的IP。\n\n**0.安装方式**\n\n有两种安装方式，一种直接通过npm安装，然后直接跳到第四步：\n```bash\nnpm install ip-proxy-pool\n```\n或者通过git下载源码，然后依次执行以下步骤：\n\n**1.在目录下运行，安装依赖包**\n```bash\nnpm install\n```\n\n**2.爬取代理IP并进行测速检查**\n```bash\nnode main.js\n```\n\n**3.只检查数据库里现有的IP**\n```bash\nnode check.js\n```\n\n**4.如何在项目里使用代理池**\n```javascript\n//导入本地模块\nconst proxy = require('./proxy_pool.js')\n//如果通过npm安装\n//var proxy = require('ip-proxy-pool')\n\n//主程序，爬取ip+检查ip\nconst proxys = proxy.run\n\n//不爬取，只检查数据库里现有的ip\nconst check = proxy.check\n\n//提取所有ip\nconst ips = proxy.ips\n//ips接收一个处理函数，然后向这个函数传递两个参数，一个为错误信息，另一个为数据库里的所有ip\nips((err,response)=>{\n    console.log(response)\n})\n```\n\n\n后续更新：\n\n1. 在爬取代理IP时会先从IP池里寻找可用的IP进行爬取，没有的话才用本机的IP。\n2. 加入更多代理IP源。\n\n\n![image](https://github.com/Card007/Proxy-Pool/blob/master/other/ip_proxy.png)\n"
  },
  {
    "path": "check.js",
    "content": "const proxy = require('./proxy_pool.js')\n\nfunction main() {\n  proxy.check()\n}\n\nmain()"
  },
  {
    "path": "main.js",
    "content": "const proxy = require('./proxy_pool.js')\n\nfunction main() {\n  proxy.run()\n}\n\nmain()"
  },
  {
    "path": "package.json",
    "content": "{\n  \"name\": \"ip-proxy-pool\",\n  \"version\": \"1.2.1\",\n  \"description\": \"爬取代理IP并进行测速，筛选出高速可用的ip\",\n  \"main\": \"proxy_pool.js\",\n  \"dependencies\": {\n    \"cheerio\": \"^1.0.0-rc.2\",\n    \"request\": \"^2.88.0\",\n    \"sqlite3\": \"^4.0.6\"\n  },\n  \"devDependencies\": {},\n  \"scripts\": {\n    \"test\": \"echo \\\"Error: no test specified\\\" && exit 1\"\n  },\n  \"repository\": {\n    \"type\": \"git\",\n    \"url\": \"git+https://github.com/Card007/ProxyPool.git\"\n  },\n  \"keywords\": [\"proxy\",\"ip\",\"proxy-pool\"],\n  \"author\": \"Card007\",\n  \"license\": \"ISC\",\n  \"bugs\": {\n    \"url\": \"https://github.com/Card007/ProxyPool/issues\"\n  },\n  \"homepage\": \"https://github.com/Card007/ProxyPool#readme\"\n}\n"
  },
  {
    "path": "proxy_pool.js",
    "content": "const request = require('request')\nconst cheerio = require('cheerio')\nconst sqlite3 = require('sqlite3')\n\nconst db = new sqlite3.Database('Proxy.db', (err) => {\n    if(!err){\n        console.log('打开成功')\n    } else {\n        console.log(err)\n    }\n})\n\ndb.run('CREATE TABLE proxy(ip char(15), port char(15), type char(15))',(err) => {})\n\nconst useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'\n\nconst headers = {\n    'User-Agent': useragent,\n}\n\n//添加数据文件\nconst insertDb = function(ip, port, type){\n    db.run(\"INSERT INTO proxy VALUES(?, ?, ?)\",[ip,port,type])\n}\n\n//提取优化文件数据\nconst clearN = function(l){\n    let index = 0\n    for (let i = 0; i < l.length; i++) {\n        if(l[i] === '' || l[i] === '\\n'){\n        }else{\n            let ips = l[i].replace('\\n','')\n            if (index === 0){\n                var ip = ips\n                console.log('爬取ip:' + ip)\n            } else if(index === 1){\n                var port = ips\n            } else if(index === 4){\n                var type = ips\n            }\n            index += 1\n        }\n    }\n    insertDb(ip, port, type)\n}\n\n//分析网页内容\nconst loadHtml = function(data){\n    let l = []\n    let e = cheerio.load(data)\n    e('tr').each(function(i, elem){\n        l[i] = e(this).text()\n    })\n    for (let i = 1; i < l.length; i ++){\n        clearN(l[i].split(' '))\n    }\n}\n\n//链接网络\nconst requestProxy = function(options){\n    return new Promise((resolve, reject) => {\n        request(options, function(err, response, body){\n            if(err === null && response.statusCode === 200){\n                loadHtml(body)\n                resolve()\n            } else {\n                console.log('链接失败')\n                resolve()\n            }\n        })\n    })\n}\n\n//生成网址\nconst ipUrl = function(resolve){\n    const url = 'http://www.xicidaili.com/nn/'\n\n    let options = {\n 
       url:'http://www.xicidaili.com/nn/',\n        headers,\n    }\n    let arr = []\n   \n    return new Promise((resolve, reject) => {\n        for (let i = 1; i <= 5; i++) {\n            options.url = url + i\n            arr.push(requestProxy(options))\n        }\n        Promise.all(arr).then(function(){\n            resolve()\n        })\n    })\n}\n\n//从数据库提取所有ip\nconst allIp = function(callback){\n    return db.all('select * from proxy', callback)\n}\n\n//代理ip对象\nconst Proxys = function(ip,port,type){\n    this.ip = ip\n    this.port = port\n    this.type = type\n}\n\n//提取所有ip，通过check函数检查\nconst runIp = async function(){\n    let arr = []\n\n    allIp((err,response) => {\n        for (let i = 0; i < response.length; i++) {\n            let ip = response[i]\n            let proxy = new Proxys(ip.ip, ip.port, ip.type)\n            arr.push(check(proxy, headers))\n        }\n        Promise.all(arr).then(function(){\n            allIp((err, response)=>{\n                console.log('\\n\\n可用ip为:')\n                console.log(response)\n            })\n        })\n    })\n}\n\n//检测ip\nconst check = function(proxy, headers){\n    return new Promise((resolve, reject) => {\n        request({\n            url:'http://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js',\n            proxy: `${proxy.type.toLowerCase()}://${proxy.ip}:${proxy.port}`,\n            method:'GET',\n            timeout: 2000,\n            headers,}\n            ,function(err, response,body){\n                if(!err && response.statusCode == 200){\n                    console.log(proxy.ip+' 链接成功：')\n                    resolve()\n                } else {\n                    console.log(proxy.ip+' 链接失败')\n                    removeIp(proxy.ip)\n                    resolve()\n                }\n            }\n        )\n    })\n}\n\n//删除命令\nconst removeIp = function(ip){\n    db.run(`DELETE FROM proxy WHERE ip = '${ ip }'`, function(err){\n        if(err){\n            console.log(err)\n       
 }else {\n            console.log('成功删除：'+ip)\n        }\n    })\n}\n\nexports.run = async function(){\n    await ipUrl()\n    await runIp()\n}\n\nexports.check = function(){\n    runIp()\n}\n\nexports.ips = function(callback){\n    allIp(callback)\n}"
  }
]