Repository: Card007/Proxy-Pool Branch: master Commit: 1bd36c717315 Files: 5 Total size: 5.5 KB Directory structure: gitextract_hz3rej80/ ├── README.md ├── check.js ├── main.js ├── package.json └── proxy_pool.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # ProxyPool 爬取代理IP并进行测速,筛选出高速可用的ip。 **0.安装方式** 有两种安装方式,一种直接通过npm安装,然后直接跳到第四步: ```javascript npm install ip-proxy-pool ``` 或者通过git下载: **1.在目录下运行,安装依赖包** ```javascript npm install ``` **2.爬取代理ip并进行测速检查** ```javascript node main.js ``` **3.只检查数据库里现有的ip** ```javascript node check.js ``` **4.如何在项目里使用代理池** ```javascript //导入本地模块 const proxy = require('./proxy_pool.js') //如果通过npm安装 //var proxy = require('ip-proxy-pool') //主程序,爬取ip+检查ip const proxys = proxy.run //不爬取,只检查数据库里现有的ip const check = proxy.check //提取所有ip const ips = proxy.ips //ips接收一个处理函数,然后向这个函数传递两个参数,一个为错误信息,另一个为数据库里的所有ip ips((err,response)=>{ console.log(response) }) ``` 后续更新: 1.在爬取代理IP时会先从ip池里寻找可用的ip进行爬取,没有的话才用本身的ip。 2.加入更多代理ip源 ![imag](https://github.com/Card007/Proxy-Pool/blob/master/other/ip_proxy.png) ================================================ FILE: check.js ================================================ const proxy = require('./proxy_pool.js') function main() { proxy.check() } main() ================================================ FILE: main.js ================================================ const proxy = require('./proxy_pool.js') function main() { proxy.run() } main() ================================================ FILE: package.json ================================================ { "name": "ip-proxy-pool", "version": "1.2.1", "description": "爬取代理IP并进行测速,筛选出高速可用的ip", "main": "proxy_pool.js", "dependencies": { "cheerio": "^1.0.0-rc.2", "request": "^2.88.0", "sqlite3": "^4.0.6" }, "devDependencies": {}, "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, "repository": { "type": "git", "url": "git+https://github.com/Card007/ProxyPool.git" }, "keywords": ["proxy","ip","proxy-pool"], "author": "Card007", "license": "ISC", "bugs": { "url": "https://github.com/Card007/ProxyPool/issues" }, "homepage": "https://github.com/Card007/ProxyPool#readme" } ================================================ FILE: proxy_pool.js ================================================ const request = require('request') const cheerio = require('cheerio') const sqlite3 = require('sqlite3') const db = new sqlite3.Database('Proxy.db', (err) => { if(!err){ console.log('打开成功') } else { console.log(err) } }) db.run('CREATE TABLE proxy(ip char(15), port char(15), type char(15))',(err) => {}) const useragent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36' const headers = { 'User-Agent': useragent, } //添加数据文件 const insertDb = function(ip, port, type){ db.run("INSERT INTO proxy VALUES(?, ?, ?)",[ip,port,type]) } //提取优化文件数据 const clearN = function(l){ let index = 0 for (let i = 0; i < l.length; i++) { if(l[i] === '' || l[i] === '\n'){ }else{ let ips = l[i].replace('\n','') if (index === 0){ var ip = ips console.log('爬取ip:' + ip) } else if(index === 1){ var port = ips } else if(index === 4){ var type = ips } index += 1 } } insertDb(ip, port, type) } //分析网页内容 const loadHtml = function(data){ let l = [] let e = cheerio.load(data) e('tr').each(function(i, elem){ l[i] = e(this).text() }) for (let i = 1; i < l.length; i ++){ clearN(l[i].split(' ')) } } //链接网络 const requestProxy = function(options){ return new Promise((resolve, reject) => { request(options, function(err, response, body){ if(err === null && response.statusCode === 200){ loadHtml(body) resolve() } else { console.log('链接失败') resolve() } }) }) } //生成网址 const ipUrl = function(resolve){ const url = 'http://www.xicidaili.com/nn/' let options = { url:'http://www.xicidaili.com/nn/', headers, } let arr = [] return new Promise((resolve, reject) => { for (let i = 1; i <= 5; i++) { options.url = url + i arr.push(requestProxy(options)) } Promise.all(arr).then(function(){ resolve() }) }) } //从数据库提取所有ip const allIp = function(callback){ return db.all('select * from proxy', callback) } //代理ip对象 const Proxys = function(ip,port,type){ this.ip = ip this.port = port this.type = type } //提取所有ip,通过check函数检查 const runIp = async function(){ let arr = [] allIp((err,response) => { for (let i = 0; i < response.length; i++) { let ip = response[i] let proxy = new Proxys(ip.ip, ip.port, ip.type) arr.push(check(proxy, headers)) } Promise.all(arr).then(function(){ allIp((err, response)=>{ console.log('\n\n可用ip为:') console.log(response) }) }) }) } //检测ip const check = function(proxy, headers){ return new Promise((resolve, reject) => { request({ url:'http://apps.bdimg.com/libs/jquery/2.1.4/jquery.min.js', proxy: `${proxy.type.toLowerCase()}://${proxy.ip}:${proxy.port}`, method:'GET', timeout: 2000, headers,} ,function(err, response,body){ if(!err && response.statusCode == 200){ console.log(proxy.ip+' 链接成功:') resolve() } else { console.log(proxy.ip+' 链接失败') removeIp(proxy.ip) resolve() } } ) }) } //删除命令 const removeIp = function(ip){ db.run(`DELETE FROM proxy WHERE ip = '${ ip }'`, function(err){ if(err){ console.log(err) }else { console.log('成功删除:'+ip) } }) } exports.run = async function(){ await ipUrl() await runIp() } exports.check = function(){ runIp() } exports.ips = function(callback){ allIp(callback) }