[
  {
    "path": ".gitignore",
    "content": "/no_use\n*.xlsx"
  },
  {
    "path": "CpuToInfluxdb.py",
    "content": "import psutil\nimport os\nfrom influxdb import InfluxDBClient\nimport time,math,random\n\n\n#获取当前运行的pid\np1=psutil.Process(os.getpid()) \n\n\nfrom influxdb import InfluxDBClient\nimport time,math,random\nwhile True:\n    a = psutil.virtual_memory().percent  #内存占用率\n\n    b = psutil.cpu_percent(interval=1.0) #cpu占用率\n\n    json_body = [\n        {\n            \"measurement\": \"cpu_load_short\",\n            \"tags\": {\n                \"host\": \"server01\",\n                \"region\": \"us-west\"\n            },\n            #\"time\": \"2009-11-10T23:00:00Z\",\n            \"fields\": {\n                \"cpu\": b,\n                \"mem\": a\n            }\n        }\n    ]\n    client = InfluxDBClient('localhost', 8086, 'root', 'root', 'xxyyxx')\n    client.create_database('xxyyxx',if_not_exists=False)\n    client.write_points(json_body)\n    #result = client.query('select value from cpu_load_short;')\n    #print(\"Result: {0}\".format(result))\n    time.sleep(2)"
  },
  {
    "path": "ModifyFilename.py",
    "content": "import os\ndir = os.getcwd()\nsubdir = os.listdir(dir)\nfor i in subdir:\n    path = os.path.join(dir, i)\n    if os.path.isdir(path):\n        end_dir = os.listdir(path)\n        for i in range(len(end_dir)):\n            newname = end_dir[i][0:50]\n            os.rename(os.path.join(path, end_dir[\n                      i]), os.path.join(path, newname))\n"
  },
  {
    "path": "Python 黑魔法/Python 远程开机.py",
    "content": "def wake_up(request, mac='DC-4A-3E-78-3E-0A'):\n    MAC = mac\n    BROADCAST = \"192.168.0.255\"\n    if len(MAC) != 17:\n        raise ValueError(\"MAC address should be set as form 'XX-XX-XX-XX-XX-XX'\")\n    mac_address = MAC.replace(\"-\", '')\n    data = ''.join(['FFFFFFFFFFFF', mac_address * 20])  # 构造原始数据格式\n    send_data = b''\n\n    # 把原始数据转换为16进制字节数组，\n    for i in range(0, len(data), 2):\n        send_data = b''.join([send_data, struct.pack('B', int(data[i: i + 2], 16))])\n    print(send_data)\n\n    # 通过socket广播出去，为避免失败，间隔广播三次\n    try:\n        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)\n        sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)\n        sock.sendto(send_data, (BROADCAST, 7))\n        time.sleep(1)\n        sock.sendto(send_data, (BROADCAST, 7))\n        time.sleep(1)\n        sock.sendto(send_data, (BROADCAST, 7))\n        return HttpResponse()\n        print(\"Done\")\n    except Exception as e:\n        return HttpResponse()\n        print(e)"
  },
  {
    "path": "Python 黑魔法/README.MD",
    "content": "# 代码详细说明请看文章\n\n[Python 远程关机](https://mp.weixin.qq.com/s/RSod4XWxyzL32eNcrXLjUQ)\n\n"
  },
  {
    "path": "README.md",
    "content": "\n# 欢迎关注我的微信公众号【智能制造社区】\n\n## 左手代码，右手制造，分享智能制造相关技术和业务，包括 Python, C#, 数据库，工业大数据、物联网技术及MES/ERP/SAP等系统。\n\n## 可以通过微信公众号加我好友\n\n![二维码](qrcode.jpg)\n\n# 内容列表\n\n## [Python微信公众号开发](https://github.com/injetlee/Python/tree/master/wechat)\n\n- ### Python 微信公众号开发—小白篇(一)\n\n- ### Python 公众号开发—颜值检测\n\n## [Python 爬虫入门合集](https://github.com/injetlee/Python/tree/master/%E7%88%AC%E8%99%AB%E9%9B%86%E5%90%88)\n\n- ### Python 爬虫入门(一)——爬取糗事百科\n\n- ### Python 爬虫入门(二)——爬取妹子图\n\n- ### Python 爬虫——Python 岗位分析报告\n\n- ### Python 爬虫利器——Selenium介绍\n\n- ### Python 爬虫—— 抖音 App 视频抓包爬取\n\n## [Python 黑魔法](https://github.com/injetlee/Python/tree/master/Python%20%E9%BB%91%E9%AD%94%E6%B3%95)\n\n- ### Python 远程关机\n\n## SQL 数据库\n\n- [1 小时 SQL 极速入门（一）](https://mp.weixin.qq.com/s/Lx4B349OlD49ihJPnB6YiA)\n- [1 小时 SQL 极速入门（二）](https://mp.weixin.qq.com/s/D-CEtGYomne5kV_Ji4lodA)\n- [1 小时 SQL 极速入门（三）](https://mp.weixin.qq.com/s/7aJqrhCNcvnt2gO3p5P50Q)\n- [SQL 高级查询——（层次化查询，递归）](https://mp.weixin.qq.com/s/R9Yldd-5AK4ObRA9Lfbz-Q)\n- [GROUP BY高级查询,ROLLUP，CUBE，GROUPPING详解](https://mp.weixin.qq.com/s/_OK6dtHGhp7ukC2pe1ginQ)\n- [SQL 行转列，列转行](https://mp.weixin.qq.com/s/xOFIg42FQhNpyg94ajhtqQ)\n\n## 其他\n\n- 1.[获取当前CPU状态，存储到Influxdb](https://github.com/injetlee/demo/blob/master/CpuToInfluxdb.py)\n\n- 2.[模拟登录知乎](https://github.com/injetlee/demo/blob/master/login_zhihu.py)\n\n- 3.[对目录下所有文件计数](https://github.com/injetlee/demo/blob/master/countFile.py)\n\n- 4.[爬取豆瓣电影top250](https://github.com/injetlee/demo/blob/master/douban_movie.py)\n\n- 5.[Excel文件读入数据库](https://github.com/injetlee/demo/blob/master/excelToDatabase.py)\n\n- 6.[爬取拉勾网职位信息](https://github.com/injetlee/demo/blob/master/lagouSpider.py)\n\n- 7.[批量修改文件名](https://github.com/injetlee/demo/blob/master/ModifyFilename.py)\n\n- 8.[读写excel](https://github.com/injetlee/demo/blob/master/readExcel.py)\n\n- 9.[下载必应首页图片,只下载当天的，一张。](https://github.com/injetlee/Python/blob/master/biyingSpider.py)\n"
  },
  {
    "path": "biyingSpider.py",
    "content": "import requests\nimport re\nimport time\nlocal = time.strftime(\"%Y.%m.%d\")\nurl = 'http://cn.bing.com/'\ncon = requests.get(url)\ncontent = con.text\nreg = r\"(az/hprichbg/rb/.*?.jpg)\"\na = re.findall(reg, content, re.S)[0]\nprint(a)\npicUrl = url + a\nread = requests.get(picUrl)\nf = open('%s.jpg' % local, 'wb')\nf.write(read.content)\nf.close()\n"
  },
  {
    "path": "countFile.py",
    "content": "import os\nresult = []\ndef get_all(cwd):\n    get_dir = os.listdir(cwd)  #遍历当前目录，获取文件列表\n    for i in get_dir:          \n        sub_dir = os.path.join(cwd,i)  # 把第一步获取的文件加入路径\n        if os.path.isdir(sub_dir):     #如果当前仍然是文件夹，递归调用\n            get_all(sub_dir)\n        else:\n            ax = os.path.basename(sub_dir)  #如果当前路径不是文件夹，则把文件名放入列表\n            result.append(ax)\n            print(len(result))   #对列表计数\n            \nif __name__ == \"__main__\": \n    cur_path = os.getcwd()   #当前目录\n    get_all(cur_path)"
  },
  {
    "path": "countPm.py",
    "content": "# -*- coding:utf-8 -*-\ndef count_pm(*args):\n    alist = list([round(i*2-8,2) for i in args])  #计算三种颗粒浓度\n    result = []\n    for pm in alist:\n    \tpm_abs = abs(pm)\n    \tresult.append(generate_iso_code(pm_abs))\n    print (result)\n    return result\n    \t\ndef generate_iso_code(x):\n\tpm_value = [0.01,0.02,0.04,0.08,0.16,0.32,0.64,1.3,2.5,5,10,20,40,80]  #颗粒浓度\n\tiso = list(range(1,25))   #iso级别，共24级\n\tfor i in range(len(pm_value)):           #for循环得到某个浓度范围的iso4006级别\n\t\tif pm_value[i] < x <= pm_value[i+1]:\n\t\t\tiso_code = iso[i]\n\t\t\tbreak\n\treturn iso_code\n\t\t\t\nif __name__ == '__main__':\n    count_pm(7.95,5.85,3.98)\t\t\n    count_pm(7.918,5.949,5.456)\t\n    count_pm(6.916,3.956,3.956)\t\t\n"
  },
  {
    "path": "douban_book.py",
    "content": "from bs4 import BeautifulSoup\nimport requests\nfrom openpyxl import Workbook\nexcel_name = \"书籍.xlsx\"\nwb = Workbook()\nws1 = wb.active\nws1.title='书籍'\n\n\ndef get_html(url):\n    header = {\n        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}\n    html = requests.get(url, headers=header).content\n    return html\n\n\ndef get_con(html):\n    soup = BeautifulSoup(html,'html.parser')\n    book_list = soup.find('div', attrs={'class': 'article'})\n    page = soup.find('div', attrs={'class': 'paginator'})\n    next_page = page.find('span', attrs={'class': 'next'}).find('a')\n    name = []\n    for i in book_list.find_all('table'):\n        book_name = i.find('div', attrs={'class': 'pl2'})\n        m = list(book_name.find('a').stripped_strings)\n        if len(m)>1:\n            x = m[0]+m[1]\n        else:\n            x = m[0]\n        #print(x)\n        name.append(x)\n    if next_page:\n        return name, next_page.get('href')\n    else:\n        return name, None\n\n\ndef main():\n    url = 'https://book.douban.com/top250'\n    name_list=[]\n    while url:\n        html = get_html(url)\n        name, url = get_con(html)\n        name_list = name_list + name\n    for i in name_list:\n        location = 'A%s'%(name_list.index(i)+1)\n        print(i)\n        print(location)\n        ws1[location]=i\n    wb.save(filename=excel_name)\n\n\nif __name__ == '__main__':\n    main()\n\n"
  },
  {
    "path": "douban_movie.py",
    "content": "#!/usr/bin/env python\n# encoding=utf-8\nimport requests\nimport re\nimport codecs\nfrom bs4 import BeautifulSoup\nfrom openpyxl import Workbook\nwb = Workbook()\ndest_filename = '电影.xlsx'\nws1 = wb.active\nws1.title = \"电影top250\"\n\nDOWNLOAD_URL = 'http://movie.douban.com/top250/'\n\n\ndef download_page(url):\n    \"\"\"获取url地址页面内容\"\"\"\n    headers = {\n        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'\n    }\n    data = requests.get(url, headers=headers).content\n    return data\n\n\ndef get_li(doc):\n    soup = BeautifulSoup(doc, 'html.parser')\n    ol = soup.find('ol', class_='grid_view')\n    name = []  # 名字\n    star_con = []  # 评价人数\n    score = []  # 评分\n    info_list = []  # 短评\n    for i in ol.find_all('li'):\n        detail = i.find('div', attrs={'class': 'hd'})\n        movie_name = detail.find(\n            'span', attrs={'class': 'title'}).get_text()  # 电影名字\n        level_star = i.find(\n            'span', attrs={'class': 'rating_num'}).get_text()  # 评分\n        star = i.find('div', attrs={'class': 'star'})\n        star_num = star.find(text=re.compile('评价'))  # 评价\n\n        info = i.find('span', attrs={'class': 'inq'})  # 短评\n        if info:  # 判断是否有短评\n            info_list.append(info.get_text())\n        else:\n            info_list.append('无')\n        score.append(level_star)\n\n        name.append(movie_name)\n        star_con.append(star_num)\n    page = soup.find('span', attrs={'class': 'next'}).find('a')  # 获取下一页\n    if page:\n        return name, star_con, score, info_list, DOWNLOAD_URL + page['href']\n    return name, star_con, score, info_list, None\n\n\ndef main():\n    url = DOWNLOAD_URL\n    name = []\n    star_con = []\n    score = []\n    info = []\n    while url:\n        doc = download_page(url)\n        movie, star, level_num, info_list, url = get_li(doc)\n        name = name + movie\n        star_con = star_con + star\n        score = score + level_num\n        info = info + info_list\n    for (i, m, o, p) in zip(name, star_con, score, info):\n        col_A = 'A%s' % (name.index(i) + 1)\n        col_B = 'B%s' % (name.index(i) + 1)\n        col_C = 'C%s' % (name.index(i) + 1)\n        col_D = 'D%s' % (name.index(i) + 1)\n        ws1[col_A] = i\n        ws1[col_B] = m\n        ws1[col_C] = o\n        ws1[col_D] = p\n    wb.save(filename=dest_filename)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "excelToDatabase.py",
    "content": "from openpyxl import load_workbook\nimport pymysql\nconfig = {\n\t'host': '127.0.0.1',\n\t'port':3306,\n\t'user': 'root',\n\t'password': 'root',\n\t'charset': 'utf8mb4',\n\t#'cursorclass': pymysql.cursors.DictCursor\n\n}\nconn = pymysql.connect(**config)\nconn.autocommit(1)\ncursor = conn.cursor()\nname = 'lyexcel'\ncursor.execute('create database if not exists %s' %name)\nconn.select_db(name)\ntable_name = 'info'\ncursor.execute('create table if not exists %s(id MEDIUMINT NOT NULL AUTO_INCREMENT,name varchar(30),tel varchar(30),primary key (id))'%table_name)\n\nwb2 = load_workbook('hpu.xlsx')\nws=wb2.get_sheet_names()\nfor row in wb2:\n\tprint(\"1\")\n\tfor cell in row:\n\t\tvalue1=(cell[0].value,cell[4].value)\n\t\tcursor.execute('insert into info (name,tel) values(%s,%s)',value1)\n\nprint(\"overing...\")\n# for row in A:\n# \tprint(row)\n#print (wb2.get_sheet_names())\n"
  },
  {
    "path": "image_recognition_zhihu.py",
    "content": "# -*- coding:UTF-8 -*-\n\nimport  requests , time ,random\nimport  hmac ,json ,base64\nfrom bs4 import BeautifulSoup\nfrom hashlib import sha1\nimport TencentYoutuyun\nfrom PIL import Image\nimport uuid\n\n\n    \ndef recognition_captcha(data):\n    ''' 识别验证码 '''\n\n    file_id = str(uuid.uuid1())\n    filename = 'captcha_'+ file_id +'.gif'\n    filename_png =  'captcha_'+ file_id +'.png'\n\n    if(data is None):\n        return \n    data = base64.b64decode(data.encode('utf-8'))\n    with open( filename ,'wb') as fb:\n        fb.write( data )    \n    \n    appid = 'appid' # 接入优图服务，注册账号获取 \n    secret_id = 'secret_id'  \n    secret_key = 'secret_key'  \n    userid= 'userid' \n    end_point = TencentYoutuyun.conf.API_YOUTU_END_POINT   \n\n    youtu = TencentYoutuyun.YouTu(appid, secret_id, secret_key, userid, end_point) # 初始化\n\n    # 拿到的是gif格式，而优图只支持 JPG PNG BMP 其中之一，这时我们需要 pip install Pillow 来转换格式\n    im = Image.open( filename)\n    im.save( filename_png ,\"png\")\n    im.close()\n    \n    result = youtu.generalocr( filename_png , data_type = 0 , seq = '')  #  0代表本地路径，1代表url\n\n    return result\n\n\ndef get_captcha(sessiona,headers):\n    ''' 获取验证码 '''\n    \n    need_cap = False\n\n    while( need_cap is not True):\n        try:\n            sessiona.get('https://www.zhihu.com/signin',headers=headers)  # 拿cookie:_xsrf\n            resp2 = sessiona.get('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers)  # 拿cookie:capsion_ticket \n            need_cap = json.loads(resp2.text)[\"show_captcha\"]  # {\"show_captcha\":false} 表示不用验证码\n            time.sleep( 0.5 + random.randint(1,9)/10 )\n        except Exception:\n            continue\n\n    try:\n        resp3 = sessiona.put('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers) # 拿到验证码数据，注意是put\n        img_data = json.loads(resp3.text)[\"img_base64\"]\n    except Exception:\n        return     \n    \n\n    return img_data\n\ndef create_point( point_data, confidence ):\n    ''' 获得点阵 '''\n\n    # 实际操作下，套路不深，x间隔25，y相同，共7个点 ，先模拟意思一下\n    points = {1:[ 20.5,25.1875],2:[ 45.5,25.1875],3:[ 70.5,25.1875],4:[ 95.5,25.1875],5:[120.5,25.1875],6:[145.5,25.1875],7:[170.5,25.1875]}\n    wi = 0\n    input_points = []\n    \n    for word in ( point_data['items'][0]['words'] ):\n        wi = wi+1\n        if( word['confidence'] < confidence ):\n            try:\n                input_points.append(points[wi]) # 倒置的中文，优图识别不出来，置信度会低于0.5\n            except KeyError:\n                continue\n        \n    if( len(input_points) > 2 or len(input_points) == 0 ):\n        return []  # 7个字中只有2个倒置中文的成功率高\n    \n    result = {}\n    result['img_size']=[200,44]\n    result['input_points']=input_points\n    result = json.dumps(result)\n    print(result)\n    return result\n\ndef bolting(k_low,k_hi,k3_confidence):\n    ''' 筛选把握大的进行验证 '''\n\n    start = time.time()\n    \n    is_success = False\n    while(is_success is not True):\n    \n        points_len = 1\n        angle = -20\n        img_ko = []\n\n        while(points_len != 21  or  angle < k_low  or angle > k_hi ):  \n            img_data = get_captcha(sessiona,headers)\n            img_ko = recognition_captcha(img_data)\n     \n            ## json.dumps 序列化时对中文默认使用的ascii编码.想输出真正的中文需要指定ensure_ascii=False\n            # img_ko_json = json.dumps(img_ko , indent =2 ,ensure_ascii=False ) \n            # img_ko_json = img_ko_json.encode('raw_unicode_escape') ## 因为python3的原因，也因为优图自身的原因，此处要特殊处理\n        \n            # with open( \"json.txt\" ,'wb') as fb:\n            #     fb.write( img_ko_json )  \n    \n            try:\n                points_len = len(img_ko['items'][0]['itemstring'])\n                angle = img_ko['angle']\n            except Exception:\n                points_len = 1\n                angle = -20\n                continue\n\n        # print(img_ko_json.decode('utf8')) ## stdout用的是utf8，需转码才能正常显示\n        # print('-'*50)\n        \n        input_text = create_point( img_ko ,k3_confidence )\n        if(type(input_text) == type([])):\n            continue\n        \n        data = {\n            \"input_text\":input_text   \n            }\n\n        # 提交过快会被拒绝，{\"code\":120005,\"name\":\"ERR_VERIFY_CAPTCHA_TOO_QUICK\"} ，假装思考5秒钟\n        time.sleep( 4 + random.randint(1,9)/10 )\n        try:    \n            resp5 = sessiona.post('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',data,headers=headers)\n        except Exception:\n            continue\n        \n        print(\"angle: \"+ str(angle) )\n        print(BeautifulSoup(resp5.content ,'html.parser')) # 如果验证成功，会回应{\"success\":true}，开心\n        print('-'*50)\n        try:\n            is_success = json.loads(resp5.text)[\"success\"]\n        except KeyError:\n            continue\n\n    end = time.time()\n\n    return end-start\n\n\nif __name__ == \"__main__\":\n    \n    sessiona = requests.Session()\n    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0','authorization':'oauth c3cef7c66a1843f8b3a9e6a1e3160e20'}\n\n    k3_confidence = 0.71\n    \n    '''\n    # 可视化数据会被保存在云端供浏览\n    # https://plot.ly/~weldon2010/4\n    # 纯属学习，并未看出\"角度\"范围扩大对图像识别的影响，大部分时候60s内能搞定，说明优图还是很强悍的，识别速度也非常快\n    '''\n    runtime_list_x = []\n    runtime_list_y = []\n    nn = range(1,11) # 愿意的话搞多线程，1百万次更有意思\n    \n    # 成功尝试100次，形成2维数据以热力图的方式展示\n    for y in nn :\n        for x in  nn :\n            runtime_list_x.append( bolting(-3,3,k3_confidence) )\n            print( \"y: \" + str(runtime_list_y) )\n            print( \"x: \" + str(runtime_list_x) )\n        runtime_list_y.append(runtime_list_x.copy())\n        runtime_list_x = []\n\n    print (\"-\"*30)    \n    print( runtime_list_y )\n    print (\"-\"*30)\n\n    # pip install plotly 数据可视化\n    import plotly\n    import plotly.graph_objs as go\n    plotly.tools.set_credentials_file(username='username', api_key='username') # 设置账号，去官网注册\n    trace = go.Heatmap(z = runtime_list_y , x = [n for n in nn ] ,y =[n for n in nn ])\n    data=[trace]\n    plotly.plotly.plot(data, filename='weldon-time2-heatmap')    \n   \n    # 尝试后发现一个特点，基本都是1~2个倒置中文，这样我们可以借此提速\n    # 角度范围放大，仅当识别出倒置中文为1~2个时才提交验证否则放弃继续寻找\n\n### chcp 65001 (win下改变cmd字符集)\n### python  c:\\python34\\image_recognition_zhihu.py\n\n\n\n\n\n\n"
  },
  {
    "path": "lagouSpider.py",
    "content": "import requests\nfrom openpyxl import Workbook\n\ndef get_json(url, page, lang_name):\n    data = {'first': 'true', 'pn': page, 'kd': lang_name}\n    json = requests.post(url, data).json()\n    list_con = json['content']['positionResult']['result']\n    info_list = []\n    for i in list_con:\n        info = []\n        info.append(i['companyShortName'])\n        info.append(i['companyName'])\n        info.append(i['salary'])\n        info.append(i['city'])\n        info.append(i['education'])\n        info_list.append(info)\n    return info_list\n\n\ndef main():\n    lang_name = input('职位名：')\n    page = 1\n    url = 'http://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'\n    info_result = []\n    while page < 31:\n        info = get_json(url, page, lang_name)\n        info_result = info_result + info\n        page += 1\n    wb = Workbook()\n    ws1 = wb.active\n    ws1.title = lang_name\n    for row in info_result:\n        ws1.append(row)\n    wb.save('职位信息.xlsx')\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "login_zhihu.py",
    "content": "# -*- coding:UTF-8 -*-\n\nimport  requests , time\nimport  hmac ,json\nfrom bs4 import BeautifulSoup\nfrom hashlib import sha1\n\n\ndef get_captcha(data,need_cap):\n    ''' 处理验证码 '''\n    if need_cap is False:\n        return\n    with open('captcha.gif','wb') as fb:\n        fb.write(data)\n    return input('captcha:')\n    \ndef get_signature(grantType,clientId,source,timestamp):\n    ''' 处理签名 '''\n\t\n    hm = hmac.new(b'd1b964811afb40118a12068ff74a12f4',None,sha1)\n    hm.update(str.encode(grantType))\n    hm.update(str.encode(clientId))\n    hm.update(str.encode(source))\n    hm.update(str.encode(timestamp))\n\n    return  str(hm.hexdigest())\n\n\n\ndef login(username,password,oncaptcha,sessiona,headers):\n    ''' 处理登录 '''\n    \n    resp1 = sessiona.get('https://www.zhihu.com/signin',headers=headers)  # 拿cookie:_xsrf\n    resp2 = sessiona.get('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers)  # 拿cookie:capsion_ticket \n    need_cap = json.loads(resp2.text)[\"show_captcha\"]  # {\"show_captcha\":false} 表示不用验证码\n\n    grantType = 'password'\n    clientId = 'c3cef7c66a1843f8b3a9e6a1e3160e20'\n    source ='com.zhihu.web'\n    timestamp = str((time.time()*1000)).split('.')[0]  # 签名只按这个时间戳变化\n       \n    captcha_content = sessiona.get('https://www.zhihu.com/captcha.gif?r=%d&type=login'%(time.time()*1000),headers=headers).content\n    \n    data = {\n        \"client_id\":clientId,\n        \"grant_type\":grantType,\n        \"timestamp\":timestamp,\n        \"source\":source,\n        \"signature\": get_signature(grantType,clientId,source,timestamp), # 获取签名\n        \"username\":username,\n        \"password\":password,\n        \"lang\":\"cn\",\n        \"captcha\":oncaptcha(captcha_content,need_cap), # 获取图片验证码\n        \"ref_source\":\"other_\",\n        \"utm_source\":\"\"\n    }\n    \n    print(\"**2**: \"+str(data))\n    print(\"-\"*50)\n    resp = sessiona.post('https://www.zhihu.com/api/v3/oauth/sign_in',data,headers=headers).content\n    print(BeautifulSoup(resp,'html.parser'))\n    \n    print(\"-\"*50)\n    return resp \n\nif __name__ == \"__main__\":\n    sessiona = requests.Session()\n    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0','authorization':'oauth c3cef7c66a1843f8b3a9e6a1e3160e20'}\n\n    login('12345678@qq.com','12345678',get_captcha,sessiona,headers) # 用户名密码换自己的就好了\n    resp = sessiona.get('https://www.zhihu.com/inbox',headers=headers)  # 登录进去了，可以看私信了\n    print(BeautifulSoup(resp.content ,'html.parser'))\n    \n    \n    \n    \n### chcp 65001 (win下改变cmd字符集)\n### python  c:\\python34\\login_zhihu.py\n### 有非常无语的事情发生，还以为代码没生效\n\n\n\n\n\n"
  },
  {
    "path": "qiubai_crawer.py",
    "content": "import requests\nfrom bs4 import BeautifulSoup\n\n\ndef download_page(url):\n    headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0\"}\n    r = requests.get(url, headers=headers)\n    return r.text\n\n\ndef get_content(html, page):\n    output = \"\"\"第{}页 作者：{} 性别：{} 年龄：{} 点赞：{} 评论：{}\\n{}\\n------------\\n\"\"\"\n    soup = BeautifulSoup(html, 'html.parser')\n    con = soup.find(id='content-left')\n    con_list = con.find_all('div', class_=\"article\")\n    for i in con_list:\n        author = i.find('h2').string  # 获取作者名字\n        content = i.find('div', class_='content').find('span').get_text()  # 获取内容\n        stats = i.find('div', class_='stats')\n        vote = stats.find('span', class_='stats-vote').find('i', class_='number').string\n        comment = stats.find('span', class_='stats-comments').find('i', class_='number').string\n        author_info = i.find('div', class_='articleGender')  # 获取作者 年龄，性别\n        if author_info is not None:  # 非匿名用户\n            class_list = author_info['class']\n            if \"womenIcon\" in class_list:\n                gender = '女'\n            elif \"manIcon\" in class_list:\n                gender = '男'\n            else:\n                gender = ''\n            age = author_info.string   # 获取年龄\n        else:  # 匿名用户\n            gender = ''\n            age = ''\n\n        save_txt(output.format(page, author, gender, age, vote, comment, content))\n\n\ndef save_txt(*args):\n    for i in args:\n        with open('qiubai.txt', 'a', encoding='utf-8') as f:\n            f.write(i)\n\n\ndef main():\n    # 我们点击下面链接，在页面下方可以看到共有13页，可以构造如下 url，\n    # 当然我们最好是用 Beautiful Soup找到页面底部有多少页。\n    for i in range(1, 14):\n        url = 'https://qiushibaike.com/text/page/{}'.format(i)\n        html = download_page(url)\n        get_content(html, i)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "readExcel.py",
    "content": "from openpyxl import Workbook\nfrom openpyxl.compat import range\nfrom openpyxl.cell import get_column_letter\nwb = Workbook()\ndest_filename = 'empty_book2.xlsx'\nws1 = wb.active  # 第一个表\nws1.title = \"range names\"  # 第一个表命名\n# 遍历第一个表的1到40行，赋值一个600内的随机数\nfor row in range(1, 40):\n    ws1.append(range(60))\nws2 = wb.create_sheet(title=\"Pi\")\nws2['F5'] = 3.14\nws3 = wb.create_sheet(title=\"Data\")\nfor row in range(10, 20):\n    for col in range(27, 54):\n        _ = ws3.cell(column=col, row=row, value=\"%s\" % get_column_letter(col))\nwb.save(filename=dest_filename)\n"
  },
  {
    "path": "wechat/README.MD",
    "content": "# 详细使用请看文章\n\n[Python微信公众号开发—小白篇(一)](https://mp.weixin.qq.com/s/iMPUC0yxI-zuf4AjtyAu6g)\n\n[Python公众号开发—颜值检测](https://mp.weixin.qq.com/s/I0DxhIHkeqhc2LeQ2ICHeA)"
  },
  {
    "path": "wechat/connect.py",
    "content": "# -*-coding:utf-8 -*-\nimport falcon\nfrom falcon import uri\nfrom wechatpy.utils import check_signature\nfrom wechatpy.exceptions import InvalidSignatureException\nfrom wechatpy import parse_message\nfrom wechatpy.replies import TextReply, ImageReply\n\nfrom utils import img_download, img_upload\nfrom face_id import access_api\n\n\nclass Connect(object):\n\n    def on_get(self, req, resp):\n        query_string = req.query_string\n        query_list = query_string.split('&')\n        b = {}\n        for i in query_list:\n            b[i.split('=')[0]] = i.split('=')[1]\n\n        try:\n            check_signature(token='lengxiao', signature=b['signature'], timestamp=b['timestamp'], nonce=b['nonce'])\n            resp.body = (b['echostr'])\n        except InvalidSignatureException:\n            pass\n        resp.status = falcon.HTTP_200\n\n    def on_post(self, req, resp):\n        xml = req.stream.read()\n        msg = parse_message(xml)\n        if msg.type == 'text':\n            reply = TextReply(content=msg.content, message=msg)\n            xml = reply.render()\n            resp.body = (xml)\n            resp.status = falcon.HTTP_200\n        elif msg.type == 'image':\n            name = img_download(msg.image, msg.source)  \n            print(name)\n            r = access_api('images/' + name)\n            if r == 'success':\n                media_id = img_upload('image', 'faces/' + name)\n                reply = ImageReply(media_id=media_id, message=msg)\n            else:\n                reply = TextReply(content='人脸检测失败，请上传1M以下人脸清晰的照片', message=msg)\n            xml = reply.render()\n            resp.body = (xml)\n            resp.status = falcon.HTTP_200\n\napp = falcon.API()\nconnect = Connect()\napp.add_route('/connect', connect)\n"
  },
  {
    "path": "wechat/face_id.py",
    "content": "# -*-coding:utf-8 -*-\nimport time\nimport random\nimport base64\nimport hashlib\nimport requests\nfrom urllib.parse import urlencode\nimport cv2\nimport numpy as np\nfrom PIL import Image, ImageDraw, ImageFont\nimport os\n\n\n# 一.计算接口鉴权，构造请求参数\n\ndef random_str():\n    '''得到随机字符串nonce_str'''\n    str = 'abcdefghijklmnopqrstuvwxyz'\n    r = ''\n    for i in range(15):\n        index = random.randint(0,25)\n        r += str[index]\n    return r\n\n\ndef image(name):\n    with open(name, 'rb') as f:\n        content = f.read()\n    return base64.b64encode(content)\n\n\ndef get_params(img):\n    '''组织接口请求的参数形式，并且计算sign接口鉴权信息，\n    最终返回接口请求所需要的参数字典'''\n    params = {\n        'app_id': '1106860829',\n        'time_stamp': str(int(time.time())),\n        'nonce_str': random_str(),\n        'image': img,\n        'mode': '0'\n\n    }\n\n    sort_dict = sorted(params.items(), key=lambda item: item[0], reverse=False)  # 排序\n    sort_dict.append(('app_key', 'P8Gt8nxi6k8vLKbS'))  # 添加app_key\n    rawtext = urlencode(sort_dict).encode()  # URL编码\n    sha = hashlib.md5()\n    sha.update(rawtext)\n    md5text = sha.hexdigest().upper()  # 计算出sign，接口鉴权\n    params['sign'] = md5text  # 添加到请求参数列表中\n    return params\n\n# 二.请求接口URL\n\n\ndef access_api(img):\n    frame = cv2.imread(img)\n    nparry_encode = cv2.imencode('.jpg', frame)[1]\n    data_encode = np.array(nparry_encode)\n    img_encode = base64.b64encode(data_encode)  # 图片转为base64编码格式\n    url = 'https://api.ai.qq.com/fcgi-bin/face/face_detectface' \n    res = requests.post(url, get_params(img_encode)).json()  # 请求URL,得到json信息\n    # 把信息显示到图片上\n    if res['ret'] == 0:  # 0代表请求成功\n        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # 把opencv格式转换为PIL格式，方便写汉字\n        draw = ImageDraw.Draw(pil_img)\n        for obj in res['data']['face_list']:\n            img_width = res['data']['image_width']  # 图像宽度\n            img_height = res['data']['image_height']  # 图像高度\n            # print(obj)\n            x = obj['x']  # 人脸框左上角x坐标\n            y = obj['y']  # 人脸框左上角y坐标\n            w = obj['width']  # 人脸框宽度\n            h = obj['height']  # 人脸框高度\n            # 根据返回的值，自定义一下显示的文字内容\n            if obj['glass'] == 1:  # 眼镜\n                glass = '有'\n            else:\n                glass = '无'\n            if obj['gender'] >= 70:  # 性别值从0-100表示从女性到男性\n                gender = '男'\n            elif 50 <= obj['gender'] < 70:\n                gender = \"娘\"\n            elif obj['gender'] < 30:\n                gender = '女'\n            else:\n                gender = '女汉子'\n            if 90 < obj['expression'] <= 100:  # 表情从0-100，表示笑的程度\n                expression = '一笑倾城'\n            elif 80 < obj['expression'] <= 90:\n                expression = '心花怒放'\n            elif 70 < obj['expression'] <= 80:\n                expression = '兴高采烈'\n            elif 60 < obj['expression'] <= 70:\n                expression = '眉开眼笑'\n            elif 50 < obj['expression'] <= 60:\n                expression = '喜上眉梢'\n            elif 40 < obj['expression'] <= 50:\n                expression = '喜气洋洋'\n            elif 30 < obj['expression'] <= 40:\n                expression = '笑逐颜开'\n            elif 20 < obj['expression'] <= 30:\n                expression = '似笑非笑'\n            elif 10 < obj['expression'] <= 20:\n                expression = '半嗔半喜'\n            elif 0 <= obj['expression'] <= 10:\n                expression = '黯然伤神'\n            delt = h // 5  # 确定文字垂直距离\n            # 写入图片\n            if len(res['data']['face_list']) > 1:  # 检测到多个人脸，就把信息写入人脸框内\n                font = ImageFont.truetype('yahei.ttf', w // 8, encoding='utf-8')  # 提前把字体文件下载好\n                draw.text((x + 10, y + 10), '性别 :' + gender, (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 1), '年龄 :' + str(obj['age']), (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 2), '表情 :' + expression, (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 3), '魅力 :' + str(obj['beauty']), (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 4), '眼镜 :' + glass, (76, 176, 80), font=font)\n            elif img_width - x - w < 170:  # 避免图片太窄，导致文字显示不完全\n                font = ImageFont.truetype('yahei.ttf', w // 8, encoding='utf-8')\n                draw.text((x + 10, y + 10), '性别 :' + gender, (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 1), '年龄 :' + str(obj['age']), (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 2), '表情 :' + expression, (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 3), '魅力 :' + str(obj['beauty']), (76, 176, 80), font=font)\n                draw.text((x + 10, y + 10 + delt * 4), '眼镜 :' + glass, (76, 176, 80), font=font)\n            else:\n                font = ImageFont.truetype('yahei.ttf', 20, encoding='utf-8')\n                draw.text((x + w + 10, y + 10), '性别 :' + gender, (76, 176, 80), font=font)\n                draw.text((x + w + 10, y + 10 + delt * 1), '年龄 :' + str(obj['age']), (76, 176, 80), font=font)\n                draw.text((x + w + 10, y + 10 + delt * 2), '表情 :' + expression, (76, 176, 80), font=font)\n                draw.text((x + w + 10, y + 10 + delt * 3), '魅力 :' + str(obj['beauty']), (76, 176, 80), font=font)\n                draw.text((x + w + 10, y + 10 + delt * 4), '眼镜 :' + glass, (76, 176, 80), font=font)\n\n            draw.rectangle((x, y, x + w, y + h), outline=\"#4CB050\")  # 画出人脸方框\n            cv2img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)  # 把 pil 格式转换为 cv\n            cv2.imwrite('faces/{}'.format(os.path.basename(img)), cv2img)  # 保存图片到 face 文件夹下\n        return 'success'\n    else:\n        return 'fail'"
  },
  {
    "path": "wechat/requirements.txt",
    "content": "certifi==2018.4.16\r\nchardet==3.0.4\r\nfalcon==1.4.1\r\nidna==2.6\r\nnumpy==1.14.5\r\nopencv-python==3.4.1.15\r\noptionaldict==0.1.1\r\nPillow==5.1.0\r\npycrypto==2.6.1\r\npython-dateutil==2.7.3\r\npython-mimeparse==1.6.0\r\nrequests==2.18.4\r\nsix==1.11.0\r\nurllib3==1.22\r\nwaitress==1.1.0\r\nwechatpy==1.7.0\r\nxmltodict==0.11.0\r\n"
  },
  {
    "path": "wechat/utils.py",
    "content": "# -*-coding:utf-8 -*-\nimport requests\nimport json\nimport threading\nimport time\nimport os\n\ntoken = ''\napp_id = '开发者ID(AppID)'\nsecret = '开发者密码(AppSecret)'\n\n\ndef img_download(url, name):\n    r = requests.get(url)\n    with open('images/{}-{}.jpg'.format(name, time.strftime(\"%Y_%m_%d%H_%M_%S\", time.localtime())), 'wb') as fd:\n        fd.write(r.content)\n    if os.path.getsize(fd.name) >= 1048576:\n        return 'large'\n    # print('namename', os.path.basename(fd.name))\n    return os.path.basename(fd.name)\n\n\ndef get_access_token(appid, secret):\n    '''获取access_token,100分钟刷新一次'''\n\n    url = 'https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid={}&secret={}'.format(appid, secret)\n    r = requests.get(url)\n    parse_json = json.loads(r.text)\n    global token\n    token = parse_json['access_token']\n    global timer\n    timer = threading.Timer(6000, get_access_token)\n    timer.start()\n\n\ndef img_upload(mediaType, name):\n    global token\n    url = \"https://api.weixin.qq.com/cgi-bin/media/upload?access_token=%s&type=%s\" % (token, mediaType)\n    files = {'media': open('{}'.format(name), 'rb')}\n    r = requests.post(url, files=files)\n    parse_json = json.loads(r.text)\n    return parse_json['media_id']\n\nget_access_token(app_id, secret)"
  },
  {
    "path": "爬虫集合/README.MD",
    "content": "# 代码详细说明请看文章\n\n[Python 爬虫入门(一)——爬取糗事百科](https://mp.weixin.qq.com/s/ApnEy6NWS2f-DqIIrhHzGw)\n\n[Python 爬虫入门(二)——爬取妹子图](https://mp.weixin.qq.com/s/4TZHgoE_yqeDha17f3Tbew)\n\n[Python 爬虫——Python 岗位分析报告](https://mp.weixin.qq.com/s/8wAHBPnQMbcrP9La7WZiJA)\n\n[Python 爬虫利器——Selenium介绍](https://mp.weixin.qq.com/s/YJGjZkUejEos_yJ1ukp5kw)\n\n[Python 爬虫——抖音App视频抓包](https://mp.weixin.qq.com/s/a8Tky_u1u0A4vbssnAK2_g)"
  },
  {
    "path": "爬虫集合/lagou.py",
    "content": "import random\nimport time\n\nimport requests\nfrom openpyxl import Workbook\nimport pymysql.cursors\n\n\ndef get_conn():\n    '''建立数据库连接'''\n    conn = pymysql.connect(host='localhost',\n                                user='root',\n                                password='root',\n                                db='python',\n                                charset='utf8mb4',\n                                cursorclass=pymysql.cursors.DictCursor)\n    return conn\n\n\ndef insert(conn, info):\n    '''数据写入数据库'''\n    with conn.cursor() as cursor:\n        sql = \"INSERT INTO `python` (`shortname`, `fullname`, `industryfield`, `companySize`, `salary`, `city`, `education`) VALUES (%s, %s, %s, %s, %s, %s, %s)\"\n        cursor.execute(sql, info)\n    conn.commit()\n\n\ndef get_json(url, page, lang_name):\n    '''返回当前页面的信息列表'''\n    headers = {\n        'Host': 'www.lagou.com',\n        'Connection': 'keep-alive',\n        'Content-Length': '23',\n        'Origin': 'https://www.lagou.com',\n        'X-Anit-Forge-Code': '0',\n        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',\n        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',\n        'Accept': 'application/json, text/javascript, */*; q=0.01',\n        'X-Requested-With': 'XMLHttpRequest',\n        'X-Anit-Forge-Token': 'None',\n        'Referer': 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=',\n        'Accept-Encoding': 'gzip, deflate, br',\n        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7'\n    }\n    data = {'first': 'false', 'pn': page, 'kd': lang_name}\n    json = requests.post(url, data, headers=headers).json()\n    list_con = json['content']['positionResult']['result']\n    info_list = []\n    for i in list_con:\n        info = []\n        info.append(i.get('companyShortName', '无'))\n        info.append(i.get('companyFullName', '无'))\n        info.append(i.get('industryField', '无'))\n        info.append(i.get('companySize', '无'))\n        info.append(i.get('salary', '无'))\n        info.append(i.get('city', '无'))\n        info.append(i.get('education', '无'))\n        info_list.append(info)\n    return info_list\n\n\ndef main():\n    lang_name = 'python'\n    wb = Workbook()  # 打开 excel 工作簿\n    conn = get_conn()  # 建立数据库连接  不存数据库 注释此行\n    for i in ['北京', '上海', '广州', '深圳', '杭州']:   # 五个城市\n        page = 1\n        ws1 = wb.active\n        ws1.title = lang_name\n        url = 'https://www.lagou.com/jobs/positionAjax.json?city={}&needAddtionalResult=false'.format(i)\n        while page < 31:   # 每个城市30页信息\n            info = get_json(url, page, lang_name)\n            page += 1\n            print(i, 'page', page)\n            time.sleep(random.randint(10, 20))\n            for row in info:\n                insert(conn, tuple(row))  # 插入数据库，若不想存入 注释此行\n                ws1.append(row)\n    conn.close()  # 关闭数据库连接，不存数据库 注释此行\n    wb.save('{}职位信息.xlsx'.format(lang_name))\n\nif __name__ == '__main__':\n    main()"
  },
  {
    "path": "爬虫集合/meizitu.py",
    "content": "import requests\nimport os\nimport time\nimport threading\nfrom bs4 import BeautifulSoup\n\n\ndef download_page(url):\n    '''\n    用于下载页面\n    '''\n    headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0\"}\n    r = requests.get(url, headers=headers)\n    r.encoding = 'gb2312'\n    return r.text\n\n\ndef get_pic_list(html):\n    '''\n    获取每个页面的套图列表,之后循环调用get_pic函数获取图片\n    '''\n    soup = BeautifulSoup(html, 'html.parser')\n    pic_list = soup.find_all('li', class_='wp-item')\n    for i in pic_list:\n        a_tag = i.find('h3', class_='tit').find('a')\n        link = a_tag.get('href')\n        text = a_tag.get_text()\n        get_pic(link, text)\n\n\ndef get_pic(link, text):\n    '''\n    获取当前页面的图片,并保存\n    '''\n    html = download_page(link)  # 下载界面\n    soup = BeautifulSoup(html, 'html.parser')\n    pic_list = soup.find('div', id=\"picture\").find_all('img')  # 找到界面所有图片\n    headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0\"}\n    create_dir('pic/{}'.format(text))\n    for i in pic_list:\n        pic_link = i.get('src')  # 拿到图片的具体 url\n        r = requests.get(pic_link, headers=headers)  # 下载图片，之后保存到文件\n        with open('pic/{}/{}'.format(text, pic_link.split('/')[-1]), 'wb') as f:\n            f.write(r.content)\n            time.sleep(1)   # 休息一下，不要给网站太大压力，避免被封\n\n\ndef create_dir(name):\n    if not os.path.exists(name):\n        os.makedirs(name)\n\n\ndef execute(url):\n    page_html = download_page(url)\n    get_pic_list(page_html)\n\n\ndef main():\n    create_dir('pic')\n    queue = [i for i in range(1, 72)]   # 构造 url 链接 页码。\n    threads = []\n    while len(queue) > 0:\n        for thread in threads:\n            if not thread.is_alive():\n                threads.remove(thread)\n        while len(threads) < 5 and len(queue) > 0:   # 最大线程数设置为 5\n            cur_page = queue.pop(0)\n            url = 'http://meizitu.com/a/more_{}.html'.format(cur_page)\n            thread = threading.Thread(target=execute, args=(url,))\n            thread.setDaemon(True)\n            thread.start()\n            print('{}正在下载{}页'.format(threading.current_thread().name, cur_page))\n            threads.append(thread)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "爬虫集合/qiubai_crawer.py",
    "content": "import requests\nfrom bs4 import BeautifulSoup\n\n\ndef download_page(url):\n    headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0\"}\n    r = requests.get(url, headers=headers)\n    return r.text\n\n\ndef get_content(html, page):\n    output = \"\"\"第{}页 作者：{} 性别：{} 年龄：{} 点赞：{} 评论：{}\\n{}\\n------------\\n\"\"\"\n    soup = BeautifulSoup(html, 'html.parser')\n    con = soup.find(id='content-left')\n    con_list = con.find_all('div', class_=\"article\")\n    for i in con_list:\n        author = i.find('h2').string  # 获取作者名字\n        content = i.find('div', class_='content').find('span').get_text()  # 获取内容\n        stats = i.find('div', class_='stats')\n        vote = stats.find('span', class_='stats-vote').find('i', class_='number').string\n        comment = stats.find('span', class_='stats-comments').find('i', class_='number').string\n        author_info = i.find('div', class_='articleGender')  # 获取作者 年龄，性别\n        if author_info is not None:  # 非匿名用户\n            class_list = author_info['class']\n            if \"womenIcon\" in class_list:\n                gender = '女'\n            elif \"manIcon\" in class_list:\n                gender = '男'\n            else:\n                gender = ''\n            age = author_info.string   # 获取年龄\n        else:  # 匿名用户\n            gender = ''\n            age = ''\n\n        save_txt(output.format(page, author, gender, age, vote, comment, content))\n\n\ndef save_txt(*args):\n    for i in args:\n        with open('qiubai.txt', 'a', encoding='utf-8') as f:\n            f.write(i)\n\n\ndef main():\n    # 我们点击下面链接，在页面下方可以看到共有13页，可以构造如下 url，\n    # 当然我们最好是用 Beautiful Soup找到页面底部有多少页。\n    for i in range(1, 14):\n        url = 'https://qiushibaike.com/text/page/{}'.format(i)\n        html = download_page(url)\n        get_content(html, i)\n\nif __name__ == '__main__':\n    main()\n"
  }
]