Repository: Jack-Cherish/python-spider Branch: master Commit: 60b7ed839713 Files: 68 Total size: 274.5 KB Directory structure: gitextract_u9lbtfzc/ ├── 12306.py ├── 2020/ │ ├── README.md │ ├── api/ │ │ └── api.py │ ├── bilibili/ │ │ ├── download.py │ │ └── xml2ass.py │ ├── dmzj/ │ │ └── cartoon.py │ ├── taobao/ │ │ └── taobao_login.py │ ├── xbqg/ │ │ └── xbqg_spider.py │ └── zycjw/ │ └── video_download.py ├── Netease/ │ ├── Netease.py │ └── music_list.txt ├── README.md ├── baiduwenku.py ├── baiduwenku_pro_1.py ├── baiwan/ │ ├── app.js │ ├── baiwan.py │ ├── file.txt │ ├── index.html │ └── question.txt ├── bilibili/ │ ├── README.md │ ├── bilibili.py │ └── xml2ass.py ├── bilibili_luckyman/ │ ├── README.md │ └── bilibili_luckyman.py ├── biqukan.py ├── cartoon/ │ ├── cartoon/ │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders/ │ │ ├── __init__.py │ │ └── comic_spider.py │ └── scrapy.cfg ├── daili.py ├── dingdong/ │ ├── README.md │ └── jd.py ├── douyin/ │ ├── README.md │ ├── douyin.py │ └── fuck-byted-acrawler.js ├── douyin.py ├── douyin_pro.py ├── downloader.py ├── financical.py ├── geetest.py ├── hero.py ├── one_hour_spider/ │ ├── biquge20180731.py │ ├── biqukan.py │ ├── unsplash.py │ ├── unsplash20180731.py │ └── vidoe_downloader.py ├── shuaia.py ├── video_downloader/ │ ├── MyQR/ │ │ ├── __init__.py │ │ ├── mylibs/ │ │ │ ├── ECC.py │ │ │ ├── __init__.py │ │ │ ├── constant.py │ │ │ ├── data.py │ │ │ ├── draw.py │ │ │ ├── matrix.py │ │ │ ├── structure.py │ │ │ └── theqrmodule.py │ │ ├── myqr.py │ │ └── terminal.py │ ├── requirements.txt │ └── video_downloader.py └── zhengfang_system_spider/ ├── README.md ├── requirements.txt ├── spider.py └── zhengfang.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: 12306.py ================================================ # -*- coding: utf-8 -*- """ 
class huoche(object):
    """Semi-automatic 12306 ticket booking driven through a splinter browser.

    All settings are class attributes; edit them before running.  The
    captcha on the login page is typed in by hand while ``login`` waits.
    """

    driver_name = ''
    executable_path = ''
    # Account name and password.
    username = u"xxx"
    passwd = u"xxx"
    # Station cookie values must be looked up by hand; these two are
    # Shenyang and Harbin.
    starts = u"%u6C88%u9633%2CSYT"
    ends = u"%u54C8%u5C14%u6EE8%2CHBB"
    # Travel date, formatted like 2018-01-19.
    dtime = u"2018-01-19"
    # Which train in the result list to book; 0 clicks every one, top to bottom.
    order = 0
    # Passenger names.
    users = [u"xxx", u"xxx"]
    # Seat class and ticket type.
    xb = u"二等座"
    pz = u"成人票"

    # URLs.
    ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
    login_url = "https://kyfw.12306.cn/otn/login/init"
    initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
    buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"

    def __init__(self):
        self.driver_name = 'chrome'
        self.executable_path = 'D:/chromedriver'

    def login(self):
        """Fill in the credentials, then block until the user has logged in."""
        self.driver.visit(self.login_url)
        self.driver.fill("loginUserDTO.user_name", self.username)
        self.driver.fill("userDTO.password", self.passwd)
        print(u"等待验证码,自行输入...")
        # The site redirects to the account page once the captcha is accepted.
        while self.driver.url != self.initmy_url:
            sleep(1)

    def start(self):
        """Open the browser, log in, then query and book until an order is placed.

        Any exception raised after the ticket page is opened is printed and
        swallowed, exactly as a best-effort script would.
        """
        self.driver = Browser(driver_name=self.driver_name,
                              executable_path=self.executable_path)
        self.driver.driver.set_window_size(1400, 1000)
        self.login()
        self.driver.visit(self.ticket_url)
        try:
            print(u"购票页面开始...")
            # Pre-load the query form through the cookies the page reads.
            for cookie_name, cookie_value in (
                    ("_jc_save_fromStation", self.starts),
                    ("_jc_save_toStation", self.ends),
                    ("_jc_save_fromDate", self.dtime)):
                self.driver.cookies.add({cookie_name: cookie_value})
            self.driver.reload()

            count = 0
            book_single_train = self.order != 0
            # Keep querying until a click on a booking button leaves the page.
            while self.driver.url == self.ticket_url:
                self.driver.find_by_text(u"查询").click()
                count += 1
                print(u"循环点击查询... 第 %s 次" % count)
                try:
                    if book_single_train:
                        self.driver.find_by_text(u"预订")[self.order - 1].click()
                    else:
                        for button in self.driver.find_by_text(u"预订"):
                            button.click()
                            sleep(1)
                except Exception as e:
                    print(e)
                    if book_single_train:
                        print(u"还没开始预订")
                    else:
                        print(u"还没开始预订 %s" % count)

            print(u"开始预订...")
            sleep(1)
            print(u'开始选择用户...')
            for user in self.users:
                self.driver.find_by_text(user).last.click()

            print(u"提交订单...")
            sleep(1)
            self.driver.find_by_text(self.pz).click()
            # NOTE(review): the element id is empty in the original; confirm
            # which <select> this is meant to target.
            self.driver.find_by_id('').select(self.pz)
            self.driver.find_by_text(self.xb).click()
            sleep(1)
            self.driver.find_by_id('submitOrder_id').click()

            print(u"开始选座...")
            self.driver.find_by_id('1D').last.click()
            self.driver.find_by_id('1F').last.click()
            sleep(1.5)

            print(u"确认选座...")
            self.driver.find_by_id('qr_submit_id').click()
        except Exception as e:
            print(e)

微信群 公众号 B站 知乎 CSDN 头条 掘金

## Python3 网络爬虫教程 2020

| 文章 | 公众号 | 代码 |
| :------ | :--------: | :--------: |
| Python3 网络爬虫(一):初识网络爬虫之夜探老王家 | [公众号](https://mp.weixin.qq.com/s/1rcq9RQYuAuHFg1w1j8HXg "Python3 网络爬虫(一)") | no |
| Python3 网络爬虫(二):下载小说的正确姿势 | [公众号](https://mp.weixin.qq.com/s/5e2_r0QXUISVp9GdDsqbzg "Python3 网络爬虫(二)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/xbqg "Python3 网络爬虫(二)") |
| Python3 网络爬虫(三):漫画下载,动态加载、反爬虫这都不叫事!| [公众号](https://mp.weixin.qq.com/s/wyS-OP04K3Vs9arSelRlyA "Python3 网络爬虫(三)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/dmzj "Python3 网络爬虫(三)") |
| Python3 网络爬虫(四):视频下载,那些事儿!| [公众号](https://mp.weixin.qq.com/s/_geNA6Dwo4kx25X7trJzlg "Python3 网络爬虫(四)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/zycjw "Python3 网络爬虫(四)") |
| Python3 网络爬虫(五):老板,需要特殊服务吗?| [公众号](https://mp.weixin.qq.com/s/PPTSnIHV71b-wB3oRiYnIA "Python3 网络爬虫(五)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/api "Python3 网络爬虫(五)") |
| Python3 网络爬虫(六):618,爱他/她,就清空他/她的购物车!| [公众号](https://mp.weixin.qq.com/s/lXXDfzyLVrf3f-aqJN1C3A "Python3 网络爬虫(六)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/taobao "Python3 网络爬虫(六)") |
| 宝藏B站UP主,视频弹幕尽收囊中!| [公众号](https://mp.weixin.qq.com/s/aWratg1j9RBAjIghoY66yQ "宝藏B站UP主,视频弹幕尽收囊中!") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/bilibili "宝藏B站UP主,视频弹幕尽收囊中!") |

更多精彩,敬请期待!
def addTasktoXunlei(down_url):
    """Queue ``down_url`` in the Xunlei (Thunder) client via its COM agent.

    Args:
        down_url: URL of the file to download.

    Returns:
        True when the task was added and committed, False when creating the
        agent or submitting the task failed (the error is printed).
    """
    flag = False
    try:
        # Creating the COM object is inside the ``try`` so that a missing or
        # unregistered Thunder agent is reported as a failure, not a crash.
        o = Dispatch('ThunderAgent.Agent64.1')
        o.AddTask(down_url, "", "", "", "", -1, 0, 5)
        o.CommitTasks()
        flag = True
    except Exception as e:
        # ``Exception.message`` does not exist on Python 3; print the caught
        # exception instance instead.
        print(e)
        print(" AddTask is fail!")
    return flag
':' + download_url) # 记录视频名字 xunlei_video_name = download_url.split('?')[0].split('/')[-1] filename = video[0] for c in u'´☆❤◦\/:*?"<>| ': filename = filename.replace(c, '') save_video_name = filename + '.mp4' all_video[xunlei_video_name] = save_video_name addTasktoXunlei(download_url) # 弹幕下载 danmu_name = filename + '.xml' danmu_ass = filename + '.ass' oid = download_url.split('/')[6] danmu_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(oid) danmu_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9'} with closing(sess.get(danmu_url, headers=danmu_header, stream=True, verify=False)) as response: if response.status_code == 200: with open(danmu_name, 'wb') as file: for data in response.iter_content(): file.write(data) file.flush() else: print('链接异常') time.sleep(0.5) xml2ass.Danmaku2ASS(danmu_name, danmu_ass, 1280, 720) # 视频重命名 for key, item in all_video.items(): while key not in os.listdir('./'): time.sleep(1) os.rename(key, item) ================================================ FILE: 2020/bilibili/xml2ass.py ================================================ # The original author of this program, Danmaku2ASS, is StarBrilliant. # This file is released under General Public License version 3. # You should have received a copy of General Public License text alongside with # this program. If not, you can obtain it at http://gnu.org/copyleft/gpl.html . # This program comes with no warranty, the author will not be resopnsible for # any damage or problems caused by this program. 
def SeekZero(function):
    """Decorator: rewind the file to offset 0 before and after calling ``function``."""
    def decorated_function(file_):
        file_.seek(0)
        try:
            outcome = function(file_)
        finally:
            # Rewind even when the wrapped call raises.
            file_.seek(0)
        return outcome
    return decorated_function


def EOFAsNone(function):
    """Decorator: turn an ``EOFError`` raised by ``function`` into a ``None`` result."""
    def decorated_function(*args, **kwargs):
        outcome = None
        try:
            outcome = function(*args, **kwargs)
        except EOFError:
            pass
        return outcome
    return decorated_function


@SeekZero
@EOFAsNone
def ProbeCommentFormat(f):
    """Guess the comment-file format from the first characters of ``f``.

    Returns one of 'Acfun', 'Tudou', 'sH5V', 'Bilibili', 'Niconico', or
    ``None`` when the prefix matches none of the known signatures.
    """
    first = f.read(1)
    if first == '[':
        # It is unwise to wrap a JSON object in an array!
        # See this: http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx/
        # Do never follow what Acfun developers did!
        return 'Acfun'
    if first == '{':
        json_head = f.read(14)
        if json_head == '"status_code":':
            return 'Tudou'
        if json_head == '"root":{"total':
            return 'sH5V'
        return None
    if first == '<':
        second = f.read(1)
        if second == '?':
            declaration = f.read(38)
            if declaration == 'xml version="1.0" encoding="UTF-8"?>\n<':
                return 'Bilibili'  # Komica, with the same file format as Bilibili
            # NOTE(review): this comparison is identical to the one above, so
            # the 'MioMio' result can never be produced; kept as in the original.
            if declaration == 'xml version="1.0" encoding="UTF-8"?>\n<':
                return 'MioMio'
            return None
        if second == 'p':
            return 'Niconico'  # Himawari Douga, with the same file format as Niconico Douga
    return None
def ReadCommentsNiconico(f, fontsize):
    """Yield comment tuples parsed from a Niconico XML file.

    Each ``<chat>`` element becomes one tuple following the ReadComments
    protocol: (timeline, timestamp, no, comment, pos, color, size, height,
    width).  Comments whose text starts with '/' are skipped; malformed ones
    are logged and skipped.
    """
    NiconicoColorMap = {'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffcc00, 'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff, 'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000, 'niconicowhite': 0xcccc99, 'white2': 0xcccc99, 'truered': 0xcc0033, 'red2': 0xcc0033, 'passionorange': 0xff6600, 'orange2': 0xff6600, 'madyellow': 0x999900, 'yellow2': 0x999900, 'elementalgreen': 0x00cc66, 'green2': 0x00cc66, 'marineblue': 0x33ffcc, 'blue2': 0x33ffcc, 'nobleviolet': 0x6633cc, 'purple2': 0x6633cc}
    # Keywords of the "mail" attribute that set the position or font scale.
    position_keywords = {'ue': 1, 'shita': 2}
    scale_keywords = {'big': 1.44, 'small': 0.64}
    document = xml.dom.minidom.parse(f)
    for comment in document.getElementsByTagName('chat'):
        try:
            c = str(comment.childNodes[0].wholeText)
            if c.startswith('/'):
                continue  # ignore advanced comments
            pos, color, size = 0, 0xffffff, fontsize
            for mailstyle in str(comment.getAttribute('mail')).split():
                if mailstyle in position_keywords:
                    pos = position_keywords[mailstyle]
                elif mailstyle in scale_keywords:
                    size = fontsize*scale_keywords[mailstyle]
                elif mailstyle in NiconicoColorMap:
                    color = NiconicoColorMap[mailstyle]
            # vpos is multiplied by 0.01 to obtain the timeline value.
            timeline = max(int(comment.getAttribute('vpos')), 0)*0.01
            timestamp = int(comment.getAttribute('date'))
            number = int(comment.getAttribute('no'))
            yield (timeline, timestamp, number, c, pos, color, size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue
def ReadCommentsBilibili(f, fontsize):
    """Yield comment tuples parsed from a Bilibili XML comment file.

    Every ``<d>`` element carries a comma-separated ``p`` attribute; the
    fields read here are p[0] (timeline), p[1] (mode), p[2] (font size),
    p[3] (color) and p[4] (timestamp).  Mode '7' yields a positioned comment
    tagged 'bilipos'; all other accepted modes yield a regular comment.
    Malformed comments are logged and skipped.
    """
    mode_to_pos = {'1': 0, '4': 2, '5': 1, '6': 3}
    nodes = xml.dom.minidom.parse(f).getElementsByTagName('d')
    for i, comment in enumerate(nodes):
        try:
            p = str(comment.getAttribute('p')).split(',')
            assert len(p) >= 5
            assert p[1] in ('1', '4', '5', '6', '7')
            if p[1] == '7':  # positioned comment
                c = str(comment.childNodes[0].wholeText)
                yield (float(p[0]), int(p[4]), i, c, 'bilipos', int(p[3]), int(p[2]), 0, 0)
            else:
                # A literal "/n" in the text stands for a line break.
                c = str(comment.childNodes[0].wholeText).replace('/n', '\n')
                size = int(p[2])*fontsize/25.0
                yield (float(p[0]), int(p[4]), i, c, mode_to_pos[p[1]], int(p[3]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue
def ReadCommentsSH5V(f, fontsize):
    """Yield comment tuples parsed from an sH5V JSON comment file.

    Comments are read from ``root.bgs``.  Type '7' yields a 16-field
    positioned comment tagged 'sH5Vpos'; every other type yields a regular
    9-field comment.  Malformed comments are logged and skipped.
    """
    type_to_pos = {'0': 0, '1': 0, '4': 2, '5': 1}
    payload = json.load(f)
    for i, comment in enumerate(payload["root"]["bgs"]):
        try:
            c_at = str(comment['at'])
            c_type = str(comment['type'])
            c_date = str(comment['timestamp'])
            c_color = str(comment['color'])
            c = str(comment['text'])
            size = fontsize
            if c_type == '7':
                c_x = float(comment['x'])
                c_y = float(comment['y'])
                size = int(comment['size'])
                dur = int(comment['dur'])
                data1 = float(comment['data1'])
                data2 = float(comment['data2'])
                data3 = int(comment['data3'])
                data4 = int(comment['data4'])
                # The color string is parsed as hex after dropping its first character.
                yield (float(c_at), int(c_date), i, c, 'sH5Vpos', int(c_color[1:], 16), size, 0, 0, c_x, c_y, dur, data1, data2, data3, data4)
            else:
                yield (float(c_at), int(c_date), i, c, type_to_pos[c_type], int(c_color[1:], 16), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue
ReadCommentsNiconico, 'Acfun': ReadCommentsAcfun, 'Bilibili': ReadCommentsBilibili, 'Tudou': ReadCommentsTudou, 'MioMio': ReadCommentsMioMio, 'sH5V': ReadCommentsSH5V} def WriteCommentBilibiliPositioned(f, c, width, height, styleid): #BiliPlayerSize = (512, 384) # Bilibili player version 2010 #BiliPlayerSize = (540, 384) # Bilibili player version 2012 BiliPlayerSize = (672, 438) # Bilibili player version 2014 ZoomFactor = GetZoomFactor(BiliPlayerSize, (width, height)) def GetPosition(InputPos, isHeight): isHeight = int(isHeight) # True -> 1 if isinstance(InputPos, int): return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1] elif isinstance(InputPos, float): if InputPos > 1: return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1] else: return BiliPlayerSize[isHeight]*ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1] else: try: InputPos = int(InputPos) except ValueError: InputPos = float(InputPos) return GetPosition(InputPos, isHeight) try: comment_args = safe_list(json.loads(c[3])) text = ASSEscape(str(comment_args[4]).replace('/n', '\n')) from_x = comment_args.get(0, 0) from_y = comment_args.get(1, 0) to_x = comment_args.get(7, from_x) to_y = comment_args.get(8, from_y) from_x = round(GetPosition(from_x, False)) from_y = round(GetPosition(from_y, True)) to_x = round(GetPosition(to_x, False)) to_y = round(GetPosition(to_y, True)) alpha = safe_list(str(comment_args.get(2, '1')).split('-')) from_alpha = float(alpha.get(0, 1)) to_alpha = float(alpha.get(1, from_alpha)) from_alpha = 255-round(from_alpha*255) to_alpha = 255-round(to_alpha*255) rotate_z = int(comment_args.get(5, 0)) rotate_y = int(comment_args.get(6, 0)) lifetime = float(comment_args.get(3, 4500)) duration = int(comment_args.get(9, lifetime*1000)) delay = int(comment_args.get(10, 0)) fontface = comment_args.get(12) isborder = comment_args.get(11, 'true') styles = [] if (from_x, from_y) == (to_x, to_y): styles.append('\\pos(%s, %s)' % (from_x, from_y)) else: styles.append('\\move(%s, %s, %s, %s, %s, %s)' % (from_x, 
def WriteCommentAcfunPositioned(f, c, width, height, styleid):
    """Write one Acfun positioned comment to ``f`` as ASS ``Dialogue`` lines.

    Args:
        f: Writable text file receiving the ASS output.
        c: Comment tuple; ``c[0]`` is the timeline, ``c[3]`` the decoded
            comment dict, ``c[5]`` the color and ``c[6]`` the font size.
        width: Target video width in pixels.
        height: Target video height in pixels.
        styleid: Name of the ASS style to reference.

    One line is written for the initial state and one more per entry of the
    comment's ``'z'`` action list.  ``IndexError``/``ValueError`` raised while
    interpreting the comment are logged as an invalid comment.
    """
    AcfunPlayerSize = (560, 400)
    ZoomFactor = GetZoomFactor(AcfunPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # InputPos is scaled by 0.001 of the player dimension, then zoomed and
        # offset into the target frame.
        isHeight = int(isHeight)  # True -> 1
        return AcfunPlayerSize[isHeight]*ZoomFactor[0]*InputPos*0.001+ZoomFactor[isHeight+1]

    def GetRotationStyle(rotate_y, rotate_z, x, y):
        # Rotation tags for a point given in target pixels; X is normalised by
        # the usable width and Y by the usable HEIGHT.
        return '\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2))

    def GetTransformStyles(x=None, y=None, scale_x=None, scale_y=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
        styles = []
        if x is not None and y is not None:
            styles.append('\\pos(%s, %s)' % (x, y))
        if scale_x is not None:
            styles.append('\\fscx%s' % scale_x)
        if scale_y is not None:
            styles.append('\\fscy%s' % scale_y)
        if rotate_z is not None and rotate_y is not None:
            assert x is not None
            assert y is not None
            styles.append(GetRotationStyle(rotate_y, rotate_z, x, y))
        if color is not None:
            # ASS colors are written blue-green-red.
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                # Black text gets a white outline.
                styles.append('\\3c&HFFFFFF&')
        if alpha is not None:
            alpha = 255-round(alpha*255)
            styles.append('\\alpha&H%02X' % alpha)
        return styles

    def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
        # Zero-length or negative-length segments are not written.
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})

    try:
        comment_args = c[3]
        # The original applied the same '\r' replacement twice; once suffices.
        text = ASSEscape(str(comment_args['n']).replace('\r', '\n'))
        common_styles = []
        anchor = {0: 7, 1: 8, 2: 9, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2, 8: 3}.get(comment_args.get('c', 0), 7)
        if anchor != 7:
            common_styles.append('\\an%s' % anchor)
        font = comment_args.get('w')
        if font:
            font = dict(font)
            fontface = font.get('f')
            if fontface:
                common_styles.append('\\fn%s' % ASSEscape(str(fontface)))
            fontbold = bool(font.get('b'))
            if fontbold:
                common_styles.append('\\b1')
        common_styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        isborder = bool(comment_args.get('b', True))
        if not isborder:
            common_styles.append('\\bord0')
        to_pos = dict(comment_args.get('p', {'x': 0, 'y': 0}))
        to_x = round(GetPosition(int(to_pos.get('x', 0)), False))
        to_y = round(GetPosition(int(to_pos.get('y', 0)), True))
        to_scale_x = round(float(comment_args.get('e', 1.0))*100)
        to_scale_y = round(float(comment_args.get('f', 1.0))*100)
        to_rotate_z = float(comment_args.get('r', 0.0))
        to_rotate_y = float(comment_args.get('k', 0.0))
        to_color = c[5]
        to_alpha = float(comment_args.get('a', 1.0))
        from_time = float(comment_args.get('t', 0.0))
        action_time = float(comment_args.get('l', 3.0))
        actions = list(comment_args.get('z', []))
        transform_styles = GetTransformStyles(to_x, to_y, to_scale_x, to_scale_y, to_rotate_z, to_rotate_y, to_color, to_alpha)
        FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
        for action in actions:
            action = dict(action)
            # The state reached by the previous segment is where this one starts.
            from_x, from_y = to_x, to_y
            from_scale_x, from_scale_y = to_scale_x, to_scale_y
            from_rotate_z, from_rotate_y = to_rotate_z, to_rotate_y
            from_color, from_alpha = to_color, to_alpha
            from_time += action_time
            action_time = float(action.get('l', 0.0))
            action_styles = []
            if 'x' in action:
                to_x = round(GetPosition(int(action['x']), False))
            if 'y' in action:
                to_y = round(GetPosition(int(action['y']), True))
            if 'f' in action:
                to_scale_x = round(float(action['f'])*100)
                action_styles.append('\\fscx%s' % to_scale_x)
            if 'g' in action:
                to_scale_y = round(float(action['g'])*100)
                action_styles.append('\\fscy%s' % to_scale_y)
            if 'c' in action:
                to_color = int(action['c'])
                action_styles.append('\\c&H%02X%02X%02X&' % (to_color & 0xff, (to_color >> 8) & 0xff, (to_color >> 16) & 0xff))
            if 't' in action:
                to_alpha = float(action['t'])
                action_styles.append('\\alpha&H%02X' % (255-round(to_alpha*255)))
            if 'd' in action:
                to_rotate_z = float(action['d'])
            if 'e' in action:
                to_rotate_y = float(action['e'])
            if ('x' in action) or ('y' in action):
                transform_styles = GetTransformStyles(None, None, from_scale_x, from_scale_y, None, None, from_color, from_alpha)
                transform_styles.append('\\move(%s, %s, %s, %s)' % (from_x, from_y, to_x, to_y))
                # Fixed: the relative Y coordinate was divided by the width.
                action_styles.append(GetRotationStyle(to_rotate_y, to_rotate_z, to_x, to_y))
            elif ('d' in action) or ('e' in action):
                # NOTE(review): transform_styles is carried over from the
                # previous segment here, as in the original; confirm intent.
                action_styles.append(GetRotationStyle(to_rotate_y, to_rotate_z, to_x, to_y))
            else:
                transform_styles = GetTransformStyles(from_x, from_y, from_scale_x, from_scale_y, from_rotate_z, from_rotate_y, from_color, from_alpha)
            if action_styles:
                transform_styles.append('\\t(%s)' % (''.join(action_styles)))
            FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
    except (IndexError, ValueError):
        logging.warning(_('Invalid comment: %r') % c[3])
# Result: (f, dx, dy)
# To convert: NewX = f*x+dx, NewY = f*y+dy
def GetZoomFactor(SourceSize, TargetSize):
    """Return ``(scale, dx, dy)`` fitting ``SourceSize`` inside ``TargetSize``.

    The source is scaled uniformly so that it fits the target while keeping
    its aspect ratio; ``dx``/``dy`` are the offsets that centre it.  A zero
    dimension that causes a division by zero yields ``(1, 0, 0)``.

    The most recent result is memoised on the function object
    (``Cached_Size`` / ``Cached_Result``).  Both attributes are written
    together, after the computation succeeded, so a call that raises can
    never leave the cache key pointing at an unrelated earlier result.

    Args:
        SourceSize: ``(width, height)`` of the source player.
        TargetSize: ``(width, height)`` of the output video.
    """
    cache_key = (SourceSize, TargetSize)
    try:
        if cache_key == GetZoomFactor.Cached_Size:
            return GetZoomFactor.Cached_Result
    except AttributeError:
        pass  # nothing cached yet
    try:
        SourceAspect = SourceSize[0]/SourceSize[1]
        TargetAspect = TargetSize[0]/TargetSize[1]
        if TargetAspect < SourceAspect:  # narrower
            ScaleFactor = TargetSize[0]/SourceSize[0]
            result = (ScaleFactor, 0, (TargetSize[1]-TargetSize[0]/SourceAspect)/2)
        elif TargetAspect > SourceAspect:  # wider
            ScaleFactor = TargetSize[1]/SourceSize[1]
            result = (ScaleFactor, (TargetSize[0]-TargetSize[1]*SourceAspect)/2, 0)
        else:
            result = (TargetSize[0]/SourceSize[0], 0, 0)
    except ZeroDivisionError:
        result = (1, 0, 0)
    GetZoomFactor.Cached_Size = cache_key
    GetZoomFactor.Cached_Result = result
    return result
# Calculation is based on https://github.com/jabbany/CommentCoreLibrary/issues/5#issuecomment-40087282
#                     and https://github.com/m13253/danmaku2ass/issues/7#issuecomment-41489422
# Input: X relative horizontal coordinate: 0 for left edge, 1 for right edge.
#        Y relative vertical coordinate: 0 for top edge, 1 for bottom edge.
# FOV = 1.0/math.tan(100*math.pi/360.0)
# Result: (rotX, rotY, rotZ, shearX, shearY)
def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
    """Convert Flash-style Y/Z rotation into ASS rotation and shear values.

    :param rotY: rotation around the Y axis, in degrees.
    :param rotZ: rotation around the Z axis, in degrees.
    :param X: relative horizontal position (0 = left edge, 1 = right edge).
    :param Y: relative vertical position (0 = top edge, 1 = bottom edge).
    :param FOV: field-of-view factor used for the perspective correction of
        the Y rotation; ``None`` disables the correction.
    :return: tuple ``(rotX, rotY, rotZ, shearX, shearY)``; the three angles
        are rounded to whole degrees and wrapped, ``shearX`` is always 0.
    """
    def WrapAngle(deg):
        return 180-((180-deg)%360)

    def ClampCosine(costheta, alpha, X):
        # acos() needs its argument within [-1, 1]; anything outside is
        # clipped and reported.  Values that compare neither above 1 nor
        # below -1 are returned untouched.
        if costheta > 1:
            clipped = 1
        elif costheta < -1:
            clipped = -1
        else:
            return costheta
        logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!', alpha, X)
        return clipped

    def CalcPerspectiveCorrection(alpha, X, FOV=FOV):
        alpha = WrapAngle(alpha)
        if FOV is None:
            return alpha
        radians = alpha*math.pi/180.0
        # The two half-planes differ only in the sign applied to the
        # denominator term and to the resulting angle.
        sign = 1 if 0 <= alpha <= 180 else -1
        costheta = (FOV*math.cos(radians)-X*math.sin(radians))/(FOV+sign*max(2, abs(X)+1)*math.sin(radians))
        costheta = ClampCosine(costheta, alpha, X)
        theta = sign*math.acos(costheta)*180/math.pi
        return WrapAngle(theta)

    # Map the relative coordinates from [0, 1] to [-1, 1].
    X = 2*X-1
    Y = 2*Y-1
    rotY = WrapAngle(rotY)
    rotZ = WrapAngle(rotZ)
    if rotY == 0 or rotZ == 0:
        outX = 0
        outY = -rotY  # Positive value means clockwise in Flash
        outZ = -rotZ
    else:
        rotY = rotY*math.pi/180.0
        rotZ = rotZ*math.pi/180.0
        outY = math.atan2(-math.sin(rotY)*math.cos(rotZ), math.cos(rotY))*180/math.pi
        outZ = math.atan2(-math.cos(rotY)*math.sin(rotZ), math.cos(rotZ))*180/math.pi
        outX = math.asin(math.sin(rotY)*math.sin(rotZ))*180/math.pi
    if FOV is not None:
        # NOTE: only the Y rotation is perspective-corrected; the original
        # author left the X-axis correction disabled.
        outY = CalcPerspectiveCorrection(outY, X, FOV)
    return (WrapAngle(round(outX)), WrapAngle(round(outY)), WrapAngle(round(outZ)), 0, round(-0.75*Y*math.sin(outY*math.pi/180.0), 3))
def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
    """Count how many consecutive rows starting at ``row`` are free for ``c``.

    Scanning stops at the first row whose occupant would still collide with
    comment ``c``, at the bottom of the usable stage, or once ``c[7]`` rows
    have been counted.  Fixed comments (``c[4]`` of 1 or 2) collide while the
    occupant is still within its lifetime; other comments use a threshold
    derived from the comment widths (index 8) and the stage width.

    :return: number of free rows found (between 0 and ``c[7]``).
    """
    if c[4] in (1, 2):
        def collides(occupant):
            return occupant[0]+lifetime > c[0]
    else:
        try:
            threshold = c[0]-lifetime*(1-width/(c[8]+width))
        except ZeroDivisionError:
            threshold = c[0]-lifetime

        def collides(occupant):
            try:
                return (occupant[0] > threshold or
                        occupant[0]+occupant[8]*lifetime/(occupant[8]+width) > c[0])
            except ZeroDivisionError:
                # A degenerate width cannot be evaluated; treat as no collision.
                return False

    limit = height-bottomReserved
    free = 0
    previous = None
    while row < limit and free < c[7]:
        occupant = rows[c[4]][row]
        # Consecutive rows held by the same comment are only tested once.
        if previous != occupant:
            previous = occupant
            if occupant and collides(occupant):
                break
        row += 1
        free += 1
    return free
def ConvertTimestamp(timestamp):
    """Format a time in seconds as an ASS timestamp ``H:MM:SS.cc``.

    The value is rounded to the nearest centisecond.  Negative values are
    clamped to zero: floor-based ``divmod`` would otherwise emit a negative
    hour with wrapped minutes/seconds (e.g. ``-1:59:59.00`` for -1 s).

    :param timestamp: time in seconds (int or float).
    :return: the formatted timestamp string.
    """
    centiseconds = max(0, round(timestamp*100.0))
    hour, remainder = divmod(centiseconds, 360000)
    minute, remainder = divmod(remainder, 6000)
    second, centsecond = divmod(remainder, 100)
    return '%d:%02d:%02d.%02d' % (int(hour), int(minute), int(second), int(centsecond))
def FilterBadChars(f):
    """Read all of ``f`` and return a new text stream without bad control chars.

    Every character in the ranges U+0000-U+0008, U+000B, U+000C and
    U+000E-U+001F is replaced by U+FFFD; tab, line feed and carriage return
    are kept.

    :param f: a readable text file object; it is read to the end.
    :return: an ``io.StringIO`` holding the cleaned text.
    """
    bad_chars = re.compile('[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]')
    cleaned = bad_chars.sub('\ufffd', f.read())
    return io.StringIO(cleaned)
# Download every chapter of the comic "妖神记" from dmzj.com into a local
# folder, one sub-folder per chapter.
#
# Author: Jack Cui
# Wechat: https://mp.weixin.qq.com/s/OCWwRVDFNslIuKyiCVUoTA
import requests
import os
import re
from bs4 import BeautifulSoup
from contextlib import closing
from tqdm import tqdm
import time

# Seconds to wait for the server before giving up on a request, so that a
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Create the save directory.
save_dir = '妖神记'
os.makedirs(save_dir, exist_ok=True)

target_url = "https://www.dmzj.com/info/yaoshenji.html"

# Collect chapter links and chapter names.  The two lists hold the page's
# anchors in reverse order (same order the original insert(0, ...) produced).
r = requests.get(url=target_url, timeout=REQUEST_TIMEOUT)
bs = BeautifulSoup(r.text, 'lxml')
list_con_li = bs.find('ul', class_="list_con_li")
cartoon_list = list_con_li.find_all('a')
chapter_names = [cartoon.text for cartoon in reversed(cartoon_list)]
chapter_urls = [cartoon.get('href') for cartoon in reversed(cartoon_list)]

# Download the comic.
for chapter_url, name in zip(tqdm(chapter_urls), chapter_names):
    download_header = {
        'Referer': chapter_url
    }
    # Remove every '.' from the chapter name before using it as a folder name.
    name = name.replace('.', '')
    chapter_save_dir = os.path.join(save_dir, name)
    # A chapter whose folder already exists is treated as downloaded.
    if name in os.listdir(save_dir):
        continue
    os.mkdir(chapter_save_dir)
    r = requests.get(url=chapter_url, timeout=REQUEST_TIMEOUT)
    html = BeautifulSoup(r.text, 'lxml')
    script_info = str(html.script)
    # Image ids are 13- or 14-digit numbers.  For ordering, a 13-digit id is
    # compared as if it had a trailing zero, but the id itself is left
    # untouched so that a genuine 14-digit id ending in 0 is not truncated
    # when the image URL is built.
    pics = re.findall(r'\d{13,14}', script_info)
    pics.sort(key=lambda pic: int(pic.ljust(14, '0')))
    chapterpic_hou = re.findall(r'\|(\d{5})\|', script_info)[0]
    chapterpic_qian = re.findall(r'\|(\d{4})\|', script_info)[0]
    for idx, pic in enumerate(pics):
        pic_url = 'https://images.dmzj.com/img/chapterpic/' + chapterpic_qian + '/' + chapterpic_hou + '/' + pic + '.jpg'
        pic_name = '%03d.jpg' % (idx + 1)
        pic_save_path = os.path.join(chapter_save_dir, pic_name)
        with closing(requests.get(pic_url, headers=download_header, stream=True, timeout=REQUEST_TIMEOUT)) as response:
            chunk_size = 1024
            if response.status_code == 200:
                with open(pic_save_path, "wb") as file:
                    for data in response.iter_content(chunk_size=chunk_size):
                        file.write(data)
            else:
                print('链接异常')
    # Pause between chapters to avoid hammering the site.
    time.sleep(10)
class taobao():
    """Drive a Chrome browser through the taobao.com login flow.

    The login button itself is clicked with ``pyautogui`` by locating a
    screenshot (``1.png``) on screen, because clicking it through selenium
    does not log in (per the original author's note).
    """

    def __init__(self, driver_path=r"path\to\your\chromedriver.exe"):
        """Start Chrome.

        :param driver_path: path to the chromedriver executable.  The default
            is a raw string: written as a normal literal, ``\\t`` in the
            placeholder path was interpreted as a TAB character.
        """
        self.browser = webdriver.Chrome(driver_path)
        # Maximize the window.
        self.browser.maximize_window()
        self.browser.implicitly_wait(5)
        self.domain = 'http://www.taobao.com'
        self.action_chains = ActionChains(self.browser)

    def login(self, username, password):
        """Retry the login sequence until a nickname is shown on the home page.

        :param username: account name typed into the login form.
        :param password: password typed into the login form.
        """
        while True:
            self.browser.get(self.domain)
            time.sleep(1)

            # Open the login form and fill in the credentials.
            self.browser.find_element_by_xpath('//*[@id="J_SiteNavLogin"]/div[1]/div[1]/a[1]').click()
            self.browser.find_element_by_xpath('//*[@id="fm-login-id"]').send_keys(username)
            self.browser.find_element_by_xpath('//*[@id="fm-login-password"]').send_keys(password)
            time.sleep(1)

            try:
                # A slider captcha appeared: drag it across.
                slider = self.browser.find_element_by_xpath("//span[contains(@class, 'btn_slide')]")
                if slider.is_displayed():
                    # Drag the slider.
                    self.action_chains.drag_and_drop_by_offset(slider, 258, 0).perform()
                    time.sleep(0.5)
                    # Release the slider (mouse-up after the drag).
                    self.action_chains.release().perform()
            except (NoSuchElementException, WebDriverException):
                logger.info('未出现登录验证码')

            # Click the login button via pyautogui, using a screenshot of it.
            coords = pyautogui.locateOnScreen('1.png')
            if coords is None:
                # The button image was not found on screen; center(None)
                # would raise, so report it and start over.
                logger.warning('未在屏幕上找到登录按钮图片 1.png,5s后继续登录')
                time.sleep(5)
                continue
            x, y = pyautogui.center(coords)
            pyautogui.leftClick(x, y)

            nickname = self.get_nickname()
            if nickname:
                logger.info('登录成功,呢称为:' + nickname)
                break
            # Logged at WARNING: the module configures logging at INFO, so a
            # DEBUG message here was never shown and retries were silent.
            logger.warning('登录出错,5s后继续登录')
            time.sleep(5)

    def get_nickname(self):
        """Return the nickname shown on the home page, or '' if not logged in."""
        self.browser.get(self.domain)
        time.sleep(0.5)
        try:
            return self.browser.find_element_by_class_name('site-nav-user').text
        except NoSuchElementException:
            return ''

    def _click_if_displayed(self, xpath):
        """Click the element at ``xpath`` if it is displayed; return whether it was."""
        element = self.browser.find_element_by_xpath(xpath)
        if element.is_displayed():
            element.click()
            return True
        return False

    def clear_cart(self):
        """Open the cart, select everything, and submit the order."""
        self._click_if_displayed('//*[@id="J_MiniCart"]')
        if self._click_if_displayed('//*[@id="J_SelectAll1"]/div/label'):
            # Give the page a moment to update after selecting all items.
            time.sleep(0.5)
        self._click_if_displayed('//*[@id="J_Go"]')
        self._click_if_displayed('//*[@id="submitOrderPC_1"]/div/a[2]')
# Search jisudhw.com for a show, collect the m3u8 stream URL of every
# episode, and download them with ffmpeg using a pool of 8 threads.
import os
import ffmpy3
import requests
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool

search_keyword = '越狱第一季'
search_url = 'http://www.jisudhw.com/index.php'
serach_params = {
    'm': 'vod-search'
}
serach_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
    'Referer': 'http://www.jisudhw.com/',
    'Origin': 'http://www.jisudhw.com',
    'Host': 'www.jisudhw.com'
}
serach_datas = {
    'wd': search_keyword,
    'submit': 'search'
}

video_dir = ''
# Maps each m3u8 URL to its episode number.  Defined up front so that an
# empty search result leaves an empty mapping instead of an undefined name.
serach_res = {}

r = requests.post(url=search_url, params=serach_params, headers=serach_headers, data=serach_datas)
r.encoding = 'utf-8'
server = 'http://www.jisudhw.com'
search_html = BeautifulSoup(r.text, 'lxml')
search_spans = search_html.find_all('span', class_='xing_vb4')
for span in search_spans:
    url = server + span.a.get('href')
    name = span.a.string
    print(name)
    print(url)
    # NOTE: video_dir and serach_res are reset for every search hit, so only
    # the last hit is the one that gets downloaded below.
    video_dir = name
    if name not in os.listdir('./'):
        os.mkdir(name)

    detail_url = url
    r = requests.get(url=detail_url)
    r.encoding = 'utf-8'
    detail_bf = BeautifulSoup(r.text, 'lxml')
    num = 1
    serach_res = {}
    for each_url in detail_bf.find_all('input'):
        # Inputs without a value attribute yield None; skip them instead of
        # failing on "'m3u8' in None".
        value = each_url.get('value')
        if value and 'm3u8' in value and value not in serach_res:
            serach_res[value] = num
            print('第%03d集:' % num)
            print(value)
            num += 1


def downVideo(url):
    """Download one episode stream with ffmpeg into ``video_dir``."""
    num = serach_res[url]
    name = os.path.join(video_dir, '第%03d集.mp4' % num)
    ffmpy3.FFmpeg(inputs={url: None}, outputs={name: None}).run()


# Run the downloads on a pool of 8 threads.
pool = ThreadPool(8)
results = pool.map(downVideo, serach_res.keys())
pool.close()
pool.join()
class Encrypyed():
    """Request encryption helper for the Netease "weapi" endpoints.

    The payload is AES-CBC encrypted twice (first with a fixed nonce, then
    with a random key) and the random key is RSA-encrypted with the public
    key/modulus below.
    """

    def __init__(self):
        self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
        self.nonce = '0CoJUm6Qyw8W8jud'
        self.pub_key = '010001'

    # Login encryption, based on https://github.com/stkevintan/nw_musicbox
    def encrypted_request(self, text):
        """Encrypt a JSON-serializable payload into the form fields to POST.

        :param text: the request parameters (serialized with ``json.dumps``).
        :return: dict with keys ``'params'`` and ``'encSecKey'``.
        """
        text = json.dumps(text)
        sec_key = self.create_secret_key(16)
        enc_text = self.aes_encrypt(self.aes_encrypt(text, self.nonce), sec_key.decode('utf-8'))
        enc_sec_key = self.rsa_encrpt(sec_key, self.pub_key, self.modulus)
        return {'params': enc_text, 'encSecKey': enc_sec_key}

    def aes_encrypt(self, text, secKey):
        """AES-CBC encrypt ``text`` with ``secKey``; return base64 text.

        The padding length is computed on the UTF-8 *bytes*, not on the
        character count, so text containing multi-byte characters is still
        padded to a 16-byte boundary.  Output for ASCII text is unchanged.

        :param text: plaintext string.
        :param secKey: key string (encoded as UTF-8 before use).
        :return: base64-encoded ciphertext as ``str``.
        """
        raw = text.encode('utf-8')
        pad = 16 - len(raw) % 16
        raw += bytes([pad]) * pad
        encryptor = AES.new(secKey.encode('utf-8'), AES.MODE_CBC, b'0102030405060708')
        ciphertext = encryptor.encrypt(raw)
        return base64.b64encode(ciphertext).decode('utf-8')

    def rsa_encrpt(self, text, pubKey, modulus):
        """Raw RSA: ``int(reversed text) ** pubKey mod modulus`` as 256 hex digits.

        :param text: key material as ``bytes``.
        :param pubKey: public exponent, hexadecimal string.
        :param modulus: modulus, hexadecimal string.
        :return: lowercase hex string, zero-filled to 256 characters.
        """
        text = text[::-1]
        rs = pow(int(binascii.hexlify(text), 16), int(pubKey, 16), int(modulus, 16))
        return format(rs, 'x').zfill(256)

    def create_secret_key(self, size):
        """Return random hex characters as ``bytes``.

        ``size`` random bytes are hex-encoded and the result is cut to at
        most 16 characters, regardless of ``size``.
        """
        return binascii.hexlify(os.urandom(size))[:16]
def search_song(self, song_name, song_num, quiet=True, limit=9):
    """
    Search for a song by name.

    :params song_name: song name to search for
    :params song_num: sequence number of the song in the download list
    :params quiet: automatically pick the best (first) match
    :params limit: number of results to request
    :return: a Song object, or None when the request failed, nothing was
             found, or quiet is False
    """
    result = self.search(song_name, search_type=1, limit=limit)
    if not result:
        # post_request() reports the error itself and returns None;
        # indexing that None would raise a TypeError here.
        return None
    info = result.get('result') or {}
    if info.get('songCount', 0) <= 0:
        click.echo('Song {} not existed.'.format(song_name))
        return None
    if not quiet:
        # Only automatic selection is implemented.
        return None
    best = info['songs'][0]
    return Song(song_id=best['id'], song_name=best['name'], song_num=song_num)
def get_song_by_url(self, song_url, song_name, song_num, folder):
    """
    Download a song to the local disk.

    :params song_url: download URL of the song
    :params song_name: name of the song
    :params song_num: sequence number of the song in the download list
    :params folder: directory to save into (created if missing)

    The file is named "<song_num>_<song_name>.mp3"; an existing file is not
    downloaded again.  Raises requests.HTTPError on an HTTP error status so
    that an error page is never written out as an .mp3.
    """
    os.makedirs(folder, exist_ok=True)
    file_name = song_name
    if sys.platform == 'win32' or sys.platform == 'cygwin':
        # Drop the characters Windows does not allow in file names.
        valid_name = re.sub(r'[<>:"/\\|?*]', '', song_name)
        if valid_name != song_name:
            click.echo('{} will be saved as: {}.mp3'.format(song_name, valid_name))
            file_name = valid_name
    fpath = os.path.join(folder, str(song_num) + '_' + file_name + '.mp3')
    if os.path.exists(fpath):
        return

    resp = self.download_session.get(song_url, timeout=self.timeout, stream=True)
    try:
        resp.raise_for_status()
        # The header may be missing (e.g. chunked transfer); fall back to 0
        # instead of failing on int(None).
        length = int(resp.headers.get('content-length') or 0)
        label = 'Downloading {} {}kb'.format(song_name, int(length/1024))
        with click.progressbar(length=length, label=label) as progressbar:
            with open(fpath, 'wb') as song_file:
                for chunk in resp.iter_content(chunk_size=1024):
                    if chunk:
                        song_file.write(chunk)
                        # Advance by what was actually received; the last
                        # chunk is usually shorter than 1024 bytes.
                        progressbar.update(len(chunk))
    finally:
        # A streamed response holds its connection until it is closed.
        resp.close()
def download_song_by_search(self, song_name, song_num):
    """
    Search for a song by name and download the match.

    :params song_name: name of the song
    :params song_num: sequence number of the song in the download list
    """
    try:
        song = self.crawler.search_song(song_name, song_num, self.quiet)
    except Exception:
        click.echo('download_song_by_serach error')
        # Nothing to download; without this return, `song` would be
        # unbound below and raise UnboundLocalError.
        return
    # Download only if a song was found.
    if song is not None:
        self.download_song_by_id(song.song_id, song.song_name, song.song_num, self.folder)
本仓库的所有内容仅供学习和参考之用,禁止用于商业用途。任何人或组织不得将本仓库的内容用于非法用途或侵犯他人合法权益。本仓库所涉及的爬虫技术仅用于学习和研究,不得用于对其他平台进行大规模爬虫或其他非法行为。对于因使用本仓库内容而引起的任何法律责任,本仓库不承担任何责任。使用本仓库的内容即表示您同意本免责声明的所有条款和条件。 # Python Spider 原创文章每周最少两篇,**后续最新文章**会在[【公众号】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)首发,视频[【B站】](https://space.bilibili.com/331507846)首发,大家可以加我[【微信】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)进**交流群**,技术交流或提意见都可以,欢迎**Star**!

微信群 公众号 B站 知乎 CSDN 头条 掘金

## 声明 * 代码、教程**仅限于学习交流,请勿用于任何商业用途!** ## 目录 * [爬虫小工具](#爬虫小工具) * [文件下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/downloader.py "悬停显示") * [爬虫实战](#爬虫实战) * [笔趣看小说下载](https://github.com/Jack-Cherish/python-spider/blob/master/biqukan.py "悬停显示") * [百度文库免费文章下载助手_rev1](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku.py "悬停显示") * [百度文库免费文章下载助手_rev2](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku_pro_1.py "悬停显示") * [《帅啊》网帅哥图片下载](https://github.com/Jack-Cherish/python-spider/blob/master/shuaia.py "悬停显示") * [构建代理IP池](https://github.com/Jack-Cherish/python-spider/blob/master/daili.py "悬停显示") * [《火影忍者》漫画下载](https://github.com/Jack-Cherish/python-spider/tree/master/cartoon "悬停显示") * [财务报表下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/financical.py "悬停显示") * [一小时入门网络爬虫](https://github.com/Jack-Cherish/python-spider/tree/master/one_hour_spider "悬停显示") * [抖音App视频下载](https://github.com/Jack-Cherish/python-spider/tree/master/douyin "悬停显示") * [GEETEST验证码识别](https://github.com/Jack-Cherish/python-spider/blob/master/geetest.py "悬停显示") * [12306抢票小助手](https://github.com/Jack-Cherish/python-spider/blob/master/12306.py "悬停显示") * [百万英雄答题辅助系统](https://github.com/Jack-Cherish/python-spider/tree/master/baiwan "悬停显示") * [网易云音乐免费音乐批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/Netease "悬停显示") * [B站免费视频和弹幕批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/bilibili "悬停显示") * [京东商品晒单图下载](https://github.com/Jack-Cherish/python-spider/tree/master/dingdong "悬停显示") * [正方教务管理系统个人信息查询](https://github.com/Jack-Cherish/python-spider/tree/master/zhengfang_system_spider "悬停显示") * [其它](#其它) ## 爬虫小工具 * downloader.py:文件下载小助手 一个可以用于下载图片、视频、文件的小工具,有下载进度显示功能。稍加修改即可添加到自己的爬虫中。 动态示意图: ![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/9.gif) ## 爬虫实战 * biqukan.py:《笔趣看》盗版小说网站,爬取小说工具 第三方依赖库安装: pip3 install beautifulsoup4 使用方法: python biqukan.py * baiduwenku.py: 百度文库word文章爬取 
原理说明:http://blog.csdn.net/c406495762/article/details/72331737 代码不完善,没有进行打包,不具通用性,纯属娱乐。 * shuaia.py: 爬取《帅啊》网,帅哥图片 《帅啊》网URL:http://www.shuaia.net/index.html 原理说明:http://blog.csdn.net/c406495762/article/details/72597755 第三方依赖库安装: pip3 install requests beautifulsoup4 * daili.py: 构建代理IP池 原理说明:http://blog.csdn.net/c406495762/article/details/72793480 * cartoon: 使用Scrapy爬取《火影忍者》漫画 代码可以爬取整个《火影忍者》漫画所有章节的内容,保存到本地。更改地址,可以爬取其他漫画。保存地址可以在settings.py中修改。 动漫网站:http://comic.kukudm.com/ 原理说明:http://blog.csdn.net/c406495762/article/details/72858983 * hero.py: 《王者荣耀》推荐出装查询小助手 网页爬取已经会了,想过爬取手机APP里的内容吗? 原理说明:http://blog.csdn.net/c406495762/article/details/76850843 * financical.py: 财务报表下载小助手 爬取的数据存入数据库会吗?《跟股神巴菲特学习炒股之财务报表入库(MySQL)》也许能给你一些思路。 原理说明:http://blog.csdn.net/c406495762/article/details/77801899 动态示意图: ![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/10.gif) * one_hour_spider:一小时入门Python3网络爬虫。 原理说明: * 知乎:https://zhuanlan.zhihu.com/p/29809609 * CSDN:http://blog.csdn.net/c406495762/article/details/78123502 本次实战内容有: * 网络小说下载(静态网站)-biqukan * 优美壁纸下载(动态网站)-unsplash * 视频下载 * douyin.py:抖音App视频下载 抖音App的视频下载,就是普通的App爬取。 原理说明: * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html * douyin_pro:抖音App视频下载(升级版) 抖音App的视频下载,添加视频解析网站,支持无水印视频下载,使用第三方平台解析。 原理说明: * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html * douyin:抖音App视频下载(升级版2) 抖音App的视频下载,添加视频解析网站,支持无水印视频下载,通过url解析,无需第三方平台。 原理说明: * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html 动态示意图: ![image](https://github.com/Jack-Cherish/Pictures/blob/master/14.gif) * geetest.py:GEETEST验证码识别 原理说明: 无 * 12306.py:用Python抢火车票简单代码 可以自己慢慢丰富,蛮简单,有爬虫基础很好操作,没有原理说明。 * baiwan:百万英雄辅助答题 效果图: ![image](https://github.com/Jack-Cherish/Pictures/blob/master/11.gif) 原理说明: * 个人网站:http://cuijiahua.com/blog/2018/01/spider_3.html 功能介绍: 服务器端,使用Python(baiwan.py)通过抓包获得的接口获取答题数据,解析之后通过百度知道搜索接口匹配答案,将最终匹配的结果写入文件(file.txt)。 手机抓包不会的朋友,可以看下我的早期[手机APP抓包教程](http://blog.csdn.net/c406495762/article/details/76850843 "悬停显示")。
Node.js(app.js)每隔1s读取一次file.txt文件,并将读取结果通过socket.io推送给客户端(index.html)。 亲测答题延时在3s左右。 声明:没做过后端和前端,花了一天时间,现学现卖弄好的,javascript也是现看现用,百度的程序,调试调试而已。可能有很多用法比较low的地方,用法不对,请勿见怪,有大牛感兴趣,可以自行完善。 * Netease:根据歌单下载网易云音乐 效果图: ![image](https://github.com/Jack-Cherish/Pictures/blob/master/13.gif) 原理说明: 暂无 功能介绍: 根据music_list.txt文件里的歌单的信息下载网易云音乐,将自己喜欢的音乐进行批量下载。 * bilibili:B站视频和弹幕批量下载 原理说明: 暂无 使用说明: python bilibili.py -d 猫 -k 猫 -p 10 三个参数: -d 保存视频的文件夹名 -k B站搜索的关键字 -p 下载搜索结果前多少页 * dingdong:京东商品晒单图下载 效果图: ![image](https://github.com/Jack-Cherish/Pictures/blob/master/jd.gif) 原理说明: 暂无 使用说明: python jd.py -k 芒果 三个参数: -d 保存图片的路径,默认为jd.py文件所在文件夹 -k 搜索关键词 -n 下载商品的晒单图个数,即n个商店的晒单图 * zhengfang_system_spider:对正方教务管理系统个人课表,个人学生成绩,绩点等简单爬取 效果图: ![image](/zhengfang_system_spider/screenshot/zf.png) 原理说明: 暂无 使用说明: cd zhengfang_system_spider pip install -r requirements.txt python spider.py ## 其它 * 欢迎 Pull requests,感谢贡献。 更多精彩,敬请期待! wechat ================================================ FILE: baiduwenku.py ================================================ # -*- coding:UTF-8 -*- from selenium import webdriver from bs4 import BeautifulSoup import re import time if __name__ == '__main__': options = webdriver.ChromeOptions() options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"') driver = webdriver.Chrome('J:\迅雷下载\chromedriver.exe', chrome_options=options) driver.get('https://wenku.baidu.com/view/aa31a84bcf84b9d528ea7a2c.html') html = driver.page_source bf1 = BeautifulSoup(html, 'lxml') result = bf1.find_all(class_='rtcspage') bf2 = BeautifulSoup(str(result[0]), 'lxml') title = bf2.div.div.h1.string pagenum = bf2.find_all(class_='size') pagenum = BeautifulSoup(str(pagenum), 'lxml').span.string pagepattern = re.compile('页数:(\d+)页') num = int(pagepattern.findall(pagenum)[0]) print('文章标题:%s' % title) print('文章页数:%d' % num) while True: num = num / 5.0 html = driver.page_source bf1 =
def fetch_url(url):
    """Fetch *url* through the module-level ``session`` and decode the body as GBK.

    Raises:
        UnicodeDecodeError: if the response body is not valid GBK.
    """
    return session.get(url).content.decode('gbk')


def get_doc_id(url):
    """Return the document id from a ``.../view/<id>.html`` Wenku URL.

    Raises:
        IndexError: if *url* contains no ``view/<id>.html`` segment.
    """
    # Non-greedy group and an escaped dot: stop at the FIRST ".html", so a
    # later ".html" in the query string cannot leak into the id.
    return re.findall(r'view/(.*?)\.html', url)[0]


def parse_type(content):
    """Return the first ``docType`` value found in the page source.

    Raises:
        IndexError: if the page source has no ``docType: '...',`` entry.
    """
    return re.findall(r"docType.*?\:.*?\'(.*?)\'\,", content)[0]


def parse_title(content):
    """Return the first ``title`` value found in the page source.

    Raises:
        IndexError: if the page source has no ``title: '...',`` entry.
    """
    return re.findall(r"title.*?\:.*?\'(.*?)\'\,", content)[0]


def parse_doc(content):
    """Collect the text of a "doc" document from its per-page JSON resources.

    Parameters:
        content: page source of the document view page.
    Returns:
        The concatenated text; a newline is emitted whenever the ``y`` value
        of a fragment differs from the previous fragment's. Returns ``''``
        when the page lists no JSON resources.
    """
    url_list = re.findall('(https.*?0.json.*?)\\\\x22}', content)
    url_list = [addr.replace("\\\\\\/", "/") for addr in url_list]
    pieces = []
    # NOTE(review): the last five matches are skipped, as in the original;
    # presumably they are not text pages -- confirm.
    for url in url_list[:-5]:
        page = fetch_url(url)
        # Starts as int 0 while the captured values are strings, so the first
        # fragment of every page always begins a new line.
        last_y = 0
        for text, y in re.findall('"c":"(.*?)".*?"y":(.*?),', page):
            if y != last_y:
                last_y = y
                pieces.append('\n')
            pieces.append(text.encode('utf-8').decode('unicode_escape', 'ignore'))
    return ''.join(pieces)


def parse_txt(doc_id):
    """Download the plain text of a "txt" document.

    Parameters:
        doc_id: document id as returned by ``get_doc_id``.
    Returns:
        The document text with literal ``\\r`` / ``\\n`` sequences turned
        into real control characters.
    Raises:
        IndexError: if the doc-info response lacks md5sum/totalPageNum/rsign.
    """
    info_url = 'https://wenku.baidu.com/api/doc/getdocinfo?callback=cb&doc_id=' + doc_id
    info = fetch_url(info_url)
    md5 = re.findall('"md5sum":"(.*?)"', info)[0]
    pn = re.findall('"totalPageNum":"(.*?)"', info)[0]
    rsign = re.findall('"rsign":"(.*?)"', info)[0]
    # NOTE(review): md5 is appended without its own "&name=" prefix;
    # presumably the md5sum value already carries one -- confirm.
    text_url = ('https://wkretype.bdimg.com/retype/text/' + doc_id
                + '?rn=' + pn + '&type=txt' + md5 + '&rsign=' + rsign)
    pages = json.loads(fetch_url(text_url))
    return ''.join(
        parag['c'].replace('\\r', '\r').replace('\\n', '\n')
        for page in pages
        for parag in page['parags']
    )


def parse_other(doc_id):
    """Save every page image of a non-text document into a folder named *doc_id*.

    Images are written as ``<doc_id>/<index>.jpg`` in the order listed.
    """
    list_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + doc_id + "&pn=1&rn=99999&type=ppt"
    listing = fetch_url(list_url)
    image_urls = [item.replace("\\", '') for item in re.findall('{"zoom":"(.*?)","page"', listing)]
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(doc_id, exist_ok=True)
    for index, url in enumerate(image_urls):
        path = os.path.join(doc_id, str(index) + '.jpg')
        with open(path, 'wb') as f:
            f.write(session.get(url).content)
    print("图片保存在" + doc_id + "文件夹")


def save_file(filename, content):
    """Write *content* to *filename* as UTF-8 and report the path."""
    with open(filename, 'w', encoding='utf8') as f:
        f.write(content)
    print('已保存为:' + filename)


# test_txt_url = 'https://wenku.baidu.com/view/cbb4af8b783e0912a3162a89.html?from=search'
# test_ppt_url = 'https://wenku.baidu.com/view/2b7046e3f78a6529657d5376.html?from=search'
# test_pdf_url = 'https://wenku.baidu.com/view/dd6e15c1227916888586d795.html?from=search'
# test_xls_url = 'https://wenku.baidu.com/view/eb4a5bb7312b3169a551a481.html?from=search'


def main():
    """Prompt for a Wenku URL and save the document as text or as page images."""
    url = input('请输入要下载的文库URL地址')
    content = fetch_url(url)
    doc_id = get_doc_id(url)
    # Named doc_type so the builtin ``type`` is not shadowed.
    doc_type = parse_type(content)
    title = parse_title(content)
    if doc_type == 'doc':
        save_file(title + '.txt', parse_doc(content))
    elif doc_type == 'txt':
        save_file(title + '.txt', parse_txt(doc_id))
    else:
        parse_other(doc_id)
}).listen(80); console.log('Server running!'); var lineReader = require('line-reader'); function messageGet(){ lineReader.eachLine('file.txt', function(line, last) { count++; var name = 'line' + count; console.log(name); console.log(line); message[name] = line; }); if(count == 25){ count = 0; } else{ for(var i = count+1; i <= 25; i++){ var name = 'line' + i; message[name] = 'f'; } count = 0; } } var io = require('socket.io').listen(server); var rule = new schedule.RecurrenceRule(); var times = []; for(var i=1; i<1800; i++){ times.push(i); } rule.second = times; schedule.scheduleJob(rule, function(){ messageGet(); }); io.sockets.on('connection',function(socket){ // console.log('User connected' + count + 'user(s) present'); socket.emit('users',message); socket.broadcast.emit('users',message); socket.on('disconnect',function(){ console.log('User disconnected'); //socket.broadcast.emit('users',message); }); }); ================================================ FILE: baiwan/baiwan.py ================================================ # -*-coding:utf-8 -*- import requests from lxml import etree from bs4 import BeautifulSoup import urllib import time, re, types, os """ 代码写的匆忙,本来想再重构下,完善好注释再发,但是比较忙,想想算了,所以自行完善吧!写法很不规范,勿见怪。 作者: Jack Cui Website:http://cuijiahua.com 注: 本软件仅用于学习交流,请勿用于任何商业用途! """ class BaiWan(): def __init__(self): # 百度知道搜索接口 self.baidu = 'http://zhidao.baidu.com/search?' 
def pick_recommendation(self, question, alternative_answers, answer_texts):
    """Choose which option to recommend, based on the scraped answer texts.

    Each answer text votes for the first option (in option order) that it
    contains. For a negated question (one containing a word such as
    "不是" or "错误") the first option with NO votes is chosen; otherwise
    the option with the most votes wins, ties going to the option that
    received a vote first.

    Parameters:
        question: question text.
        alternative_answers: list of option strings (may be empty).
        answer_texts: list of answer strings scraped from the search page.
    Returns:
        The recommended option, or '' when nothing can be recommended.
    """
    if not alternative_answers:
        return ''
    print('\n')
    statistics = {}
    for text in answer_texts:
        for option in alternative_answers:
            if option in text:
                statistics[option] = statistics.get(option, 0) + 1
                print(option, end=' ')
                print('\n')
                break
    errors = ['没有', '不是', '不对', '不正确', '错误', '不包括', '不包含', '不在', '错']
    error_list = [word in question for word in errors]
    print(error_list)
    recommend = ''
    if any(error_list):
        for option in alternative_answers:
            # Membership must be tested against the keys. The original
            # compared with statistics.items() (tuples), which never matches
            # a string, so the first option was always recommended.
            if option not in statistics:
                recommend = option
                break
    elif statistics:
        # max() keeps the first-inserted key on ties, like the original
        # stable sort with reverse=True.
        recommend = max(statistics, key=statistics.get)
    if recommend != '':
        print('推荐答案:', recommend)
    return recommend


def search(self, question, alternative_answers):
    """Search Baidu Zhidao for *question* and write the findings to file.txt.

    Parameters:
        question: question text used as the search keyword.
        alternative_answers: list of option strings; [] when no options
            could be parsed.
    Returns:
        None. Overwrites ``file.txt`` in the working directory with the
        question, the options, the scraped answers and, when one exists,
        the recommended option.
    """
    print(question)
    print(alternative_answers)
    infos = {"word": question}
    # Query the Baidu Zhidao search endpoint (keyword encoded as GB2312).
    url = self.baidu + 'lm=0&rn=10&pn=0&fr=search&ie=gbk&' + urllib.parse.urlencode(infos, encoding='GB2312')
    print(url)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'}
    sess = requests.Session()
    # NOTE(review): verify=False disables TLS certificate checks; kept as-is.
    req = sess.get(url=url, headers=headers, verify=False)
    req.encoding = 'gbk'
    bf = BeautifulSoup(req.text, 'lxml')
    answer_texts = [answer.text for answer in bf.find_all('dd', class_='dd answer')]
    for text in answer_texts:
        print(text)

    recommend = self.pick_recommendation(question, alternative_answers, answer_texts)

    separator = '*' * 50 + '\n'
    lines = ['问题:' + question + '\n', separator]
    if alternative_answers:
        lines.append('选项:' + ''.join(option + ' ' for option in alternative_answers) + '\n')
        lines.append(separator)
    lines.append('参考答案:\n')
    lines.extend(text + '\n' for text in answer_texts)
    lines.append(separator)
    if recommend != '':
        # Write the value that was actually chosen. The original recomputed
        # it from the vote table, which raised IndexError when the table was
        # empty and contradicted the printed answer for negated questions.
        lines.append('最终答案请自行斟酌!\t')
        lines.append('推荐答案:' + recommend)
    with open('file.txt', 'w') as f:
        f.writelines(lines)
Army֪Ⱦ뵽http://baike.baidu.com/view/23211.htm [ϸ] ãйžĽÿİһգưһڣİһպ 𣺽81գ71ա ÿ81йžգ׳ơһڡ192781գй챱ˣܶ Ҷͦ е쵼£ڽϲװ壬췴Թ񵳷... 730 𣺰һǽڣǰһ첻731ô 192781һϲ,йװ񵳷ɵĵһǹ,־йй쵼װʱ,־й͵ӵĵÿİһйž Դйʱй쵼ϲ塣192781յϲ壬йװ񵳷ɵĵһǹ־йй쵼װʱڣ־й͵ӵĵ 19337£... Ԫ1181101 𣺰һŽ 201581 ũ ʮ 201681 ũ إ ÿİһйžգҲСһڡ1933711գлά͹ʱίԱ630յĽ飬81... ************************************************** մã Ƽ𰸣81 ================================================ FILE: baiwan/index.html ================================================ Jack Cui答题辅助系统

百万英雄答题辅助系统

================================================ FILE: baiwan/question.txt ================================================ Ǽ¼ ================================================ FILE: bilibili/README.md ================================================ ## 功能 下载B站视频和弹幕,将xml原生弹幕转换为ass弹幕文件,支持plotplayer等播放器的弹幕播放。 ## 作者 * Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示") * Author: Jack Cui * Date: 2018.6.12 ## 更新 * 2018.09.12:添加FFmpeg分段视频合并 ## 使用说明 FFmpeg下载,并配置环境变量。http://ffmpeg.org/ python bilibili.py -d 猫 -k 猫 -p 10 三个参数: -d 保存视频的文件夹名 -k B站搜索的关键字 -p 下载搜索结果前多少页 ================================================ FILE: bilibili/bilibili.py ================================================ # -*-coding:utf-8 -*- # Website: http://cuijiahua.com # Author: Jack Cui # Date: 2018.6.9 import requests, json, re, sys, os, urllib, argparse, time from urllib.request import urlretrieve from contextlib import closing from urllib import parse import xml2ass class BiliBili: def __init__(self, dirname, keyword): self.dn_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Referer': 'https://search.bilibili.com/all?keyword=%s' % parse.quote(keyword)} self.search_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': 'application/json, text/plain, */*'} self.video_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'} self.danmu_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; 
WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9'} self.sess = requests.Session() self.dir = dirname def video_downloader(self, video_url, video_name): """ 视频下载 Parameters: video_url: 带水印的视频地址 video_name: 视频名 Returns: 无 """ size = 0 with closing(self.sess.get(video_url, headers=self.dn_headers, stream=True, verify=False)) as response: chunk_size = 1024 content_size = int(response.headers['content-length']) if response.status_code == 200: sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024)) video_name = os.path.join(self.dir, video_name) with open(video_name, 'wb') as file: for data in response.iter_content(chunk_size = chunk_size): file.write(data) size += len(data) file.flush() sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r') # sys.stdout.flush() if size / content_size == 1: print('\n') else: print('链接异常') def search_video(self, search_url): """ 搜索接口 Parameters: search_url: 带水印的视频地址 Returns: titles:视频名列表 arcurls: 视频播放地址列表 """ req = self.sess.get(url=search_url, headers=self.search_headers, verify=False) html = json.loads(req.text) videos = html["data"]['result'] titles = [] arcurls = [] for video in videos: titles.append(video['title'].replace('','').replace('','')) arcurls.append(video['arcurl']) return titles, arcurls def get_download_url(self, arcurl): """ 获取视频下载地址 Parameters: arcurl: 视频播放地址 oid:弹幕地址参数 Returns: download_url:视频下载地址 """ req = self.sess.get(url=arcurl, headers=self.video_headers, verify=False) pattern = '.__playinfo__=(.*)") tac = _tac_re.search(share_user.text).group(1) _dytk_re = re.compile(r"dytk\s*:\s*'(.+)'") dytk = _dytk_re.search(share_user.text).group(1) _nickname_re = re.compile(r'

def get_download_url(self, video_url, watermark_flag):
    """Return *video_url* rewritten for the requested watermark variant.

    Parameters:
        video_url: play URL containing a ``/play/`` or ``/playwm/`` segment.
        watermark_flag: truthy -> watermarked (``/playwm/``) URL,
            falsy -> watermark-free (``/play/``) URL.
    Returns:
        download_url: the URL with the path segment swapped as requested;
            unchanged when it is already of the requested kind.
    """
    if watermark_flag:
        # Watermarked video.
        return video_url.replace('/play/', '/playwm/')
    # Watermark-free video.
    return video_url.replace('/playwm/', '/play/')
def run(self):
    """Interactive entry point: ask for the options, then download the videos.

    Prompts for the user id, the watermark choice, the list type
    ('f' favourites / 'p' uploads) and the save directory, resolves the
    video list through ``self.get_video_urls`` and downloads every video
    that is not already on disk through ``self.video_downloader``.

    Parameters:
        None
    Returns:
        None
    Raises:
        ValueError: if the watermark answer is not an integer.
    """
    self.hello()
    print('UID取得方式:\n分享用户页面,用浏览器打开短链接,原始链接中/share/user/后的数字即是UID')
    user_id = input('请输入UID (例如60388937600):') or '60388937600'
    watermark_flag = input('是否下载带水印的视频 (0-否(默认), 1-是):') or '0'
    watermark_flag = bool(int(watermark_flag))
    type_flag = input('f-收藏的(默认), p-上传的:') or 'f'
    save_dir = input('保存路径 (例如"E:/Download/", 默认"./Download/"):') or "./Download/"
    video_names, video_urls, share_urls, nickname = self.get_video_urls(user_id, type_flag)
    nickname_dir = os.path.join(save_dir, nickname)
    # Favourites live in a "favorite" sub-folder, uploads directly in the
    # nickname folder.
    target_dir = os.path.join(nickname_dir, 'favorite') if type_flag == 'f' else nickname_dir
    # One call creates save_dir, nickname_dir and (if needed) favorite, and
    # is a no-op when they already exist.
    os.makedirs(target_dir, exist_ok=True)
    print('视频下载中:共有%d个作品!\n' % len(video_urls))
    for num, video_url in enumerate(video_urls):
        print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num + 1, share_urls[num]))
        # Strip BOTH separators. The original if/elif removed only one kind,
        # so a name containing "\" and "/" still held a path separator.
        video_name = video_names[num].replace('\\', '').replace('/', '')
        video_path = os.path.join(target_dir, video_name)
        if os.path.isfile(video_path):
            print('视频已存在')
        else:
            self.video_downloader(video_url, video_path, watermark_flag)
        print('\n')
    print('下载完成!')
r.d + 1 : 0; for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n]; for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t]; return c(e, 0, s) } function c(t, b, k) { function u(e) { v[x++] = e } function f() { return g = t.charCodeAt(b++) - 32, t.substring(b, b += g) } function l() { try { y = c(t, b, k) } catch (e) { h = e, y = l } } for (var h, y, d, g, v = [], x = 0; ;) switch (g = t.charCodeAt(b++) - 32) { case 1: u(!v[--x]); break; case 4: v[x++] = f(); break; case 5: u(function (e) { var a = 0, r = e.length; return function () { var c = a < r; return c && u(e[a++]), c } }(v[--x])); break; case 6: y = v[--x], u(v[--x](y)); break; case 8: if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y; break; case 9: v[x++] = c; break; case 10: u(s(v[--x])); break; case 11: y = v[--x], u(v[--x] + y); break; case 12: for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length; u(String.fromCharCode.apply(null, d)); break; case 13: y = v[--x], h = delete v[--x][y]; break; case 14: v[x++] = t.charCodeAt(b++) - 32; break; case 59: u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []); break; case 61: u(v[--x][t.charCodeAt(b++) - 32]); break; case 62: g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0; break; case 65: h = v[--x], y = v[--x], v[--x][y] = h; break; case 66: u(e(t[b++], v[--x], v[--x])); break; case 67: y = v[--x], d = v[--x], u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y)); break; case 68: u(e((g = t[b++]) < "<" ? 
(b--, f()) : g + g, v[--x], v[--x])); break; case 70: u(!1); break; case 71: v[x++] = n; break; case 72: v[x++] = +f(); break; case 73: u(parseInt(f(), 36)); break; case 75: if (v[--x]) { b++; break } case 74: g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g; break; case 76: u(k[t.charCodeAt(b++) - 32]); break; case 77: y = v[--x], u(v[--x][y]); break; case 78: g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g)); break; case 79: g = t.charCodeAt(b++) - 32, u(k["$" + g]); break; case 81: h = v[--x], v[--x][f()] = h; break; case 82: u(v[--x][f()]); break; case 83: h = v[--x], k[t.charCodeAt(b++) - 32] = h; break; case 84: v[x++] = !0; break; case 85: v[x++] = void 0; break; case 86: u(v[x - 1]); break; case 88: h = v[--x], y = v[--x], v[x++] = h, v[x++] = y; break; case 89: u(function () { function e() { return r(e.y, arguments, k) } return e.y = f(), e.x = c, e }()); break; case 90: v[x++] = null; break; case 91: v[x++] = h; break; case 93: h = v[--x]; break; case 0: return v[--x]; default: u((g << 16 >> 16) - 16) } } var n = this, t = n.Function, s = Object.keys || function (e) { var a = {}, r = 0; for (var c in e) a[r++] = c; return a.length = r, a }, b = {}, k = {}; return r })() ('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d= self.total: end_str = '\n' self.status = status or self.fin_status print(self.__get_info(), end=end_str, ) if __name__ == '__main__': #url = 'http://www.demongan.com/source/game/二十四点.zip' #filename = '二十四点.zip' print('*' * 100) print('\t\t\t\t欢迎使用文件下载小助手') print('作者:Jack-Cui\n博客:http://blog.csdn.net/c406495762') print('*' * 
100) url = input('请输入需要下载的文件链接:\n') filename = url.split('/')[-1] with closing(requests.get(url, stream=True)) as response: chunk_size = 1024 content_size = int(response.headers['content-length']) if response.status_code == 200: print('文件大小:%0.2f KB' % (content_size / chunk_size)) progress = ProgressBar("%s下载进度" % filename , total = content_size , unit = "KB" , chunk_size = chunk_size , run_status = "正在下载" , fin_status = "下载完成") with open(filename, "wb") as file: for data in response.iter_content(chunk_size=chunk_size): file.write(data) progress.refresh(count=len(data)) else: print('链接异常') ================================================ FILE: financical.py ================================================ #-*- coding:UTF-8 -*- import sys import pymysql import requests import json import re from bs4 import BeautifulSoup """ 类说明:获取财务数据 Author: Jack Cui Blog: http://blog.csdn.net/c406495762 Zhihu: https://www.zhihu.com/people/Jack--Cui/ Modify: 2017-08-31 """ class FinancialData(): def __init__(self): #服务器域名 self.server = 'http://quotes.money.163.com/' self.cwnb = 'http://quotes.money.163.com/hkstock/cwsj_' #主要财务指标 self.cwzb_dict = {'EPS':'基本每股收益','EPS_DILUTED':'摊薄每股收益','GROSS_MARGIN':'毛利率', 'CAPITAL_ADEQUACY':'资本充足率','LOANS_DEPOSITS':'贷款回报率','ROTA':'总资产收益率', 'ROEQUITY':'净资产收益率','CURRENT_RATIO':'流动比率','QUICK_RATIO':'速动比率', 'ROLOANS':'存贷比','INVENTORY_TURNOVER':'存货周转率','GENERAL_ADMIN_RATIO':'管理费用比率', 'TOTAL_ASSET2TURNOVER':'资产周转率','FINCOSTS_GROSSPROFIT':'财务费用比率','TURNOVER_CASH':'销售现金比率','YEAREND_DATE':'报表日期'} #利润表 self.lrb_dict = {'TURNOVER':'总营收','OPER_PROFIT':'经营利润','PBT':'除税前利润', 'NET_PROF':'净利润','EPS':'每股基本盈利','DPS':'每股派息', 'INCOME_INTEREST':'利息收益','INCOME_NETTRADING':'交易收益','INCOME_NETFEE':'费用收益','YEAREND_DATE':'报表日期'} #资产负债表 self.fzb_dict = { 'FIX_ASS':'固定资产','CURR_ASS':'流动资产','CURR_LIAB':'流动负债', 'INVENTORY':'存款','CASH':'现金及银行存结','OTHER_ASS':'其他资产', 'TOTAL_ASS':'总资产','TOTAL_LIAB':'总负债','EQUITY':'股东权益', 
'CASH_SHORTTERMFUND':'库存现金及短期资金','DEPOSITS_FROM_CUSTOMER':'客户存款', 'FINANCIALASSET_SALE':'可供出售之证券','LOAN_TO_BANK':'银行同业存款及贷款', 'DERIVATIVES_LIABILITIES':'金融负债','DERIVATIVES_ASSET':'金融资产','YEAREND_DATE':'报表日期'} #现金流表 self.llb_dict = { 'CF_NCF_OPERACT':'经营活动产生的现金流','CF_INT_REC':'已收利息','CF_INT_PAID':'已付利息', 'CF_INT_REC':'已收股息','CF_DIV_PAID':'已派股息','CF_INV':'投资活动产生现金流', 'CF_FIN_ACT':'融资活动产生现金流','CF_BEG':'期初现金及现金等价物','CF_CHANGE_CSH':'现金及现金等价物净增加额', 'CF_END':'期末现金及现金等价物','CF_EXCH':'汇率变动影响','YEAREND_DATE':'报表日期'} #总表 self.table_dict = {'cwzb':self.cwzb_dict,'lrb':self.lrb_dict,'fzb':self.fzb_dict,'llb':self.llb_dict} #请求头 self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.8', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36'} """ 函数说明:获取股票页面信息 Author: Jack Cui Parameters: url - 股票财务数据界面地址 Returns: name - 股票名 table_name_list - 财务报表名称 table_date_list - 财务报表年限 url_list - 财务报表查询连接 Blog: http://blog.csdn.net/c406495762 Zhihu: https://www.zhihu.com/people/Jack--Cui/ Modify: 2017-08-31 """ def get_informations(self, url): req = requests.get(url = url, headers = self.headers) req.encoding = 'utf-8' html = req.text page_bf = BeautifulSoup(html, 'lxml') #股票名称,股票代码 name = page_bf.find_all('span', class_ = 'name')[0].string # code = page_bf.find_all('span', class_ = 'code')[0].string # code = re.findall('\d+',code)[0] #存储各个表名的列表 table_name_list = [] table_date_list = [] each_date_list = [] url_list = [] #表名和表时间 table_name = page_bf.find_all('div', class_ = 'titlebar3') for each_table_name in table_name: #表名 table_name_list.append(each_table_name.span.string) #表时间 for each_table_date in each_table_name.div.find_all('select', id = re.compile('.+1$')): url_list.append(re.findall('(\w+)1',each_table_date.get('id'))[0]) for each_date in each_table_date.find_all('option'): 
def insert_tables(self, name, table_name_list, table_date_list, url_list):
    """Download every financial report of one stock and store it in MySQL.

    For each report type the JSON rows are fetched from the quotes service;
    every row is inserted (stock name, stock code, report date) and its
    remaining columns are then filled in with one UPDATE per column.

    Parameters:
        name - stock name
        table_name_list - report names (used for progress output)
        table_date_list - per report, the list of available dates; the last
            entry is sent as ``start`` and the first as ``end``
        url_list - per report, the type key ('cwzb', 'lrb', 'fzb', 'llb');
            also used as the MySQL table name
    Returns:
        None

    NOTE(review): reads the stock code from the module-level global ``code``
    set by the ``__main__`` section, as the original did.
    """
    # host, port, user, passwd, db and charset of the target database.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='yourpasswd', db='financialdata', charset='utf8')
    cursor = conn.cursor()
    try:
        for i, table_name in enumerate(table_name_list):
            sys.stdout.write(' [正在下载 ] %s' % table_name + '\r')
            # Data endpoint for this report type and date range.
            url = self.server + 'hk/service/cwsj_service.php?symbol={}&start={}&end={}&type={}&unit=yuan'.format(code, table_date_list[i][-1], table_date_list[i][0], url_list[i])
            req_table = requests.get(url=url, headers=self.headers)
            table = req_table.json()
            nums = len(table)
            for num, row in enumerate(table):
                sys.stdout.write(' [正在下载 %.2f%%] ' % (((num + 1) / nums) * 100) + '\r')
                sys.stdout.flush()
                columns = self.table_dict[url_list[i]]
                value_dict = {'股票名': name, '股票代码': code}
                # Use the row for THIS iteration. The original indexed the
                # rows with the report-type index ``i``, so the same row was
                # stored over and over.
                for key, value in row.items():
                    if key in columns:
                        value_dict[columns[key]] = value
                # Identifiers cannot be bound as parameters, so the table and
                # column names are interpolated (backtick-quoted); all values
                # are passed as parameters and escaped by the driver.
                sql1 = 'INSERT INTO `%s` (`股票名`,`股票代码`,`报表日期`) VALUES (%%s,%%s,%%s)' % url_list[i]
                args1 = (value_dict['股票名'], value_dict['股票代码'], value_dict['报表日期'])
                try:
                    cursor.execute(sql1, args1)
                    conn.commit()
                except pymysql.MySQLError:
                    # Best effort, as before: roll back and carry on with the
                    # column updates.
                    conn.rollback()
                for key, value in value_dict.items():
                    if key in ('股票名', '股票代码', '报表日期'):
                        continue
                    sql2 = 'UPDATE `%s` SET `%s`=%%s WHERE `股票名`=%%s AND `报表日期`=%%s' % (url_list[i], key)
                    try:
                        cursor.execute(sql2, (value, value_dict['股票名'], value_dict['报表日期']))
                        conn.commit()
                    except pymysql.MySQLError:
                        conn.rollback()
        print(' [下载完成 ')
    finally:
        # Release the cursor and connection even if a request or a row fails.
        cursor.close()
        conn.close()
def save_base64img(data_str, save_name):
    """Decode base64 image data and write it to a file.

    :param data_str: base64 payload, without the ``data:...;base64,`` prefix
    :param save_name: full path of the file to write
    """
    img_data = base64.b64decode(data_str)
    # The context manager closes the handle even if the write fails.
    with open(save_name, 'wb') as file:
        file.write(img_data)


def get_base64_by_canvas(driver, class_name, contain_type, min_length=5000, poll_interval=0.5):
    """Read a canvas element as a PNG data URL through the webdriver.

    :param driver: webdriver object (only ``execute_script`` is used)
    :param class_name: class name of the canvas element
    :param contain_type: whether to keep the ``data:image/png;base64,`` prefix
    :param min_length: keep polling until the data URL is at least this long
    :param poll_interval: seconds to wait between polls
    :return: the data URL, or only the part after the first comma
    """
    script = 'return document.getElementsByClassName("' + class_name + '")[0].toDataURL("image/png");'
    # A short result means the canvas has not finished rendering yet, so
    # poll until it is long enough; no sleep is spent after a successful poll.
    bg_img = driver.execute_script(script)
    while len(bg_img) < min_length:
        time.sleep(poll_interval)
        bg_img = driver.execute_script(script)
    if contain_type:
        return bg_img
    return bg_img[bg_img.find(',') + 1:]


def save_bg(driver, bg_path="bg.png", bg_class="geetest_canvas_bg geetest_absolute"):
    """Save the background image that contains the gap.

    :param driver: webdriver object
    :param bg_path: path to save to
    :param bg_class: class attribute of the background canvas
    :return: the path the image was saved to
    """
    bg_img_data = get_base64_by_canvas(driver, bg_class, False)
    save_base64img(bg_img_data, bg_path)
    return bg_path


def save_full_bg(driver, full_bg_path="fbg.png", full_bg_class="geetest_canvas_fullbg geetest_fade geetest_absolute"):
    """Save the complete background image (without the gap).

    :param driver: webdriver object
    :param full_bg_path: path to save to
    :param full_bg_class: class attribute of the full-background canvas
    :return: the path the image was saved to
    """
    bg_img_data = get_base64_by_canvas(driver, full_bg_class, False)
    save_base64img(bg_img_data, full_bg_path)
    return full_bg_path
int(re.findall('background-position: (.*)px (.*)px;',each_fullgb.get('style'))[0][1]) fullbg_location_list.append(location) urlretrieve(url = bg_url, filename = bg_filename) print('缺口图片下载完成') urlretrieve(url = fullgb_url, filename = fullbg_filename) print('背景图片下载完成') return bg_location_list, fullbg_location_list def get_merge_image(self, filename, location_list): """ 根据位置对图片进行合并还原 :filename:图片 :location_list:图片位置 """ im = image.open(filename) new_im = image.new('RGB', (260,116)) im_list_upper=[] im_list_down=[] for location in location_list: if location['y'] == -58: im_list_upper.append(im.crop((abs(location['x']),58,abs(location['x']) + 10, 166))) if location['y'] == 0: im_list_down.append(im.crop((abs(location['x']),0,abs(location['x']) + 10, 58))) new_im = image.new('RGB', (260,116)) x_offset = 0 for im in im_list_upper: new_im.paste(im, (x_offset,0)) x_offset += im.size[0] x_offset = 0 for im in im_list_down: new_im.paste(im, (x_offset,58)) x_offset += im.size[0] new_im.save(filename) return new_im def get_merge_image(self, filename, location_list): """ 根据位置对图片进行合并还原 :filename:图片 :location_list:图片位置 """ im = image.open(filename) new_im = image.new('RGB', (260,116)) im_list_upper=[] im_list_down=[] for location in location_list: if location['y']==-58: im_list_upper.append(im.crop((abs(location['x']),58,abs(location['x'])+10,166))) if location['y']==0: im_list_down.append(im.crop((abs(location['x']),0,abs(location['x'])+10,58))) new_im = image.new('RGB', (260,116)) x_offset = 0 for im in im_list_upper: new_im.paste(im, (x_offset,0)) x_offset += im.size[0] x_offset = 0 for im in im_list_down: new_im.paste(im, (x_offset,58)) x_offset += im.size[0] new_im.save(filename) return new_im def is_pixel_equal(self, img1, img2, x, y): """ 判断两个像素是否相同 :param image1: 图片1 :param image2: 图片2 :param x: 位置x :param y: 位置y :return: 像素是否相同 """ # 取两个图片的像素点 pix1 = img1.load()[x, y] pix2 = img2.load()[x, y] threshold = 60 if (abs(pix1[0] - pix2[0] < threshold) and abs(pix1[1] - pix2[1] 
< threshold) and abs(pix1[2] - pix2[2] < threshold)): return True else: return False def get_gap(self, img1, img2): """ 获取缺口偏移量 :param img1: 不带缺口图片 :param img2: 带缺口图片 :return: """ left = 43 for i in range(left, img1.size[0]): for j in range(img1.size[1]): if not self.is_pixel_equal(img1, img2, i, j): left = i return left return left def get_track(self, distance): """ 根据偏移量获取移动轨迹 :param distance: 偏移量 :return: 移动轨迹 """ # 移动轨迹 track = [] # 当前位移 current = 0 # 减速阈值 mid = distance * 4 / 5 # 计算间隔 t = 0.2 # 初速度 v = 0 while current < distance: if current < mid: # 加速度为正2 a = 2 else: # 加速度为负3 a = -3 # 初速度v0 v0 = v # 当前速度v = v0 + at v = v0 + a * t # 移动距离x = v0t + 1/2 * a * t^2 move = v0 * t + 1 / 2 * a * t * t # 当前位移 current += move # 加入轨迹 track.append(round(move)) return track def get_slider(self): """ 获取滑块 :return: 滑块对象 """ while True: try: slider = self.browser.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']") break except: time.sleep(0.5) return slider def move_to_gap(self, slider, track): """ 拖动滑块到缺口处 :param slider: 滑块 :param track: 轨迹 :return: """ ActionChains(self.browser).click_and_hold(slider).perform() while track: x = random.choice(track) ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform() track.remove(x) time.sleep(0.5) ActionChains(self.browser).release().perform() def crack(self): # 打开浏览器 self.open() # 保存的图片名字 bg_filename = 'bg.jpg' fullbg_filename = 'fullbg.jpg' # 获取图片 bg_location_list, fullbg_location_list = self.get_images(bg_filename, fullbg_filename) # 根据位置对图片进行合并还原 # 方法1 # bg_img = self.get_merge_image(bg_filename, bg_location_list) # fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list) # 方法2 bg_img = save_bg(self.browser) full_bg_img = save_full_bg(self.browser) # 获取缺口位置 # 方法1 # gap = self.get_gap(fullbg_img, bg_img) # 方法2 gap = self.get_gap(image.open(full_bg_img), image.open(bg_img)) print('缺口位置', gap) track = self.get_track(gap-self.BORDER) print('滑动滑块') print(track) # # 点按呼出缺口 # slider = 
self.get_slider() # # 拖动滑块到缺口处 # self.move_to_gap(slider, track) if __name__ == '__main__': print('开始验证') crack = Crack(u'中国移动') crack.crack() print('验证成功') ================================================ FILE: hero.py ================================================ #-*- coding: UTF-8 -*- from urllib.request import urlretrieve import requests import os """ 函数说明:下载《英雄联盟盒子》中的英雄图片 Parameters: url - GET请求地址,通过Fiddler抓包获取 header - headers信息 Returns: 无 Author: Jack Cui Blog: http://blog.csdn.net/c406495762 Modify: 2017-08-07 """ def hero_imgs_download(url, header): req = requests.get(url = url, headers = header).json() hero_num = len(req['list']) print('一共有%d个英雄' % hero_num) hero_images_path = 'hero_images' for each_hero in req['list']: hero_photo_url = each_hero['cover'] hero_name = each_hero['name'] + '.jpg' filename = hero_images_path + '/' + hero_name if hero_images_path not in os.listdir(): os.makedirs(hero_images_path) urlretrieve(url = hero_photo_url, filename = filename) """ 函数说明:打印所有英雄的名字和ID Parameters: url - GET请求地址,通过Fiddler抓包获取 header - headers信息 Returns: 无 Author: Jack Cui Blog: http://blog.csdn.net/c406495762 Modify: 2017-08-07 """ def hero_list(url, header): print('*' * 100) print('\t\t\t\t欢迎使用《王者荣耀》出装下助手!') print('*' * 100) req = requests.get(url = url, headers = header).json() flag = 0 for each_hero in req['list']: flag += 1 print('%s的ID为:%-7s' % (each_hero['name'], each_hero['hero_id']), end = '\t\t') if flag == 3: print('\n', end = '') flag = 0 """ 函数说明:根据equip_id查询武器名字和价格 Parameters: equip_id - 武器的ID weapon_info - 存储所有武器的字典 Returns: weapon_name - 武器的名字 weapon_price - 武器的价格 Author: Jack Cui Blog: http://blog.csdn.net/c406495762 Modify: 2017-08-07 """ def seek_weapon(equip_id, weapon_info): for each_weapon in weapon_info: if each_weapon['equip_id'] == str(equip_id): weapon_name = each_weapon['name'] weapon_price = each_weapon['price'] return weapon_name, weapon_price """ 函数说明:获取并打印出装信息 Parameters: url - GET请求地址,通过Fiddler抓包获取 header - headers信息 
def hero_info(url, header, weapon_info):
    """Fetch a hero's details and print the recommended equipment sets.

    Parameters:
        url - GET request address
        header - request headers
        weapon_info - list of weapon records searched by seek_weapon
    Returns:
        None
    """
    info = requests.get(url = url, headers = header).json()['info']
    print('\n历史上的%s:\n %s' % (info['name'], info['history_intro']))
    for choice in info['equip_choice']:
        print('\n%s:\n %s' % (choice['title'], choice['description']))
        total_price = 0
        for position, weapon in enumerate(choice['list'], start=1):
            weapon_name, weapon_price = seek_weapon(weapon['equip_id'], weapon_info)
            print('%s:%s' % (weapon_name, weapon_price), end = '\t')
            # Break the line after every third weapon.
            if position % 3 == 0:
                print('\n', end = '')
            total_price += int(weapon_price)
        print('神装套件价格共计:%d' % total_price)


def hero_weapon(url, header):
    """Fetch the weapon list.

    Parameters:
        url - GET request address
        header - request headers
    Returns:
        the value stored under 'list' in the JSON response
    """
    return requests.get(url = url, headers = header).json()['list']
"http://gamehelper.gm825.com/wzry/equip/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8" heros_url = "http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8" hero_list(heros_url, headers) hero_id = input("请输入要查询的英雄ID:") hero_url = 
"http://gamehelper.gm825.com/wzry/hero/detail?hero_id={}&channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8".format(hero_id) weapon_info_dict = hero_weapon(weapon_url, headers) hero_info(hero_url, headers, weapon_info_dict) ================================================ FILE: one_hour_spider/biquge20180731.py ================================================ # -*- coding:utf-8 -*- import requests from bs4 import BeautifulSoup import os """ 从www.biqubao.com笔趣阁爬取小说,楼主教程中的网址我当时没打开, 就参照楼主教程,爬取了笔趣阁小说网的内容。 2018-07-31 """ if __name__=='__main__': #所要爬取的小说主页,每次使用时,修改该网址即可,同时保证本地保存根路径存在即可 target="https://www.biqubao.com/book/17570/" # 本地保存爬取的文本根路径 save_path = 'G:/pythonlearn' #笔趣阁网站根路径 index_path='https://www.biqubao.com' req=requests.get(url=target) #查看request默认的编码,发现与网站response不符,改为网站使用的gdk print(req.encoding) req.encoding = 'gbk' #解析html soup=BeautifulSoup(req.text,"html.parser") list_tag=soup.div(id="list") print('list_tag:',list_tag) #获取小说名称 story_title=list_tag[0].dl.dt.string # 根据小说名称创建一个文件夹,如果不存在就新建 dir_path=save_path+'/'+story_title if not os.path.exists(dir_path): os.path.join(save_path,story_title) os.mkdir(dir_path) #开始循环每一个章节,获取章节名称,与章节对应的网址 for dd_tag in list_tag[0].dl.find_all('dd'): #章节名称 chapter_name=dd_tag.string #章节网址 chapter_url=index_path+dd_tag.a.get('href') #访问该章节详情网址,爬取该章节正文 chapter_req = requests.get(url=chapter_url) chapter_req.encoding = 'gbk' chapter_soup = BeautifulSoup(chapter_req.text, 
class downloader(object):
    """Download a novel from biqukan.com chapter by chapter."""

    def __init__(self):
        self.server = 'http://www.biqukan.com/'
        self.target = 'http://www.biqukan.com/1_1094/'
        self.names = []            # chapter names
        self.urls = []            # chapter links
        self.nums = 0            # number of chapters

    def get_download_url(self):
        """Collect chapter names and links from the index page.

        Fills ``self.names``, ``self.urls`` and ``self.nums``.
        """
        req = requests.get(url = self.target)
        # Name the parser explicitly so the result does not depend on which
        # parsers happen to be installed.
        index_bf = BeautifulSoup(req.text, 'html.parser')
        listing = index_bf.find_all('div', class_ = 'listmain')
        # The first 15 links are not wanted chapters and are skipped.
        chapters = listing[0].find_all('a')[15:]
        self.nums = len(chapters)
        for each in chapters:
            self.names.append(each.string)
            self.urls.append(self.server + each.get('href'))

    def get_contents(self, target):
        """Fetch the text of one chapter.

        :param target: chapter link (string)
        :return: chapter text, with each run of eight non-breaking spaces
            replaced by a blank line
        """
        req = requests.get(url = target)
        bf = BeautifulSoup(req.text, 'html.parser')
        texts = bf.find_all('div', class_ = 'showtxt')
        return texts[0].text.replace('\xa0'*8,'\n\n')

    def writer(self, name, path, text):
        """Append one chapter to the output file.

        :param name: chapter name (string)
        :param path: file the novel is saved to (string)
        :param text: chapter content (string)
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            # writelines accepts a string as well as an iterable of strings.
            f.writelines(text)
            f.write('\n\n')
class get_photos(object):
    """Collect photo ids from the unsplash feed and download the photos."""

    def __init__(self):
        self.photos_id = []
        # 'xxx' is replaced by the photo id in download().
        self.download_server = 'https://unsplash.com/photos/xxx/download?force=trues'
        self.target = 'http://unsplash.com/napi/feeds/home'
        self.headers = {'authorization':'Client-ID c94869b36aa272dd62dfaeefed769d4115fb3189a9d1ec88ed457207747be626'}

    def get_ids(self):
        """Collect photo ids from the home feed and the five following pages.

        Ids are appended to ``self.photos_id``; there is a one-second pause
        after every page.
        """
        page_url = self.target
        for _ in range(6):
            # NOTE: verify=False disables TLS certificate verification.
            req = requests.get(url=page_url, headers=self.headers, verify=False)
            html = json.loads(req.text)
            page_url = html['next_page']
            for each in html['photos']:
                self.photos_id.append(each['id'])
            time.sleep(1)

    def download(self, photo_id, filename):
        """Download one photo to ``<filename>.jpg`` in the working directory.

        :param photo_id: id of the photo to download
        :param filename: integer used as the file name
        """
        target = self.download_server.replace('xxx', photo_id)
        with closing(requests.get(url=target, stream=True, verify = False, headers = self.headers)) as r:
            # 'wb' rather than append mode: a re-run must replace an existing
            # file, not add more bytes to the end of it.
            with open('%d.jpg' % filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size = 1024):
                    if chunk:
                        f.write(chunk)
class video_downloader():
    """Resolve a video page URL through the xfsub API and download the file."""

    def __init__(self, url):
        """Prepare the API addresses, headers and session.

        :param url: address of the video page; anything after '#' is dropped
        """
        self.server = 'http://api.xfsub.com'
        self.api = 'http://api.xfsub.com/xfsub_api/?url='
        self.get_url_api = 'http://api.xfsub.com/xfsub_api/url.php'
        self.url = url.split('#')[0]
        self.headers = {'Referer': 'http://api.xfsub.com/xfsub_api/?url=%s?qqdrsign=055a4' % self.url}
        self.target = self.api + self.url
        self.s = requests.session()

    def get_key(self):
        """Fetch the API page and store its key/time/url parameters in ``self.info``."""
        req = self.s.get(url=self.target)
        req.encoding = 'utf-8'
        # Raw string: the backslash belongs to the regex, not to Python.
        self.info = json.loads(re.findall(r'"url.php",\ (.+),', req.text)[0])

    def get_url(self):
        """Resolve the address of the video file.

        :return: video_url - address where the video is stored
        """
        data = {'time':self.info['time'],
            'key':self.info['key'],
            'url':self.info['url'],
            'type':''}
        req = self.s.post(url=self.get_url_api,data=data, headers=self.headers)
        url = self.server + json.loads(req.text)['url']
        req = self.s.get(url=url, headers=self.headers)
        # The response is parsed as XML; the <file> element holds the address.
        bf = BeautifulSoup(req.text,'xml')
        video_url = bf.find('file').string
        return video_url

    def Schedule(self, a, b, c):
        """Progress callback: print the download progress on one line.

        :param a: number of blocks transferred so far
        :param b: size of one block
        :param c: total size of the file; 0 or negative when unknown
        """
        downloaded = a * b
        if c > 0:
            # The last block usually overshoots the total; cap at 100%.
            per = min(100.0 * downloaded / c, 100.0)
        else:
            # Unknown total size: no meaningful percentage can be shown.
            per = 0.0
        sys.stdout.write(" " + "%.2f%% 已经下载的大小:%ld 文件大小:%ld" % (per,downloaded,c) + '\r')
        sys.stdout.flush()

    def video_download(self, url, filename):
        """Download the video, reporting progress through :meth:`Schedule`.

        :param url: video address
        :param filename: file the video is saved to
        """
        request.urlretrieve(url=url,filename=filename,reporthook=self.Schedule)
# ecc: Error Correction Codewords
def encode(ver, ecl, data_codewords):
    """Compute the error correction codewords of every data block.

    :param ver: version number, used 1-based to index ``ecc_num_per_block``
    :param ecl: error correction level, a key of ``lindex``
    :param data_codewords: iterable of data blocks (lists of ints)
    :return: list with one list of error correction codewords per block
    """
    en = ecc_num_per_block[ver-1][lindex[ecl]]
    return [get_ecc(dc, en) for dc in data_codewords]


def get_ecc(dc, ecc_num):
    """Return the remainder left after ``len(dc)`` division steps.

    :param dc: one data block (message polynomial coefficients)
    :param ecc_num: key into ``GP_list`` selecting the generator polynomial
    """
    gp = GP_list[ecc_num]
    remainder = dc
    for _ in range(len(dc)):
        remainder = divide(remainder, *gp)
    return remainder


def divide(MP, *GP):
    """Perform one division step of the message polynomial by the generator.

    :param MP: current message polynomial coefficients
    :param GP: generator polynomial terms, used as indexes into ``po2``
    :return: the polynomial after the step, one term shorter
    """
    if MP[0]:
        shift = log[MP[0]]
        scaled = []
        for exponent in GP:
            exponent += shift
            if exponent > 255:
                exponent %= 255
            scaled.append(po2[exponent])
        return XOR(scaled, *MP)
    # A zero lead term only needs to be dropped: XOR against zeros.
    return XOR([0] * len(GP), *MP)


def XOR(GP, *MP):
    """XOR two coefficient sequences term by term, dropping the first term.

    The shorter sequence is zero-padded to the length of the longer one.
    The padding is done on copies, so the caller's ``GP`` is not modified.

    :param GP: first coefficient sequence
    :param MP: second coefficient sequence, passed as separate arguments
    :return: list of XORed terms, starting from index 1
    """
    gp = list(GP)
    mp = list(MP)
    width = max(len(gp), len(mp))
    gp += [0] * (width - len(gp))
    mp += [0] * (width - len(mp))
    return [m ^ g for m, g in zip(mp[1:], gp[1:])]
character capacities # {level1: [version1(mode1,mode2,mode3,mode4), version2(..,..,..,..), ...], # level2: [version1(mode1,mode2,mode3,mode4), version2(..,..,..,..),...], # ...} char_cap = { 'L': [(41, 25, 17, 10), (77, 47, 32, 20), (127, 77, 53, 32), (187, 114, 78, 48), (255, 154, 106, 65), (322, 195, 134, 82), (370, 224, 154, 95), (461, 279, 192, 118), (552, 335, 230, 141), (652, 395, 271, 167), (772, 468, 321, 198), (883, 535, 367, 226), (1022, 619, 425, 262), (1101, 667, 458, 282), (1250, 758, 520, 320), (1408, 854, 586, 361), (1548, 938, 644, 397), (1725, 1046, 718, 442), (1903, 1153, 792, 488), (2061, 1249, 858, 528), (2232, 1352, 929, 572), (2409, 1460, 1003, 618), (2620, 1588, 1091, 672), (2812, 1704, 1171, 721), (3057, 1853, 1273, 784), (3283, 1990, 1367, 842), (3517, 2132, 1465, 902), (3669, 2223, 1528, 940), (3909, 2369, 1628, 1002), (4158, 2520, 1732, 1066), (4417, 2677, 1840, 1132), (4686, 2840, 1952, 1201), (4965, 3009, 2068, 1273), (5253, 3183, 2188, 1347), (5529, 3351, 2303, 1417), (5836, 3537, 2431, 1496), (6153, 3729, 2563, 1577), (6479, 3927, 2699, 1661), (6743, 4087, 2809, 1729), (7089, 4296, 2953, 1817)], 'M': [(34, 20, 14, 8), (63, 38, 26, 16), (101, 61, 42, 26), (149, 90, 62, 38), (202, 122, 84, 52), (255, 154, 106, 65), (293, 178, 122, 75), (365, 221, 152, 93), (432, 262, 180, 111), (513, 311, 213, 131), (604, 366, 251, 155), (691, 419, 287, 177), (796, 483, 331, 204), (871, 528, 362, 223), (991, 600, 412, 254), (1082, 656, 450, 277), (1212, 734, 504, 310), (1346, 816, 560, 345), (1500, 909, 624, 384), (1600, 970, 666, 410), (1708, 1035, 711, 438), (1872, 1134, 779, 480), (2059, 1248, 857, 528), (2188, 1326, 911, 561), (2395, 1451, 997, 614), (2544, 1542, 1059, 652), (2701, 1637, 1125, 692), (2857, 1732, 1190, 732), (3035, 1839, 1264, 778), (3289, 1994, 1370, 843), (3486, 2113, 1452, 894), (3693, 2238, 1538, 947), (3909, 2369, 1628, 1002), (4134, 2506, 1722, 1060), (4343, 2632, 1809, 1113), (4588, 2780, 1911, 1176), (4775, 2894, 1989, 1224), 
(5039, 3054, 2099, 1292), (5313, 3220, 2213, 1362), (5596, 3391, 2331, 1435)], 'Q': [(27, 16, 11, 7), (48, 29, 20, 12), (77, 47, 32, 20), (111, 67, 46, 28), (144, 87, 60, 37), (178, 108, 74, 45), (207, 125, 86, 53), (259, 157, 108, 66), (312, 189, 130, 80), (364, 221, 151, 93), (427, 259, 177, 109), (489, 296, 203, 125), (580, 352, 241, 149), (621, 376, 258, 159), (703, 426, 292, 180), (775, 470, 322, 198), (876, 531, 364, 224), (948, 574, 394, 243), (1063, 644, 442, 272), (1159, 702, 482, 297), (1224, 742, 509, 314), (1358, 823, 565, 348), (1468, 890, 611, 376), (1588, 963, 661, 407), (1718, 1041, 715, 440), (1804, 1094, 751, 462), (1933, 1172, 805, 496), (2085, 1263, 868, 534), (2181, 1322, 908, 559), (2358, 1429, 982, 604), (2473, 1499, 1030, 634), (2670, 1618, 1112, 684), (2805, 1700, 1168, 719), (2949, 1787, 1228, 756), (3081, 1867, 1283, 790), (3244, 1966, 1351, 832), (3417, 2071, 1423, 876), (3599, 2181, 1499, 923), (3791, 2298, 1579, 972), (3993, 2420, 1663, 1024)], 'H': [(17, 10, 7, 4), (34, 20, 14, 8), (58, 35, 24, 15), (82, 50, 34, 21), (106, 64, 44, 27), (139, 84, 58, 36), (154, 93, 64, 39), (202, 122, 84, 52), (235, 143, 98, 60), (288, 174, 119, 74), (331, 200, 137, 85), (374, 227, 155, 96), (427, 259, 177, 109), (468, 283, 194, 120), (530, 321, 220, 136), (602, 365, 250, 154), (674, 408, 280, 173), (746, 452, 310, 191), (813, 493, 338, 208), (919, 557, 382, 235), (969, 587, 403, 248), (1056, 640, 439, 270), (1108, 672, 461, 284), (1228, 744, 511, 315), (1286, 779, 535, 330), (1425, 864, 593, 365), (1501, 910, 625, 385), (1581, 958, 658, 405), (1677, 1016, 698, 430), (1782, 1080, 742, 457), (1897, 1150, 790, 486), (2022, 1226, 842, 518), (2157, 1307, 898, 553), (2301, 1394, 958, 590), (2361, 1431, 983, 605), (2524, 1530, 1051, 647), (2625, 1591, 1093, 673), (2735, 1658, 1139, 701), (2927, 1774, 1219, 750), (3057, 1852, 1273, 784)] } mindex = {'numeric':0, 'alphanumeric':1, 'byte':2, 'kanji':3} # [ # version1[level1,level2,level3,level4], # 
version2[..,..,..,..], # ... # ] required_bytes = [ [19, 16, 13, 9], [34, 28, 22, 16], [55, 44, 34, 26], [80, 64, 48, 36], [108, 86, 62, 46], [136, 108, 76, 60], [156, 124, 88, 66], [194, 154, 110, 86], [232, 182, 132, 100], [274, 216, 154, 122], [324, 254, 180, 140], [370, 290, 206, 158], [428, 334, 244, 180], [461, 365, 261, 197], [523, 415, 295, 223], [589, 453, 325, 253], [647, 507, 367, 283], [721, 563, 397, 313], [795, 627, 445, 341], [861, 669, 485, 385], [932, 714, 512, 406], [1006, 782, 568, 442], [1094, 860, 614, 464], [1174, 914, 664, 514], [1276, 1000, 718, 538], [1370, 1062, 754, 596], [1468, 1128, 808, 628], [1531, 1193, 871, 661], [1631, 1267, 911, 701], [1735, 1373, 985, 745], [1843, 1455, 1033, 793], [1955, 1541, 1115, 845], [2071, 1631, 1171, 901], [2191, 1725, 1231, 961], [2306, 1812, 1286, 986], [2434, 1914, 1354, 1054], [2566, 1992, 1426, 1096], [2702, 2102, 1502, 1142], [2812, 2216, 1582, 1222], [2956, 2334, 1666, 1276] ] num_list = '0123456789' alphanum_list = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:' # [ # version1[ # level1(num_of_group_1_blocks, DC_per_group_1_block, num_of_group_2_blocks, DC_per_group_2_block), # level2(..,..,..,..), # level3(..,..,..,..), # level4(..,..,..,..) # ], # version2[level1(..), level2(..), level3(..), level4(..)], # ... 
# ] grouping_list = [ [(1, 19, 0, 0), (1, 16, 0, 0), (1, 13, 0, 0), (1, 9, 0, 0)], [(1, 34, 0, 0), (1, 28, 0, 0), (1, 22, 0, 0), (1, 16, 0, 0)], [(1, 55, 0, 0), (1, 44, 0, 0), (2, 17, 0, 0), (2, 13, 0, 0)], [(1, 80, 0, 0), (2, 32, 0, 0), (2, 24, 0, 0), (4, 9, 0, 0)], [(1, 108, 0, 0), (2, 43, 0, 0), (2, 15, 2, 16), (2, 11, 2, 12)], [(2, 68, 0, 0), (4, 27, 0, 0), (4, 19, 0, 0), (4, 15, 0, 0)], [(2, 78, 0, 0), (4, 31, 0, 0), (2, 14, 4, 15), (4, 13, 1, 14)], [(2, 97, 0, 0), (2, 38, 2, 39), (4, 18, 2, 19), (4, 14, 2, 15)], [(2, 116, 0, 0), (3, 36, 2, 37), (4, 16, 4, 17), (4, 12, 4, 13)], [(2, 68, 2, 69), (4, 43, 1, 44), (6, 19, 2, 20), (6, 15, 2, 16)], [(4, 81, 0, 0), (1, 50, 4, 51), (4, 22, 4, 23), (3, 12, 8, 13)], [(2, 92, 2, 93), (6, 36, 2, 37), (4, 20, 6, 21), (7, 14, 4, 15)], [(4, 107, 0, 0), (8, 37, 1, 38), (8, 20, 4, 21), (12, 11, 4, 12)], [(3, 115, 1, 116), (4, 40, 5, 41), (11, 16, 5, 17), (11, 12, 5, 13)], [(5, 87, 1, 88), (5, 41, 5, 42), (5, 24, 7, 25), (11, 12, 7, 13)], [(5, 98, 1, 99), (7, 45, 3, 46), (15, 19, 2, 20), (3, 15, 13, 16)], [(1, 107, 5, 108), (10, 46, 1, 47), (1, 22, 15, 23), (2, 14, 17, 15)], [(5, 120, 1, 121), (9, 43, 4, 44), (17, 22, 1, 23), (2, 14, 19, 15)], [(3, 113, 4, 114), (3, 44, 11, 45), (17, 21, 4, 22), (9, 13, 16, 14)], [(3, 107, 5, 108), (3, 41, 13, 42), (15, 24, 5, 25), (15, 15, 10, 16)], [(4, 116, 4, 117), (17, 42, 0, 0), (17, 22, 6, 23), (19, 16, 6, 17)], [(2, 111, 7, 112), (17, 46, 0, 0), (7, 24, 16, 25), (34, 13, 0, 0)], [(4, 121, 5, 122), (4, 47, 14, 48), (11, 24, 14, 25), (16, 15, 14, 16)], [(6, 117, 4, 118), (6, 45, 14, 46), (11, 24, 16, 25), (30, 16, 2, 17)], [(8, 106, 4, 107), (8, 47, 13, 48), (7, 24, 22, 25), (22, 15, 13, 16)], [(10, 114, 2, 115), (19, 46, 4, 47), (28, 22, 6, 23), (33, 16, 4, 17)], [(8, 122, 4, 123), (22, 45, 3, 46), (8, 23, 26, 24), (12, 15, 28, 16)], [(3, 117, 10, 118), (3, 45, 23, 46), (4, 24, 31, 25), (11, 15, 31, 16)], [(7, 116, 7, 117), (21, 45, 7, 46), (1, 23, 37, 24), (19, 15, 26, 16)], [(5, 115, 
10, 116), (19, 47, 10, 48), (15, 24, 25, 25), (23, 15, 25, 16)], [(13, 115, 3, 116), (2, 46, 29, 47), (42, 24, 1, 25), (23, 15, 28, 16)], [(17, 115, 0, 0), (10, 46, 23, 47), (10, 24, 35, 25), (19, 15, 35, 16)], [(17, 115, 1, 116), (14, 46, 21, 47), (29, 24, 19, 25), (11, 15, 46, 16)], [(13, 115, 6, 116), (14, 46, 23, 47), (44, 24, 7, 25), (59, 16, 1, 17)], [(12, 121, 7, 122), (12, 47, 26, 48), (39, 24, 14, 25), (22, 15, 41, 16)], [(6, 121, 14, 122), (6, 47, 34, 48), (46, 24, 10, 25), (2, 15, 64, 16)], [(17, 122, 4, 123), (29, 46, 14, 47), (49, 24, 10, 25), (24, 15, 46, 16)], [(4, 122, 18, 123), (13, 46, 32, 47), (48, 24, 14, 25), (42, 15, 32, 16)], [(20, 117, 4, 118), (40, 47, 7, 48), (43, 24, 22, 25), (10, 15, 67, 16)], [(19, 118, 6, 119), (18, 47, 31, 48), (34, 24, 34, 25), (20, 15, 61, 16)] ] mode_indicator = {'numeric': '0001', 'alphanumeric': '0010', 'byte': '0100', 'kanji': '1000'} """ ****** for ECC.py ******* """ #GP: Generator Polynomial, MP: Message Polynomial GP_list = { 7: [0, 87, 229, 146, 149, 238, 102, 21], 10: [0, 251, 67, 46, 61, 118, 70, 64, 94, 32, 45], 13: [0, 74, 152, 176, 100, 86, 100, 106, 104, 130, 218, 206, 140, 78], 15: [0, 8, 183, 61, 91, 202, 37, 51, 58, 58, 237, 140, 124, 5, 99, 105], 16: [0, 120, 104, 107, 109, 102, 161, 76, 3, 91, 191, 147, 169, 182, 194, 225, 120], 17: [0, 43, 139, 206, 78, 43, 239, 123, 206, 214, 147, 24, 99, 150, 39, 243, 163, 136], 18: [0, 215, 234, 158, 94, 184, 97, 118, 170, 79, 187, 152, 148, 252, 179, 5, 98, 96, 153], 20: [0, 17, 60, 79, 50, 61, 163, 26, 187, 202, 180, 221, 225, 83, 239, 156, 164, 212, 212, 188, 190], 22: [0, 210, 171, 247, 242, 93, 230, 14, 109, 221, 53, 200, 74, 8, 172, 98, 80, 219, 134, 160, 105, 165, 231], 24: [0, 229, 121, 135, 48, 211, 117, 251, 126, 159, 180, 169, 152, 192, 226, 228, 218, 111, 0, 117, 232, 87, 96, 227, 21], 26: [0, 173, 125, 158, 2, 103, 182, 118, 17, 145, 201, 111, 28, 165, 53, 161, 21, 245, 142, 13, 102, 48, 227, 153, 145, 218, 70], 28: [0, 168, 223, 200, 104, 224, 
234, 108, 180, 110, 190, 195, 147, 205, 27, 232, 201, 21, 43, 245, 87, 42, 195, 212, 119, 242, 37, 9, 123], 30: [0, 41, 173, 145, 152, 216, 31, 179, 182, 50, 48, 110, 86, 239, 96, 222, 125, 42, 173, 226, 193, 224, 130, 156, 37, 251, 216, 238, 40, 192, 180] } # Error Correction Codewords per block # [version1(level1,level2,level3,level4), # version2(..,..,..,..), # ....] ecc_num_per_block = [ (7, 10, 13, 17), (10, 16, 22, 28), (15, 26, 18, 22), (20, 18, 26, 16), (26, 24, 18, 22), (18, 16, 24, 28), (20, 18, 18, 26), (24, 22, 22, 26), (30, 22, 20, 24), (18, 26, 24, 28), (20, 30, 28, 24), (24, 22, 26, 28), (26, 22, 24, 22), (30, 24, 20, 24), (22, 24, 30, 24), (24, 28, 24, 30), (28, 28, 28, 28), (30, 26, 28, 28), (28, 26, 26, 26), (28, 26, 30, 28), (28, 26, 28, 30), (28, 28, 30, 24), (30, 28, 30, 30), (30, 28, 30, 30), (26, 28, 30, 30), (28, 28, 28, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30), (30, 28, 30, 30) ] # powers of 2 list po2 = [ 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 
220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1 ] # log list log = [ None, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 ] """ ****** for data.py + ECC.py + structure.py + matrix.py ******* """ lindex = {'L':0, 'M':1, 'Q':2, 'H':3} """ ****** for structure.py ******* """ required_remainder_bits = (0,7,7,7,7,7,0,0,0,0,0,0,0,3,3,3,3,3,3,3,4,4,4,4,4,4,4,3,3,3,3,3,3,3,0,0,0,0,0,0) # [ # version1[ # level1(num_of_group_1_blocks, DC_per_group_1_block, num_of_group_2_blocks, DC_per_group_2_block), # level2(..,..,..,..), # level3(..,..,..,..), # level4(..,..,..,..) 
# ], # version2[level1(..), level2(..), level3(..), level4(..)], # ... # ] grouping_list = [ [(1, 19, 0, 0), (1, 16, 0, 0), (1, 13, 0, 0), (1, 9, 0, 0)], [(1, 34, 0, 0), (1, 28, 0, 0), (1, 22, 0, 0), (1, 16, 0, 0)], [(1, 55, 0, 0), (1, 44, 0, 0), (2, 17, 0, 0), (2, 13, 0, 0)], [(1, 80, 0, 0), (2, 32, 0, 0), (2, 24, 0, 0), (4, 9, 0, 0)], [(1, 108, 0, 0), (2, 43, 0, 0), (2, 15, 2, 16), (2, 11, 2, 12)], [(2, 68, 0, 0), (4, 27, 0, 0), (4, 19, 0, 0), (4, 15, 0, 0)], [(2, 78, 0, 0), (4, 31, 0, 0), (2, 14, 4, 15), (4, 13, 1, 14)], [(2, 97, 0, 0), (2, 38, 2, 39), (4, 18, 2, 19), (4, 14, 2, 15)], [(2, 116, 0, 0), (3, 36, 2, 37), (4, 16, 4, 17), (4, 12, 4, 13)], [(2, 68, 2, 69), (4, 43, 1, 44), (6, 19, 2, 20), (6, 15, 2, 16)], [(4, 81, 0, 0), (1, 50, 4, 51), (4, 22, 4, 23), (3, 12, 8, 13)], [(2, 92, 2, 93), (6, 36, 2, 37), (4, 20, 6, 21), (7, 14, 4, 15)], [(4, 107, 0, 0), (8, 37, 1, 38), (8, 20, 4, 21), (12, 11, 4, 12)], [(3, 115, 1, 116), (4, 40, 5, 41), (11, 16, 5, 17), (11, 12, 5, 13)], [(5, 87, 1, 88), (5, 41, 5, 42), (5, 24, 7, 25), (11, 12, 7, 13)], [(5, 98, 1, 99), (7, 45, 3, 46), (15, 19, 2, 20), (3, 15, 13, 16)], [(1, 107, 5, 108), (10, 46, 1, 47), (1, 22, 15, 23), (2, 14, 17, 15)], [(5, 120, 1, 121), (9, 43, 4, 44), (17, 22, 1, 23), (2, 14, 19, 15)], [(3, 113, 4, 114), (3, 44, 11, 45), (17, 21, 4, 22), (9, 13, 16, 14)], [(3, 107, 5, 108), (3, 41, 13, 42), (15, 24, 5, 25), (15, 15, 10, 16)], [(4, 116, 4, 117), (17, 42, 0, 0), (17, 22, 6, 23), (19, 16, 6, 17)], [(2, 111, 7, 112), (17, 46, 0, 0), (7, 24, 16, 25), (34, 13, 0, 0)], [(4, 121, 5, 122), (4, 47, 14, 48), (11, 24, 14, 25), (16, 15, 14, 16)], [(6, 117, 4, 118), (6, 45, 14, 46), (11, 24, 16, 25), (30, 16, 2, 17)], [(8, 106, 4, 107), (8, 47, 13, 48), (7, 24, 22, 25), (22, 15, 13, 16)], [(10, 114, 2, 115), (19, 46, 4, 47), (28, 22, 6, 23), (33, 16, 4, 17)], [(8, 122, 4, 123), (22, 45, 3, 46), (8, 23, 26, 24), (12, 15, 28, 16)], [(3, 117, 10, 118), (3, 45, 23, 46), (4, 24, 31, 25), (11, 15, 31, 16)], [(7, 116, 7, 
117), (21, 45, 7, 46), (1, 23, 37, 24), (19, 15, 26, 16)], [(5, 115, 10, 116), (19, 47, 10, 48), (15, 24, 25, 25), (23, 15, 25, 16)], [(13, 115, 3, 116), (2, 46, 29, 47), (42, 24, 1, 25), (23, 15, 28, 16)], [(17, 115, 0, 0), (10, 46, 23, 47), (10, 24, 35, 25), (19, 15, 35, 16)], [(17, 115, 1, 116), (14, 46, 21, 47), (29, 24, 19, 25), (11, 15, 46, 16)], [(13, 115, 6, 116), (14, 46, 23, 47), (44, 24, 7, 25), (59, 16, 1, 17)], [(12, 121, 7, 122), (12, 47, 26, 48), (39, 24, 14, 25), (22, 15, 41, 16)], [(6, 121, 14, 122), (6, 47, 34, 48), (46, 24, 10, 25), (2, 15, 64, 16)], [(17, 122, 4, 123), (29, 46, 14, 47), (49, 24, 10, 25), (24, 15, 46, 16)], [(4, 122, 18, 123), (13, 46, 32, 47), (48, 24, 14, 25), (42, 15, 32, 16)], [(20, 117, 4, 118), (40, 47, 7, 48), (43, 24, 22, 25), (10, 15, 67, 16)], [(19, 118, 6, 119), (18, 47, 31, 48), (34, 24, 34, 25), (20, 15, 61, 16)] ] """ ****** for matrix.py ******* """ # Alignment Pattern Locations alig_location = [ (6, 18), (6, 22), (6, 26), (6, 30), (6, 34), (6, 22, 38), (6, 24, 42), (6, 26, 46), (6, 28, 50), (6, 30, 54), (6, 32, 58), (6, 34, 62), (6, 26, 46, 66), (6, 26, 48, 70), (6, 26, 50, 74), (6, 30, 54, 78), (6, 30, 56, 82), (6, 30, 58, 86), (6, 34, 62, 90), (6, 28, 50, 72, 94), (6, 26, 50, 74, 98), (6, 30, 54, 78, 102), (6, 28, 54, 80, 106), (6, 32, 58, 84, 110), (6, 30, 58, 86, 114), (6, 34, 62, 90, 118), (6, 26, 50, 74, 98, 122), (6, 30, 54, 78, 102, 126), (6, 26, 52, 78, 104, 130), (6, 30, 56, 82, 108, 134), (6, 34, 60, 86, 112, 138), (6, 30, 58, 86, 114, 142), (6, 34, 62, 90, 118, 146), (6, 30, 54, 78, 102, 126, 150), (6, 24, 50, 76, 102, 128, 154), (6, 28, 54, 80, 106, 132, 158), (6, 32, 58, 84, 110, 136, 162), (6, 26, 54, 82, 110, 138, 166), (6, 30, 58, 86, 114, 142, 170) ] # List of all Format Information Strings # [ # level1[mask_pattern0, mask_pattern1, mask_...3,...], # level2[...], # level3[...], # level4[...] 
# ] format_info_str = [ ['111011111000100', '111001011110011', '111110110101010', '111100010011101', '110011000101111', '110001100011000', '110110001000001', '110100101110110'], ['101010000010010', '101000100100101', '101111001111100', '101101101001011', '100010111111001', '100000011001110', '100111110010111', '100101010100000'], ['011010101011111', '011000001101000', '011111100110001', '011101000000110', '010010010110100', '010000110000011', '010111011011010', '010101111101101'], ['001011010001001', '001001110111110', '001110011100111', '001100111010000', '000011101100010', '000001001010101', '000110100001100', '000100000111011'] ] # Version Information Strings version_info_str = [ '000111110010010100', '001000010110111100', '001001101010011001', '001010010011010011', '001011101111110110', '001100011101100010', '001101100001000111', '001110011000001101', '001111100100101000', '010000101101111000', '010001010001011101', '010010101000010111', '010011010100110010', '010100100110100110', '010101011010000011', '010110100011001001', '010111011111101100', '011000111011000100', '011001000111100001', '011010111110101011', '011011000010001110', '011100110000011010', '011101001100111111', '011110110101110101', '011111001001010000', '100000100111010101', '100001011011110000', '100010100010111010', '100011011110011111', '100100101100001011', '100101010000101110', '100110101001100100', '100111010101000001', '101000110001101001' ] ================================================ FILE: video_downloader/MyQR/mylibs/data.py ================================================ # -*- coding: utf-8 -*- from MyQR.mylibs.constant import char_cap, required_bytes, mindex, lindex, num_list, alphanum_list, grouping_list, mode_indicator # ecl: Error Correction Level(L,M,Q,H) def encode(ver, ecl, str): mode_encoding = { 'numeric': numeric_encoding, 'alphanumeric': alphanumeric_encoding, 'byte': byte_encoding, 'kanji': kanji_encoding } ver, mode = analyse(ver, ecl, str) # print('line 16: mode:', 
def alphanumeric_encoding(str):
    """Encode *str* in alphanumeric mode and return the resulting bit string.

    Every character is replaced by its index in ``alphanum_list``.  Indices
    are consumed two at a time and each pair is written as
    ``first * 45 + second`` in 11 bits; when the input has an odd number of
    characters the last index is written on its own in 6 bits.

    Parameters:
        str - the text to encode (the name shadows the builtin but is kept
              because it is part of the public signature)
    Returns:
        A string made only of '0' and '1'; empty for empty input.
    Raises:
        ValueError - if a character is not present in ``alphanum_list``
                     (raised by ``list.index``).
    """
    code_list = [alphanum_list.index(ch) for ch in str]

    chunks = []
    for k in range(1, len(code_list), 2):
        chunks.append(format(code_list[k - 1] * 45 + code_list[k], '011b'))

    # Decide from the length, not from the loop variable: for a one-character
    # (or empty) input the loop above never runs, so the loop variable would
    # be unbound and the old check raised NameError on Python 3.
    if len(code_list) % 2:
        chunks.append(format(code_list[-1], '06b'))

    return ''.join(chunks)
def draw_a_black_unit(p, x, y, ul):
    """Set an ``ul`` by ``ul`` square of pixels on *p* to 0.

    The square starts at (x, y) and extends ``ul`` pixels along each axis.
    *p* only needs a ``putpixel((x, y), value)`` method.
    """
    offsets = range(ul)
    for dx in offsets:
        column = x + dx
        for dy in offsets:
            p.putpixel((column, y + dy), 0)
def place_bits(bits, m):
    """Fill the still-empty (``None``) cells of square matrix *m* with *bits*.

    The matrix is walked in two-column lanes starting from the right edge.
    The first lane is traversed bottom-to-top, the next top-to-bottom, and so
    on alternately; inside a lane the right cell of each row is visited before
    the left one.  Lanes whose right column index would be 6 or less are
    shifted one column to the left, so column 6 is never written.  Each
    visited cell that is ``None`` receives ``int()`` of the next item of
    *bits*; cells that already hold a value are left alone.

    *m* is modified in place; nothing is returned.
    """
    stream = iter(bits)
    size = len(m)
    bottom_up = range(size - 1, -1, -1)
    top_down = range(size)

    for lane, right in enumerate(range(size - 1, 0, -2)):
        if right <= 6:
            right -= 1
        rows = top_down if lane % 2 else bottom_up
        for r in rows:
            for c in (right, right - 1):
                if m[r][c] is None:
                    m[r][c] = int(next(stream))
def compute_score(m):
    """Return the penalty score of the square 0/1 matrix *m*.

    The score is the sum of four parts:
      1. for every row and column, each run of five or more equal modules
         adds (run length - 2);
      2. every 2x2 block of equal modules adds 3;
      3. every occurrence, in a row or column, of the 11-module sequence
         1,0,1,1,1,0,1,0,0,0,0 or 0,0,0,0,1,0,1,1,1,0,1 adds 40;
      4. the distance of the share of 1-modules from 50 %, truncated to a
         multiple of 5 percentage points, is doubled and added.
    """
    columns = list(map(list, zip(*m)))

    def run_penalty(lines):
        # Part 1: long runs of identical modules along each line.
        total = 0
        for line in lines:
            start = 0
            while start < len(line) - 4:
                span = 5
                while line[start:start + span] in ([1] * span, [0] * span):
                    span += 1
                run = span - 1  # the last window that was still uniform
                if run >= 5:
                    total += run - 2
                    start += run
                else:
                    start += 1
        return total

    def block_penalty(grid):
        # Part 2: uniform 2x2 blocks.
        last = len(grid) - 1
        hits = sum(
            1
            for r in range(last)
            for c in range(last)
            if grid[r][c] == grid[r + 1][c] == grid[r][c + 1] == grid[r + 1][c + 1]
        )
        return 3 * hits

    def pattern_penalty(lines):
        # Part 3: the two 11-module sequences, scanned left to right.
        lead = [1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0]
        trail = [0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1]
        total = 0
        for line in lines:
            pos = 0
            limit = len(line) - 10
            while pos < limit:
                window = line[pos:pos + 11]
                if window == lead:
                    total += 40
                    pos += 7
                elif window == trail:
                    total += 40
                    pos += 4
                else:
                    pos += 1
        return total

    def balance_penalty(grid):
        # Part 4: deviation of the dark-module percentage from 50.
        dark = sum(sum(row) for row in grid)
        percent = dark / (len(grid)**2) * 100
        step = int((50 - percent) / 5) * 5
        return abs(2 * step)

    runs = run_penalty(m) + run_penalty(columns)
    blocks = block_penalty(m)
    patterns = pattern_penalty(m) + pattern_penalty(columns)
    return runs + blocks + patterns + balance_penalty(m)
def interleave_ecc(ecc):
    """Interleave the blocks in *ecc* and return one flat list.

    The result takes the first item of every block, then the second item of
    every block, and so on, stopping at the length of the shortest block.
    """
    return [codeword for column in zip(*ecc) for codeword in column]
def run(words, version=1, level='H', picture=None, colorized=False, contrast=1.0, brightness=1.0, save_name=None, save_dir=os.getcwd()):
    """Generate a QR code picture for *words*, optionally blended with an image.

    Parameters:
        words - str to encode; every character must be in ``supported_chars``
        version - int from 1 to 40
        level - exactly one of 'L', 'M', 'Q', 'H'
        picture - optional filename of an existing .jpg/.png/.bmp/.gif image
        colorized - bool; validated and used only when *picture* is given
        contrast - float handed to ImageEnhance.Contrast (only with *picture*)
        brightness - float handed to ImageEnhance.Brightness (only with *picture*)
        save_name - optional output filename ending in .jpg/.png/.bmp/.gif;
                    must end in .gif when *picture* is a .gif
        save_dir - existing directory that receives the output file
    Returns:
        (ver, level, qr_name): the version returned by
        ``theqrmodule.get_qrcode``, *level* as passed in, and the path of the
        file that was written.
    Raises:
        ValueError - if any argument fails the checks above.

    NOTE: the ``save_dir`` default is evaluated once, when this function is
    defined, so it is the working directory at import time.
    """
    supported_chars = r"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ··,.:;+-*/\~!@#$%^&`'=<>[]()?_{}|"
    extensions = ('.jpg', '.png', '.bmp', '.gif')

    # check every parameter
    if not isinstance(words, str) or any(i not in supported_chars for i in words):
        raise ValueError('Wrong words! Make sure the characters are supported!')
    if not isinstance(version, int) or version not in range(1, 41):
        raise ValueError('Wrong version! Please choose a int-type value from 1 to 40!')
    # Compare against the four allowed values.  The previous substring test
    # (level not in 'LMQH') let the empty string through, which then failed
    # much later with an unrelated error.
    if not isinstance(level, str) or level not in ('L', 'M', 'Q', 'H'):
        raise ValueError("Wrong level! Please choose a str-type level from {'L','M','Q','H'}!")
    if picture:
        # NOTE(review): the three type checks below are placed under
        # ``if picture:`` as in the upstream MyQR layout; the nesting is not
        # recoverable from the whitespace-collapsed source - confirm.
        if not isinstance(picture, str) or not os.path.isfile(picture) or picture[-4:] not in extensions:
            raise ValueError("Wrong picture! Input a filename that exists and be tailed with one of {'.jpg', '.png', '.bmp', '.gif'}!")
        if picture[-4:] == '.gif' and save_name and save_name[-4:] != '.gif':
            raise ValueError('Wrong save_name! If the picuter is .gif format, the output filename should be .gif format, too!')
        if not isinstance(colorized, bool):
            raise ValueError('Wrong colorized! Input a bool-type value!')
        if not isinstance(contrast, float):
            raise ValueError('Wrong contrast! Input a float-type value!')
        if not isinstance(brightness, float):
            raise ValueError('Wrong brightness! Input a float-type value!')
    if save_name and (not isinstance(save_name, str) or save_name[-4:] not in extensions):
        raise ValueError("Wrong save_name! Input a filename tailed with one of {'.jpg', '.png', '.bmp', '.gif'}!")
    if not os.path.isdir(save_dir):
        raise ValueError('Wrong save_dir! Input a existing-directory!')

    def combine(ver, qr_name, bg_name, colorized, contrast, brightness, save_dir, save_name=None):
        """Blend background *bg_name* into the QR image *qr_name*; return the output path."""
        from MyQR.mylibs.constant import alig_location
        from PIL import ImageEnhance

        qr = Image.open(qr_name)
        qr = qr.convert('RGBA') if colorized else qr

        bg0 = Image.open(bg_name).convert('RGBA')
        bg0 = ImageEnhance.Contrast(bg0).enhance(contrast)
        bg0 = ImageEnhance.Brightness(bg0).enhance(brightness)

        # Scale the shorter side to the QR size minus 24 pixels; the longer
        # side is multiplied by the integer part of the aspect ratio.
        if bg0.size[0] < bg0.size[1]:
            bg0 = bg0.resize((qr.size[0]-24, (qr.size[0]-24)*int(bg0.size[1]/bg0.size[0])))
        else:
            bg0 = bg0.resize(((qr.size[1]-24)*int(bg0.size[0]/bg0.size[1]), qr.size[1]-24))

        bg = bg0 if colorized else bg0.convert('1')

        # Pixels covered by alignment patterns must keep their QR value.
        # A set keeps the per-pixel membership test below constant-time.
        aligs = set()
        if ver > 1:
            aloc = alig_location[ver-2]
            last = len(aloc) - 1
            for a in range(len(aloc)):
                for b in range(len(aloc)):
                    if not ((a == b == 0) or (a == last and b == 0) or (a == 0 and b == last)):
                        for i in range(3*(aloc[a]-2), 3*(aloc[a]+3)):
                            for j in range(3*(aloc[b]-2), 3*(aloc[b]+3)):
                                aligs.add((i, j))

        for i in range(qr.size[0]-24):
            for j in range(qr.size[1]-24):
                if not ((i in (18,19,20)) or (j in (18,19,20)) or (i<24 and j<24) or (i<24 and j>qr.size[1]-49) or (i>qr.size[0]-49 and j<24) or ((i,j) in aligs) or (i%3==1 and j%3==1) or (bg0.getpixel((i,j))[3]==0)):
                    qr.putpixel((i+12,j+12), bg.getpixel((i,j)))

        qr_name = os.path.join(save_dir, os.path.splitext(os.path.basename(bg_name))[0] + '_qrcode.png') if not save_name else os.path.join(save_dir, save_name)
        qr.resize((qr.size[0]*3, qr.size[1]*3)).save(qr_name)
        return qr_name

    tempdir = os.path.join(os.path.expanduser('~'), '.myqr')

    try:
        if not os.path.exists(tempdir):
            os.makedirs(tempdir)

        ver, qr_name = theqrmodule.get_qrcode(version, level, words, tempdir)

        if picture and picture[-4:]=='.gif':
            import imageio

            # Dump every GIF frame to the temp directory, combine each frame
            # with the QR code, then reassemble the frames into a GIF.
            im = Image.open(picture)
            duration = im.info.get('duration', 0)
            im.save(os.path.join(tempdir, '0.png'))
            while True:
                try:
                    seq = im.tell()
                    im.seek(seq + 1)
                    im.save(os.path.join(tempdir, '%s.png' %(seq+1)))
                except EOFError:
                    break

            imsname = []
            for s in range(seq+1):
                bg_name = os.path.join(tempdir, '%s.png' % s)
                imsname.append(combine(ver, qr_name, bg_name, colorized, contrast, brightness, tempdir))

            ims = [imageio.imread(pic) for pic in imsname]
            qr_name = os.path.join(save_dir, os.path.splitext(os.path.basename(picture))[0] + '_qrcode.gif') if not save_name else os.path.join(save_dir, save_name)
            imageio.mimwrite(qr_name, ims, '.gif', **{ 'duration': duration/1000 })
        elif picture:
            qr_name = combine(ver, qr_name, picture, colorized, contrast, brightness, save_dir, save_name)
        elif qr_name:
            qr = Image.open(qr_name)
            qr_name = os.path.join(save_dir, os.path.basename(qr_name)) if not save_name else os.path.join(save_dir, save_name)
            qr.resize((qr.size[0]*3, qr.size[1]*3)).save(qr_name)

        return ver, level, qr_name
    finally:
        # Always remove the scratch directory, whether or not generation succeeded.
        import shutil
        if os.path.exists(tempdir):
            shutil.rmtree(tempdir)
Just works when there is a correct '-p' or '--picture'.") argparser.add_argument('-con', '--contrast', type = float, default = 1.0, help = 'A floating point value controlling the enhancement of contrast. Factor 1.0 always returns a copy of the original image, lower factors mean less color (brightness, contrast, etc), and higher values more. There are no restrictions on this value. Default: 1.0') argparser.add_argument('-bri', '--brightness', type = float, default = 1.0, help = 'A floating point value controlling the enhancement of brightness. Factor 1.0 always returns a copy of the original image, lower factors mean less color (brightness, contrast, etc), and higher values more. There are no restrictions on this value. Default: 1.0') argparser.add_argument('-n', '--name', help = "The filename of output tailed with one of {'.jpg', '.png', '.bmp', '.gif'}. eg. exampl.png") argparser.add_argument('-d', '--directory', default = os.getcwd(), help = 'The directory of output.') args = argparser.parse_args() if args.picture and args.picture[-4:]=='.gif': print('It may take a while, please wait for minutes...') try: ver, ecl, qr_name = run( args.Words, args.version, args.level, args.picture, args.colorized, args.contrast, args.brightness, args.name, args.directory ) print('Succeed! 
\nCheck out your', str(ver) + '-' + str(ecl), 'QR-code:', qr_name) except: raise ================================================ FILE: video_downloader/requirements.txt ================================================ imageio numpy Pillow beautifulsoup4 ================================================ FILE: video_downloader/video_downloader.py ================================================ # -*- coding:utf-8 -*- from tkinter.filedialog import askdirectory from MyQR.myqr import run from urllib import request, parse from bs4 import BeautifulSoup import tkinter.messagebox as msgbox import tkinter as tk import webbrowser import re import json import os import types import requests import time """ 类说明:爱奇艺、优酷等实现在线观看以及视频下载的类 Parameters: width - tkinter主界面宽 height - tkinter主界面高 Returns: 无 Modify: 2017-05-09 """ class APP: def __init__(self, width = 500, height = 300): self.w = width self.h = height self.title = ' VIP视频破解助手' self.root = tk.Tk(className=self.title) self.url = tk.StringVar() self.v = tk.IntVar() self.v.set(1) #Frame空间 frame_1 = tk.Frame(self.root) frame_2 = tk.Frame(self.root) frame_3 = tk.Frame(self.root) #Menu菜单 menu = tk.Menu(self.root) self.root.config(menu = menu) filemenu = tk.Menu(menu,tearoff=0) moviemenu = tk.Menu(menu,tearoff = 0) menu.add_cascade(label = '菜单', menu = filemenu) menu.add_cascade(label = '友情链接', menu = moviemenu) filemenu.add_command(label = '使用说明',command = lambda :webbrowser.open('http://blog.csdn.net/c406495762/article/details/71334633')) filemenu.add_command(label = '关于作者',command = lambda :webbrowser.open('http://blog.csdn.net/c406495762')) filemenu.add_command(label = '退出',command = self.root.quit) #各个网站链接 moviemenu.add_command(label = '网易公开课',command = lambda :webbrowser.open('http://open.163.com/')) moviemenu.add_command(label = '腾讯视频',command = lambda :webbrowser.open('http://v.qq.com/')) moviemenu.add_command(label = '搜狐视频',command = lambda :webbrowser.open('http://tv.sohu.com/')) moviemenu.add_command(label = 
'芒果TV',command = lambda :webbrowser.open('http://www.mgtv.com/')) moviemenu.add_command(label = '爱奇艺',command = lambda :webbrowser.open('http://www.iqiyi.com/')) moviemenu.add_command(label = 'PPTV',command = lambda :webbrowser.open('http://www.bilibili.com/')) moviemenu.add_command(label = '优酷',command = lambda :webbrowser.open('http://www.youku.com/')) moviemenu.add_command(label = '乐视',command = lambda :webbrowser.open('http://www.le.com/')) moviemenu.add_command(label = '土豆',command = lambda :webbrowser.open('http://www.tudou.com/')) moviemenu.add_command(label = 'A站',command = lambda :webbrowser.open('http://www.acfun.tv/')) moviemenu.add_command(label = 'B站',command = lambda :webbrowser.open('http://www.bilibili.com/')) #控件内容设置 group = tk.Label(frame_1,text = '请选择一个视频播放通道:', padx = 10, pady = 10) tb1 = tk.Radiobutton(frame_1,text = '通道一', variable = self.v, value = 1, width = 10, height = 3) tb2 = tk.Radiobutton(frame_1,text = '通道二', variable = self.v, value = 2, width = 10, height = 3) label1 = tk.Label(frame_2, text = "请输入视频链接:") entry = tk.Entry(frame_2, textvariable = self.url, highlightcolor = 'Fuchsia', highlightthickness = 1,width = 35) label2 = tk.Label(frame_2, text = " ") play = tk.Button(frame_2, text = "播放", font = ('楷体',12), fg = 'Purple', width = 2, height = 1, command = self.video_play) label3 = tk.Label(frame_2, text = " ") # download = tk.Button(frame_2, text = "下载", font = ('楷体',12), fg = 'Purple', width = 2, height = 1, command = self.download_wmxz) QR_Code = tk.Button(frame_3, text = "手机观看", font = ('楷体',12), fg = 'Purple', width = 10, height = 2, command = self.QR_Code) label_explain = tk.Label(frame_3, fg = 'red', font = ('楷体',12), text = '\n注意:支持大部分主流视频网站的视频播放!\n此软件仅用于交流学习,请勿用于任何商业用途!') label_warning = tk.Label(frame_3, fg = 'blue', font = ('楷体',12),text = '\n建议:将Chrome内核浏览器设置为默认浏览器\n作者:Jack_Cui') #控件布局 frame_1.pack() frame_2.pack() frame_3.pack() group.grid(row = 0, column = 0) tb1.grid(row = 0, column = 1) tb2.grid(row = 0, column = 
2) label1.grid(row = 0, column = 0) entry.grid(row = 0, column = 1) label2.grid(row = 0, column = 2) play.grid(row = 0, column = 3,ipadx = 10, ipady = 10) label3.grid(row = 0, column = 4) # download.grid(row = 0, column = 5,ipadx = 10, ipady = 10) QR_Code.grid(row = 0, column = 0) label_explain.grid(row = 1, column = 0) label_warning.grid(row = 2, column = 0) """ 函数说明:jsonp解析 Parameters: _jsonp - jsonp字符串 Returns: _json - json格式数据 Modify: 2017-05-11 """ def loads_jsonp(self, _jsonp): try: _json = json.loads(re.match(".*?({.*}).*",_jsonp,re.S).group(1)) return _json except: raise ValueError('Invalid Input') """ 函数说明:视频播放 Parameters: self Returns: 无 Modify: 2017-05-09 """ def video_play(self): #视频解析网站地址 port_1 = 'http://www.wmxz.wang/video.php?url=' port_2 = 'http://www.vipjiexi.com/tong.php?url=' #正则表达是判定是否为合法链接 if re.match(r'^https?:/{2}\w.+$', self.url.get()): if self.v.get() == 1: #视频链接获取 ip = self.url.get() #视频链接加密 ip = parse.quote_plus(ip) #浏览器打开 webbrowser.open(port_1 + self.url.get()) elif self.v.get() == 2: #链接获取 ip = self.url.get() #链接加密 ip = parse.quote_plus(ip) #获取time、key、url get_url = 'http://www.vipjiexi.com/x2/tong.php?url=%s' % ip # get_url_head = { # 'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', # 'Referer':'http://www.vipjiexi.com/', # } # get_url_req = request.Request(url = get_url, headers = get_url_head) # get_url_response = request.urlopen(get_url_req) # get_url_html = get_url_response.read().decode('utf-8') # bf = BeautifulSoup(get_url_html, 'lxml') # a = str(bf.find_all('script')) # pattern = re.compile('"api.php", {"time":"(\d+)", "key": "(.+)", "url": "(.+)","type"', re.IGNORECASE) # string = pattern.findall(a) # now_time = string[0][0] # now_key = string[0][1] # now_url = string[0][2] # #请求播放,获取Success = 1 # get_movie_url = 'http://www.vipjiexi.com/x2/api.php' # get_movie_data = { # 'key':'%s' % now_key, # 'time':'%s' % now_time, # 
'type':'', # 'url':'%s' % now_url # } # get_movie_head = { # 'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', # 'Referer':'http://www.vipjiexi.com/x2/tong.php?', # 'url':'%s' % ip, # } # get_movie_req = request.Request(url = get_movie_url, headers = get_movie_head) # get_movie_data = parse.urlencode(get_movie_data).encode('utf-8') # get_movie_response = request.urlopen(get_movie_req, get_movie_data) #请求之后立刻打开 webbrowser.open(get_url) else: msgbox.showerror(title='错误',message='视频链接地址无效,请重新输入!') """ 函数说明:视频下载,通过无名小站抓包(已经无法使用) Parameters: self Returns: 无 Modify: 2017-06-15 """ def download_wmxz(self): if re.match(r'^https?:/{2}\w.+$', self.url.get()): #视频链接获取 ip = self.url.get() #视频链接加密 ip = parse.quote_plus(ip) #获取保存视频的url get_url = 'http://www.sfsft.com/index.php?url=%s' % ip head = { 'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19', 'Referer':'http://www.sfsft.com/index.php?url=%s' % ip } get_url_req = request.Request(url = get_url, headers = head) get_url_response = request.urlopen(get_url_req) get_url_html = get_url_response.read().decode('utf-8') bf = BeautifulSoup(get_url_html, 'lxml') a = str(bf.find_all('script')) pattern = re.compile("url : '(.+)',", re.IGNORECASE) url = pattern.findall(a)[0] #获取视频地址 get_movie_url = 'http://www.sfsft.com/api.php' get_movie_data = { 'up':'0', 'url':'%s' % url, } get_movie_req = request.Request(url = get_movie_url, headers = head) get_movie_data = parse.urlencode(get_movie_data).encode('utf-8') get_movie_response = request.urlopen(get_movie_req, get_movie_data) get_movie_html = get_movie_response.read().decode('utf-8') get_movie_data = json.loads(get_movie_html) webbrowser.open(get_movie_data['url']) else: msgbox.showerror(title='错误',message='视频链接地址无效,请重新输入!') """ 函数说明:生成二维码,手机观看 Parameters: self Returns: 无 Modify: 2017-05-12 """ def 
def center(self):
    """Position the main window in the middle of the screen.

    Reads the screen size from ``self.root`` and the window size from
    ``self.w`` / ``self.h``, then applies a ``WxH+X+Y`` geometry string.

    Parameters:
        self

    Returns:
        None
    """
    screen_w = self.root.winfo_screenwidth()
    screen_h = self.root.winfo_screenheight()
    # Offsets are truncated toward zero, matching int() on the float result.
    left = int(screen_w / 2 - self.w / 2)
    top = int(screen_h / 2 - self.h / 2)
    spec = '%sx%s+%s+%s' % (self.w, self.h, left, top)
    self.root.geometry(spec)
FILE: zhengfang_system_spider/requirements.txt ================================================ lxml==4.6.3 requests==2.20.0 Pillow>=6.2.2 beautifulsoup4==4.6.0 ================================================ FILE: zhengfang_system_spider/spider.py ================================================ #!/usr/bin/env python #-*- coding: utf-8 -*- __author__ = 'ZYSzys' import requests import re import os import sys import urllib import getpass from lxml import etree from PIL import Image from imp import reload from bs4 import BeautifulSoup class Who: def __init__(self, user, pswd): self.user = user self.pswd = pswd class Tool: rma = re.compile('|') rmtb = re.compile('
|
|
def Getgradetestresults(trs):
    """Turn the rows of the level-exam table into a list of dicts.

    The first row supplies the column names; every following row becomes
    one dict mapping those names to the row's cell texts, paired
    positionally (extra names or extra cells are dropped by ``zip``).

    Parameters:
        trs - sequence of row elements exposing ``xpath``

    Returns:
        list of dicts, one per data row; empty when ``trs`` is empty or
        holds only the header row.
    """
    if not trs:
        # No table rows at all: nothing to report instead of an IndexError.
        return []
    # NOTE(review): './/td/text()' yields text nodes only, so a cell with no
    # text would presumably shift later values one column left - confirm
    # against a real page.
    header = list(trs[0].xpath('.//td/text()'))
    return [dict(zip(header, row.xpath('.//td/text()'))) for row in trs[1:]]
RadioButtonList1, "Button1": "", "lbLanguage": "" } loginres = self.session.post(url, data=data) logcont = loginres.text pattern = re.compile( '

', re.S) res = re.findall(pattern, logcont) try: if res[0][17:29] == self.student.user: print('Login succeed!') except: print('Login failed! Maybe Wrong password ! ! !') return pattern = re.compile('(.*?)') xhxm = re.findall(pattern, logcont) name = xhxm[0].replace('同学', '') self.student.urlname = urllib.parse.quote_plus(str(name)) return True def GetClass(self): self.session.headers['Referer'] = self.baseurl + \ '/xs_main.aspx?xh=' + self.student.user kburl = self.baseurl + '/xskbcx.aspx?xh='+self.student.user + \ '&xm='+self.student.urlname+'&gnmkdm=N121603' kbresponse = self.session.get(kburl) kbcont = kbresponse.text pattern = re.compile('(.*?)', re.S) contents = re.findall(pattern, kbcont) tool = Tool() f = open(os.getcwd()+'/zhengfang.txt', 'w') f.write(u'本学期课表:'+'\n') cnt = 1 l = [u'周一', u'周二', u'周三', u'周四', u'周五', u'周六', u'周日'] for day in l: for i in contents: if u'星期' in i: continue elif u'第' in i: if day in i: con = tool.replace(i) f.write(str(cnt)+':\t'+con+'\n') cnt += 1 else: continue f.write('\n') f.close() print('Download class succeed!') def GetGrade(self): self.session.headers['Referer'] = self.baseurl + \ '/xs_main.aspx?xh=' + self.student.user gradeurl = self.baseurl + '/xscjcx.aspx?xh='+self.student.user + \ '&xm='+self.student.urlname+'&gnmkdm=N121605' graderesponse = self.session.get(gradeurl) gradecont = graderesponse.content soup = BeautifulSoup(gradecont, 'lxml') __VIEWSTATE = soup.findAll(name="input")[2]["value"] self.session.headers['Referer'] = gradeurl data = { "__EVENTTARGET": "", "__EVENTARGUMENT": "", "__VIEWSTATE": __VIEWSTATE, "hidLanguage": "", "ddlXN": "", "ddlXQ": "", "ddl_kcxz": "", "btn_zcj": u'历年成绩' } grares = self.session.post(gradeurl, data=data) grades = Getgrade(grares) totup = 0 totdown = 0 f = open(os.getcwd()+'/zhengfang.txt', 'a+') f.write('\n\n\n'+u'历年成绩:'+'\n') for i in grades[0]: f.write('%-13s\t' % i) f.write('\n') for each in grades: for one in each: f.write('%-15s\t' % each[one]) f.write('\n') totup = totup + 
def GradeTestResults(self):
    """Download the level-exam results and append them to zhengfang.txt.

    Requests ``/xsdjkscx.aspx`` for the logged-in student (with the main
    page as Referer), reads the rows of the ``datelist`` table, and appends
    a heading, a header line and one line per result to ``zhengfang.txt``
    in the current working directory.

    Parameters:
        self

    Returns:
        None
    """
    student = self.student
    self.session.headers['Referer'] = self.baseurl + \
        '/xs_main.aspx?xh=' + student.user
    gtrurl = self.baseurl + '/xsdjkscx.aspx?xh=' + student.user + \
        '&xm=' + student.urlname + '&gnmkdm=N121606'
    gtrresponse = self.session.get(gtrurl)
    gtrhtml = etree.HTML(gtrresponse.text)
    trs = gtrhtml.xpath('//table[@class="datelist"]/tr')
    # Guard here as well so a page without the table does not raise.
    results = Getgradetestresults(trs) if trs else []

    # The context manager closes the file even if a write fails.
    with open(os.getcwd()+'/zhengfang.txt', 'a+') as f:
        f.write('\n\n\n'+u'等级考试成绩:'+'\n')
        if results:
            # Column names come from the keys of the first result.
            for one in results[0]:
                f.write('%-10s\t' % one)
        f.write('\n')
        for each in results:
            for one in each:
                f.write('%-10s\t' % each[one])
            f.write('\n')
    print('Download grade test results succeed!')
电工电子技术基础AⅡ---周四第3,4,5节{第1-16周}---郜园园/章云(章云,郜园园)---教5403(多媒体) 13: 物联网工程概论A---周四第6,7节{第2-16周|双周}---孔汶汶/张建锋/冯海林/吴剑(孔汶汶,张建锋)---学10603(实验室) 14: 毛泽东思想和中国特色社会主义理论体系概论---周五第3,4节{第2-16周}---张国泉---教1401(多媒体)---2018年06月30日(10:20-11:10)---学10203(实验室) 15: 物联网工程概论A---周五第8,9节{第1-16周}---孔汶汶/张建锋/冯海林/吴剑(孔汶汶,张建锋)---教1512(多媒体) 16: 思辨与创新(网络课程)---周日第12节{第1-15周}---网络教师---------用经济学智慧解读中国(网络课程)---周日第12节{第1-15周}---网络教师--- 历年成绩: 学年 学期 课程名称 课程性质 学分 绩点 成绩 2016-2017 1 思想道德修养与法律基础 必修 3 2.80 78 2016-2017 1 形势与政策 必修 0.5 4.50 优 2016-2017 1 大学生心理健康教育 必修 1 3.30 83 2016-2017 1 大学生职业发展 必修 0.5 3.80 88 2016-2017 1 高级语言程序设计 必修 4 4.40 94 2016-2017 1 学业指导 必修 0.5 4.40 94 2016-2017 1 大学计算机基础A 必修 1 3.90 89 2016-2017 1 高等数学AI 必修 4 4.50 95 2016-2017 1 线性代数A 必修 3 4.60 96 2016-2017 1 大学英语BI 必修 4 3.60 86 2016-2017 1 军事理论 必修 0.5 3.50 良 2016-2017 1 军事技能训练 必修 0.5 4.50 95 2016-2017 1 大学体育I 必修 0.75 2.30 73 2016-2017 2 马克思主义基本原理概论 必修 3 2.20 72 2016-2017 2 形势与政策 必修 0.5 4.50 优 2016-2017 2 信息技术导论 必修 1.5 4.40 94 2016-2017 2 数据结构C 必修 3.5 4.10 91 2016-2017 2 数据结构C实习 必修 1 4.50 优 2016-2017 2 应用文写作 必修 2 3.90 89 2016-2017 2 高等数学AII 必修 5 4.20 92 2016-2017 2 大学物理AI 必修 3 2.90 79 2016-2017 2 大学英语BII 必修 4 3.20 82 2016-2017 2 大学体育II 必修 0.75 2.50 75 2017-2018 1 中国近现代史纲要 必修 2 2.30 73 2017-2018 1 形势与政策 必修 0.5 4.50 优 2017-2018 1 电工电子技术基础AI 必修 3.5 2.60 76 2017-2018 1 概率论与数理统计A 必修 4 4.70 97 2017-2018 1 大学物理AII 必修 3 3.30 83 2017-2018 1 大学物理A实验 必修 1.5 3.40 84 2017-2018 1 英语报刊选读 必修 2 3.40 84 2017-2018 1 大学体育III 必修 0.75 3.00 80 2017-2018 1 生命科学与生物技术导论B 选修 2 3.50 85 2017-2018 1 动物福利B(双语) 选修 2 4.50 95 2017-2018 1 JAVA程序设计B 选修 3 4.50 95 2017-2018 1 面向对象程序设计B 选修 3.5 4.10 91 2017-2018 1 专业认知实习 选修 0.5 4.70 97 2017-2018 1 个人理财规划(网络课程) 选修 2 4.50 优 2017-2018 1 淘宝店铺设计与制作(实验室开放项目) 选修 1 3.20 82 2017-2018 2 大学体育(篮球) 必修 0.75 2.10 71 2017-2018 2 电工电子技术基础AⅡ 必修 3.5 2.30 73 2017-2018 2 电工电子技术基础实习A 必修 1 2.50 中 2017-2018 2 中国文化英语 必修 2 3.20 82 2017-2018 2 数据库原理与技术B 选修 3 4.10 91 2017-2018 2 数据库原理与技术实习B 选修 1 3.50 良 2017-2018 2 思辨与创新(网络课程) 选修 2 3.90 89 2017-2018 2 
用经济学智慧解读中国(网络课程) 选修 3 4.80 97.97 平均绩点: 3.70 总学分绩点: 351.68 总学分: 95.00 等级考试成绩: 学年 学期 等级考试名称 准考证号 考试日期 成绩 听力成绩 阅读成绩 写作成绩 综合成绩 2016-2017 1 英语四级 330391162105502 2016-12-17 563 212 190 161 0 2016-2017 2 英语六级 330391171213315 2017-6-17 434 112 195 127 0 2017-2018 1 英语六级 330391172204918 2017-12-16 415 135 151 129 0