Full Code of Jack-Cherish/python-spider for AI

master 60b7ed839713 cached
68 files
274.5 KB
95.1k tokens
270 symbols
1 requests
Download .txt
Showing preview only (292K chars total). Download the full file or copy to clipboard to get everything.
Repository: Jack-Cherish/python-spider
Branch: master
Commit: 60b7ed839713
Files: 68
Total size: 274.5 KB

Directory structure:
gitextract_u9lbtfzc/

├── 12306.py
├── 2020/
│   ├── README.md
│   ├── api/
│   │   └── api.py
│   ├── bilibili/
│   │   ├── download.py
│   │   └── xml2ass.py
│   ├── dmzj/
│   │   └── cartoon.py
│   ├── taobao/
│   │   └── taobao_login.py
│   ├── xbqg/
│   │   └── xbqg_spider.py
│   └── zycjw/
│       └── video_download.py
├── Netease/
│   ├── Netease.py
│   └── music_list.txt
├── README.md
├── baiduwenku.py
├── baiduwenku_pro_1.py
├── baiwan/
│   ├── app.js
│   ├── baiwan.py
│   ├── file.txt
│   ├── index.html
│   └── question.txt
├── bilibili/
│   ├── README.md
│   ├── bilibili.py
│   └── xml2ass.py
├── bilibili_luckyman/
│   ├── README.md
│   └── bilibili_luckyman.py
├── biqukan.py
├── cartoon/
│   ├── cartoon/
│   │   ├── __init__.py
│   │   ├── items.py
│   │   ├── middlewares.py
│   │   ├── pipelines.py
│   │   ├── settings.py
│   │   └── spiders/
│   │       ├── __init__.py
│   │       └── comic_spider.py
│   └── scrapy.cfg
├── daili.py
├── dingdong/
│   ├── README.md
│   └── jd.py
├── douyin/
│   ├── README.md
│   ├── douyin.py
│   └── fuck-byted-acrawler.js
├── douyin.py
├── douyin_pro.py
├── downloader.py
├── financical.py
├── geetest.py
├── hero.py
├── one_hour_spider/
│   ├── biquge20180731.py
│   ├── biqukan.py
│   ├── unsplash.py
│   ├── unsplash20180731.py
│   └── vidoe_downloader.py
├── shuaia.py
├── video_downloader/
│   ├── MyQR/
│   │   ├── __init__.py
│   │   ├── mylibs/
│   │   │   ├── ECC.py
│   │   │   ├── __init__.py
│   │   │   ├── constant.py
│   │   │   ├── data.py
│   │   │   ├── draw.py
│   │   │   ├── matrix.py
│   │   │   ├── structure.py
│   │   │   └── theqrmodule.py
│   │   ├── myqr.py
│   │   └── terminal.py
│   ├── requirements.txt
│   └── video_downloader.py
└── zhengfang_system_spider/
    ├── README.md
    ├── requirements.txt
    ├── spider.py
    └── zhengfang.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: 12306.py
================================================
# -*- coding: utf-8 -*-
"""
@author: liuyw
"""
from splinter.browser import Browser
from time import sleep
import traceback
import time, sys

class huoche(object):
	"""Drive a browser through the 12306 login, query and booking pages.

	All settings (credentials, stations, date, passengers, seat class) are
	class attributes; edit them before calling ``start()``.
	"""
	driver_name = ''
	executable_path = ''
	# Username and password
	username = u"xxx"
	passwd = u"xxx"
	# The station cookie values have to be looked up by hand; the two below
	# are Shenyang and Harbin respectively.
	starts = u"%u6C88%u9633%2CSYT"
	ends = u"%u54C8%u5C14%u6EE8%2CHBB"

	# Date format: 2018-01-19
	dtime = u"2018-01-19"
	# Which listed train to book (1-based index into the "预订" buttons);
	# 0 means click every button from top to bottom.
	order = 0
	### Passenger names
	users = [u"xxx",u"xxx"]
	## Seat class
	xb = u"二等座"
	# Ticket type
	pz = u"成人票"

	# URLs used by the flow (the bare string below is kept from the original)
	"""网址"""
	ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init"
	login_url = "https://kyfw.12306.cn/otn/login/init"
	initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306"
	buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc"

	def __init__(self):
		"""Select the Chrome driver and the hard-coded chromedriver path."""
		self.driver_name = 'chrome'
		self.executable_path = 'D:/chromedriver'

	def login(self):
		"""Open the login page, fill in the credentials and wait for the user.

		The captcha is solved by hand; this method polls once per second
		until the browser URL equals ``initmy_url``.
		"""
		self.driver.visit(self.login_url)
		self.driver.fill("loginUserDTO.user_name", self.username)
		# sleep(1)
		self.driver.fill("userDTO.password", self.passwd)
		print(u"等待验证码,自行输入...")
		# Block until the site redirects to the account page.
		while True:
			if self.driver.url != self.initmy_url:
				sleep(1)
			else:
				break

	def start(self):
		"""Run the whole flow: launch the browser, log in, query and book.

		Any exception raised after the ticket page has been opened is printed
		and swallowed by the outer ``try``.
		"""
		self.driver = Browser(driver_name=self.driver_name,executable_path=self.executable_path)
		self.driver.driver.set_window_size(1400, 1000)
		self.login()
		# sleep(1)
		self.driver.visit(self.ticket_url)
		try:
			print(u"购票页面开始...")
			# sleep(1)
			# Load the query parameters through cookies
			self.driver.cookies.add({"_jc_save_fromStation": self.starts})
			self.driver.cookies.add({"_jc_save_toStation": self.ends})
			self.driver.cookies.add({"_jc_save_fromDate": self.dtime})

			self.driver.reload()

			count = 0
			# Keep querying for as long as the browser stays on the ticket page.
			if self.order != 0:
				while self.driver.url == self.ticket_url:
					self.driver.find_by_text(u"查询").click()
					count += 1
					print(u"循环点击查询... 第 %s 次" % count)
					# sleep(1)
					try:
						# Click the booking button of the configured train only.
						self.driver.find_by_text(u"预订")[self.order - 1].click()
					except Exception as e:
						print(e)
						print(u"还没开始预订")
						continue
			else:
				while self.driver.url == self.ticket_url:
					self.driver.find_by_text(u"查询").click()
					count += 1
					print(u"循环点击查询... 第 %s 次" % count)
					# sleep(0.8)
					try:
						# Try every booking button in turn, one second apart.
						for i in self.driver.find_by_text(u"预订"):
							i.click()
							sleep(1)
					except Exception as e:
						print(e)
						print(u"还没开始预订 %s" % count)
						continue
			print(u"开始预订...")
			# sleep(3)
			# self.driver.reload()
			sleep(1)
			print(u'开始选择用户...')
			for user in self.users:
				self.driver.find_by_text(user).last.click()

			print(u"提交订单...")
			sleep(1)
			self.driver.find_by_text(self.pz).click()
			# NOTE(review): the element id is an empty string here, which
			# looks like an unfilled placeholder -- confirm the intended id.
			self.driver.find_by_id('').select(self.pz)
			# sleep(1)
			self.driver.find_by_text(self.xb).click()
			sleep(1)
			self.driver.find_by_id('submitOrder_id').click()
			print(u"开始选座...")
			# Click the elements with ids '1D' and '1F' (seat choices).
			self.driver.find_by_id('1D').last.click()
			self.driver.find_by_id('1F').last.click()

			sleep(1.5)
			print(u"确认选座...")
			self.driver.find_by_id('qr_submit_id').click()

		except Exception as e:
			print(e)

if __name__ == '__main__':
	# Bind the instance to its own name: the original rebound ``huoche`` and
	# thereby replaced the class with one of its instances.
	ticket_bot = huoche()
	ticket_bot.start()

================================================
FILE: 2020/README.md
================================================
# Python Spider 2020

由于这个项目时间太长了,陆陆续续,很多实战示例也早已失效。

网络爬虫,是一门比较通用的基础技术,各个领域都会有所涉及,比如我做视觉算法的,也需要用到网络爬虫,例如调用 API 接口清洗数据等,这本质也都是一个小的爬虫程序。

为了提供各位更好的学习示例,我决定重写这一系列教程,对一些失效的示例,重新找例子,并查缺补漏,完善这一系列教程。

2020年,最新版的 Python3 网络爬虫实战系列教程。

原创文章每周最少两篇,**后续最新文章**会在[【公众号】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)首发,视频[【B站】](https://space.bilibili.com/331507846)首发,大家可以加我[【微信】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)进**交流群**,技术交流或提意见都可以,欢迎**Star**!

<p align="center">
  <a href="https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg" target="_blank"><img src="https://img.shields.io/badge/weChat-微信群-blue.svg" alt="微信群"></a>
  <a href="https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg" target="_blank"><img src="https://img.shields.io/badge/%E5%85%AC%E4%BC%97%E5%8F%B7-Jack%20Cui-lightgrey.svg" alt="公众号"></a>
  <a href="https://space.bilibili.com/331507846"><img src="https://img.shields.io/badge/bilibili-哔哩哔哩-critical" alt="B站"></a>
  <a href="https://www.zhihu.com/people/Jack--Cui" target="_blank"><img src="https://img.shields.io/badge/zhihu-知乎-informational" alt="知乎"></a>
  <a href="https://blog.csdn.net/c406495762" target="_blank"><img src="https://img.shields.io/badge/csdn-CSDN-red.svg" alt="CSDN"></a>
  <a href="https://www.toutiao.com/c/user/token/MS4wLjABAAAA5gJtmezUJ6vli2hZvnN13iLnzKLpuF8gGHeS0iVlmNs/" target="_blank"><img src="https://img.shields.io/badge/toutiao-%E5%A4%B4%E6%9D%A1-important.svg" alt="头条"></a>
  <a href="https://juejin.im/user/5ea2ca74e51d4546b50d5f9f" target="_blank"><img src="https://img.shields.io/badge/juejin-掘金-blue.svg" alt="掘金"></a>
</p>

## Python3 网络爬虫教程 2020
|   文章   |  公众号  |    代码    |
| :------  | :--------: | :--------: |
| Python3 网络爬虫(一):初识网络爬虫之夜探老王家 | [公众号](https://mp.weixin.qq.com/s/1rcq9RQYuAuHFg1w1j8HXg "Python3 网络爬虫(一)") | no |
| Python3 网络爬虫(二):下载小说的正确姿势 | [公众号](https://mp.weixin.qq.com/s/5e2_r0QXUISVp9GdDsqbzg "Python3 网络爬虫(二)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/xbqg "Python3 网络爬虫(二)") |
| Python3 网络爬虫(三):漫画下载,动态加载、反爬虫这都不叫事!| [公众号](https://mp.weixin.qq.com/s/wyS-OP04K3Vs9arSelRlyA "Python3网络爬虫(三)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/dmzj "Python3 网络爬虫(三)") |
| Python3 网络爬虫(四):视频下载,那些事儿!| [公众号](https://mp.weixin.qq.com/s/_geNA6Dwo4kx25X7trJzlg "Python3 网络爬虫(四)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/zycjw "Python3 网络爬虫(四)") |
| Python3 网络爬虫(五):老板,需要特殊服务吗?| [公众号](https://mp.weixin.qq.com/s/PPTSnIHV71b-wB3oRiYnIA "Python3 网络爬虫(五)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/api "Python3 网络爬虫(五)") |
| Python3 网络爬虫(六):618,爱他/她,就清空他/她的购物车!| [公众号](https://mp.weixin.qq.com/s/lXXDfzyLVrf3f-aqJN1C3A "Python3 网络爬虫(六)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/taobao "Python3 网络爬虫(六)") |
| 宝藏B站UP主,视频弹幕尽收囊中!| [公众号](https://mp.weixin.qq.com/s/aWratg1j9RBAjIghoY66yQ "宝藏B站UP主,视频弹幕尽收囊中!") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/bilibili "宝藏B站UP主,视频弹幕尽收囊中!") |

更多精彩,敬请期待!

<a name="微信"></a>  <a name="公众号"></a>

<img src="https://ftp.bmp.ovh/imgs/2020/07/112254f0199e3d4f.jpg" alt="wechat" width="400" height="200" align="bottom" />


================================================
FILE: 2020/api/api.py
================================================
import requests
import base64
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
# NOTE: the notebook version of this script used the IPython magic
# "%matplotlib inline" here. It is a syntax error in a plain .py file, so it
# is kept only as a comment; re-enable it when running inside Jupyter.
# %matplotlib inline


beautify_url = "https://api-cn.faceplusplus.com/facepp/v2/beautify"
# API Key and API Secret (a.k.a. Secret Key) of the application you created
AK = ''
SK = ''

# Optional parameters; each defaults to 50 when it is not sent.
# Whitening level, 0 - 100
whitening = 80
# Skin-smoothing level, 0 - 100
smoothing = 80
# Face-thinning level, 0 - 100
thinface = 20
# Face-shrinking level, 0 - 100
shrink_face = 50
# Eye-enlarging level, 0 - 100
enlarge_eye = 50
# Eyebrow-removal level, 0 - 100
remove_eyebrow = 50
# Filter name; no filter is applied when left empty
filter_type = ''

# Read the image as bytes and encode it as base64. The context manager
# closes the file handle, which the original left open.
img_name = 'test_1.png'
with open(img_name, 'rb') as f:
    img_base64 = base64.b64encode(f.read())

# Only whitening, smoothing and thinface are sent; the rest use API defaults.
data = {
    'api_key': AK,
    'api_secret': SK,
    'image_base64': img_base64,
    'whitening': whitening,
    'smoothing': smoothing,
    'thinface': thinface,
    }

r = requests.post(url=beautify_url, data=data)
html = json.loads(r.text)

# Fail with the server's answer instead of a bare KeyError when the response
# carries no image.
if 'result' not in html:
    raise RuntimeError('beautify request failed: %s' % html.get('error_message', html))

# Decode the base64 image returned by the API
base64_data = html['result']
imgData = base64.b64decode(base64_data)
nparr = np.frombuffer(imgData, np.uint8)
img_res = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
# OpenCV decodes to BGR while matplotlib expects RGB, so swap the channels.
# The variable names are kept from the original although the data is RGB.
img_res_BGR = cv2.cvtColor(img_res, cv2.COLOR_BGR2RGB)

# Original image
img = cv2.imread(img_name)
img_BGR = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Show the original and the beautified image side by side
fig, axs = plt.subplots(nrows=1, ncols=2, sharex=False, sharey=False, figsize=(10,10))
axs[0].imshow(img_BGR)
axs[1].imshow(img_res_BGR)
plt.show()


================================================
FILE: 2020/bilibili/download.py
================================================
# -*-coding:utf-8 -*-
# Website: https://cuijiahua.com
# Author: Jack Cui
# Date: 2020.07.22
import requests
import json
import re
import json
import math
import xml2ass
import time
from contextlib import closing

from bs4 import BeautifulSoup

import os
from win32com.client import Dispatch

def addTasktoXunlei(down_url):
    """Queue ``down_url`` in the Xunlei (Thunder) client via its COM agent.

    Args:
        down_url: URL passed as the first argument of ``AddTask``.

    Returns:
        True when ``AddTask`` and ``CommitTasks`` both finished without
        raising, otherwise False.
    """
    o = Dispatch('ThunderAgent.Agent64.1')
    try:
        o.AddTask(down_url, "", "", "", "", -1, 0, 5)
        o.CommitTasks()
    except Exception as e:
        # Python 3 exceptions have no ``message`` attribute: the original
        # ``Exception.message`` lookup raised AttributeError inside this
        # handler instead of reporting the real failure.
        print(e)
        print(" AddTask is fail!")
        return False
    return True

def get_download_url(arcurl):
    """Resolve a bilibili video page URL to a direct download URL.

    The page URL is sent to a third-party parsing service; the iframe of its
    answer is fetched, and the first quoted http(s) URL found in that page's
    first inline ``<script>`` is returned with backslashes removed.

    Args:
        arcurl: URL of the bilibili video page.

    Returns:
        The download URL as a string.

    Raises:
        IndexError: when the inline script contains no quoted URL.
    """
    # Search WeChat for the JackCui-AI official account and reply 「B 站」
    # there to obtain the address of the video-parsing service.
    jiexi_url = 'xxx'
    payload = {'url': arcurl}
    jiexi_req = requests.get(jiexi_url, params=payload)
    jiexi_bf = BeautifulSoup(jiexi_req.text)
    jiexi_dn_url = jiexi_bf.iframe.get('src')
    dn_req = requests.get(jiexi_dn_url)
    dn_bf = BeautifulSoup(dn_req.text)
    # The first <script> without a src attribute holds the player config.
    video_script = dn_bf.find('script',src = None)
    DPlayer = str(video_script.string)
    # Raw string: the original literal relied on the invalid escapes "\(" and
    # "\)", which newer Python versions warn about. The pattern is unchanged.
    url_pattern = r"'(http[s]?:(?:[a-zA-Z]|[0-9]|[$-_@.&~+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)'"
    download_url = re.findall(url_pattern, DPlayer)[0]
    # The URL is embedded in JavaScript with escaped slashes; drop the escapes.
    download_url = download_url.replace('\\', '')
    return download_url

space_url = 'https://space.bilibili.com/280793434'
search_url = 'https://api.bilibili.com/x/space/arc/search'
# The uploader id (mid) is the last path segment of the space URL.
mid = space_url.split('/')[-1]
sess = requests.Session()
search_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'application/json, text/plain, */*',
}

# Get the number of videos: a one-entry page is enough to read the total.
ps = 1
pn = 1
search_params = dict(mid=mid, ps=ps, tid=0, pn=pn)
req = sess.get(url=search_url, headers=search_headers, params=search_params, verify=False)
info = json.loads(req.text)
video_count = info['data']['page']['count']

# Walk through every result page, ten videos per page, collecting
# [title, video page URL] pairs.
ps = 10
page = math.ceil(video_count/ps)
videos_list = []
for pn in range(1, page+1):
    search_params = dict(mid=mid, ps=ps, tid=0, pn=pn)
    req = sess.get(url=search_url, headers=search_headers, params=search_params, verify=False)
    info = json.loads(req.text)
    vlist = info['data']['list']['vlist']
    videos_list.extend(
        [video['title'], 'https://www.bilibili.com/video/' + video['bvid']]
        for video in vlist
    )
print('共 %d 个视频' % len(videos_list))
# Maps the file name Xunlei saves under to the name the video should get.
all_video = {}
# Characters removed from a title before it is used as a file name.
# The original literal contained a WordPress emoji artifact (an <img ...> tag
# in place of the heart character), so every letter of that tag was stripped
# from the titles as well; the intended heart character is restored here.
invalid_filename_chars = u'´☆❤◦\\/:*?"<>| '
strip_invalid_chars = str.maketrans('', '', invalid_filename_chars)
# Download the first 10 videos
for video in videos_list[:10]:
    download_url = get_download_url(video[1])
    print(video[0] + ':' + download_url)
    # Remember the file name taken from the download URL for the rename step
    xunlei_video_name = download_url.split('?')[0].split('/')[-1]
    filename = video[0].translate(strip_invalid_chars)
    save_video_name = filename + '.mp4'
    all_video[xunlei_video_name] = save_video_name

    addTasktoXunlei(download_url)
    # Download the danmaku (bullet comments)
    danmu_name = filename + '.xml'
    danmu_ass = filename + '.ass'
    # NOTE(review): assumes the oid is the 7th '/'-separated piece of the
    # download URL -- confirm against the parsing service's URL layout.
    oid = download_url.split('/')[6]
    danmu_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(oid)
    danmu_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
                    'Accept': '*/*',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'zh-CN,zh;q=0.9'}
    danmu_saved = False
    with closing(sess.get(danmu_url, headers=danmu_header, stream=True, verify=False)) as response:
        if response.status_code == 200:
            with open(danmu_name, 'wb') as file:
                # Stream in sizeable chunks; the default chunk size is one
                # byte, and the original also flushed after every byte.
                for data in response.iter_content(chunk_size=8192):
                    file.write(data)
            danmu_saved = True
        else:
            print('链接异常')
    time.sleep(0.5)
    # Convert only when the XML was actually written; the original ran the
    # conversion even after a failed download.
    if danmu_saved:
        xml2ass.Danmaku2ASS(danmu_name, danmu_ass, 1280, 720)
# Rename the videos: wait until each downloaded file shows up in the current
# directory, then give it its title-based name.
for key, item in all_video.items():
    while key not in os.listdir('./'):
        time.sleep(1)
    os.rename(key, item)


================================================
FILE: 2020/bilibili/xml2ass.py
================================================
# The original author of this program, Danmaku2ASS, is StarBrilliant.
# This file is released under General Public License version 3.
# You should have received a copy of General Public License text alongside with
# this program. If not, you can obtain it at http://gnu.org/copyleft/gpl.html .
# This program comes with no warranty, the author will not be responsible for
# any damage or problems caused by this program.

import argparse
import calendar
import gettext
import io
import json
import logging
import math
import os
import random
import re
import sys
import time
import xml.dom.minidom


# Refuse to run on Python 2.
if sys.version_info[0] < 3:
    raise RuntimeError('at least Python 3.0 is required')

# Install _() into builtins; message catalogues are looked up in a "locale"
# directory next to the resolved path of the launched script.
gettext.install(
    'danmaku2ass',
    os.path.join(
        os.path.dirname(os.path.abspath(os.path.realpath(sys.argv[0] or 'locale'))),
        'locale',
    ),
)

def SeekZero(function):
    """Decorate a one-argument file function so it runs on a rewound file.

    The file is seeked to offset 0 before the call and again afterwards,
    whether the wrapped function returns or raises.
    """
    def decorated_function(file_):
        file_.seek(0)
        try:
            result = function(file_)
        finally:
            # Always leave the file at the start for the next reader.
            file_.seek(0)
        return result
    return decorated_function


def EOFAsNone(function):
    """Decorate ``function`` so that an EOFError becomes a None result.

    All positional and keyword arguments are forwarded unchanged; any other
    exception propagates.
    """
    def decorated_function(*args, **kwargs):
        result = None
        try:
            result = function(*args, **kwargs)
        except EOFError:
            pass
        return result
    return decorated_function


@SeekZero
@EOFAsNone
def ProbeCommentFormat(f):
    """Guess the comment-file format from the first characters of ``f``.

    ``f`` is read as text. Returns one of 'Acfun', 'Tudou', 'sH5V',
    'Niconico', 'Bilibili', 'MioMio', or None when the header is not
    recognised. The decorators rewind the file before and after probing and
    turn an EOFError into None.
    """
    first = f.read(1)
    if first == '[':
        # It is unwise to wrap a JSON object in an array!
        # See this: http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx/
        # Do never follow what Acfun developers did!
        return 'Acfun'
    if first == '{':
        json_heads = {
            '"status_code":': 'Tudou',
            '"root":{"total': 'sH5V',
        }
        return json_heads.get(f.read(14))
    if first == '<':
        second = f.read(1)
        if second == '?':
            # The XML declaration plus the start of the root element decide
            # between the XML-based sites.
            xml_heads = {
                'xml version="1.0" encoding="UTF-8"?><p': 'Niconico',
                'xml version="1.0" encoding="UTF-8"?><i': 'Bilibili',
                # tucao.cc, with the same file format as Bilibili
                'xml version="1.0" encoding="utf-8"?><i': 'Bilibili',
                # Komica, with the same file format as Bilibili
                'xml version="1.0" encoding="Utf-8"?>\n<': 'Bilibili',
                'xml version="1.0" encoding="UTF-8"?>\n<': 'MioMio',
            }
            return xml_heads.get(f.read(38))
        if second == 'p':
            # Himawari Douga, with the same file format as Niconico Douga
            return 'Niconico'
    return None


#
# ReadComments**** protocol
#
# Input:
#     f:         Input file
#     fontsize:  Default font size
#
# Output:
#     yield a tuple:
#         (timeline, timestamp, no, comment, pos, color, size, height, width)
#     timeline:  The position when the comment is replayed
#     timestamp: The UNIX timestamp when the comment is submitted
#     no:        A sequence of 1, 2, 3, ..., used for sorting
#     comment:   The content of the comment
#     pos:       0 for regular moving comment,
#                1 for bottom centered comment,
#                2 for top centered comment,
#                3 for reversed moving comment
#     color:     Font color represented in 0xRRGGBB,
#                e.g. 0xffffff for white
#     size:      Font size
#     height:    The estimated height in pixels
#                i.e. (comment.count('\n')+1)*size
#     width:     The estimated width in pixels
#                i.e. CalculateLength(comment)*size
#
# After implementing ReadComments****, make sure to update ProbeCommentFormat
# and CommentFormatMap.
#


def ReadCommentsNiconico(f, fontsize):
    """Yield comment tuples parsed from a Niconico XML comment file.

    Each ``<chat>`` element yields one tuple in the ReadComments**** layout.
    Comments whose text starts with '/' are skipped, and elements that cannot
    be parsed are logged as warnings and skipped.

    Args:
        f: XML file object (or path) accepted by ``xml.dom.minidom.parse``.
        fontsize: Default font size; 'big' and 'small' scale it.
    """
    NiconicoColorMap = {
        'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffcc00,
        'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff,
        'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000,
        'niconicowhite': 0xcccc99, 'white2': 0xcccc99,
        'truered': 0xcc0033, 'red2': 0xcc0033,
        'passionorange': 0xff6600, 'orange2': 0xff6600,
        'madyellow': 0x999900, 'yellow2': 0x999900,
        'elementalgreen': 0x00cc66, 'green2': 0x00cc66,
        'marineblue': 0x33ffcc, 'blue2': 0x33ffcc,
        'nobleviolet': 0x6633cc, 'purple2': 0x6633cc,
    }
    # Keywords of the "mail" attribute that set the position or the size.
    position_by_style = {'ue': 1, 'shita': 2}
    scale_by_style = {'big': 1.44, 'small': 0.64}
    dom = xml.dom.minidom.parse(f)
    for comment in dom.getElementsByTagName('chat'):
        try:
            c = str(comment.childNodes[0].wholeText)
            if c.startswith('/'):
                continue  # ignore advanced comments
            pos = 0
            color = 0xffffff
            size = fontsize
            # Later keywords override earlier ones of the same kind.
            for mailstyle in str(comment.getAttribute('mail')).split():
                if mailstyle in position_by_style:
                    pos = position_by_style[mailstyle]
                elif mailstyle in scale_by_style:
                    size = fontsize*scale_by_style[mailstyle]
                elif mailstyle in NiconicoColorMap:
                    color = NiconicoColorMap[mailstyle]
            # vpos is multiplied by 0.01 to obtain the timeline value.
            timeline = max(int(comment.getAttribute('vpos')), 0)*0.01
            timestamp = int(comment.getAttribute('date'))
            number = int(comment.getAttribute('no'))
            yield (timeline, timestamp, number, c, pos, color, size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue


def ReadCommentsAcfun(f, fontsize):
    """Yield comment tuples parsed from an Acfun JSON comment file.

    The file holds a JSON array. Each item carries a comma-separated
    parameter string under 'c' and the message under 'm'. Type '7' items are
    positioned comments: their message is itself JSON and is yielded as a
    dict with the position marker 'acfunpos'. Invalid items are logged as
    warnings and skipped.

    Args:
        f: Text file object containing the JSON array.
        fontsize: Default font size; the comment's own size is scaled by
            ``fontsize/25.0``.
    """
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment['c']).split(',')
            # Validate with explicit raises instead of ``assert``: asserts are
            # stripped under ``python -O``, after which an unsupported type
            # reached the dict lookup below and raised an uncaught KeyError.
            if len(p) < 6:
                raise ValueError('expected at least 6 comment parameters')
            if p[2] not in ('1', '2', '4', '5', '7'):
                raise ValueError('unsupported comment type: %r' % (p[2],))
            size = int(p[3])*fontsize/25.0
            if p[2] != '7':
                # Both the literal two-character "\r" and a real carriage
                # return stand for a line break.
                c = str(comment['m']).replace('\\r', '\n').replace('\r', '\n')
                yield (float(p[0]), int(p[5]), i, c, {'1': 0, '2': 0, '4': 2, '5': 1}[p[2]], int(p[1]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                c = dict(json.loads(comment['m']))
                yield (float(p[0]), int(p[5]), i, c, 'acfunpos', int(p[1]), size, 0, 0)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue


def ReadCommentsBilibili(f, fontsize):
    """Yield comment tuples parsed from a Bilibili XML comment file.

    Every ``<d>`` element has a comma-separated ``p`` attribute. Type '7'
    marks a positioned comment, yielded with the marker 'bilipos' and its raw
    text. Other types are regular comments in which the two-character
    sequence '/n' stands for a line break. Invalid elements are logged as
    warnings and skipped.

    Args:
        f: XML file object (or path) accepted by ``xml.dom.minidom.parse``.
        fontsize: Default font size; a regular comment's size is scaled by
            ``fontsize/25.0``.
    """
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('d')
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment.getAttribute('p')).split(',')
            # Validate with explicit raises instead of ``assert``: asserts are
            # stripped under ``python -O``, after which an unsupported type
            # reached the dict lookup below and raised an uncaught KeyError.
            if len(p) < 5:
                raise ValueError('expected at least 5 comment parameters')
            if p[1] not in ('1', '4', '5', '6', '7'):
                raise ValueError('unsupported comment type: %r' % (p[1],))
            if p[1] != '7':
                c = str(comment.childNodes[0].wholeText).replace('/n', '\n')
                size = int(p[2])*fontsize/25.0
                yield (float(p[0]), int(p[4]), i, c, {'1': 0, '4': 2, '5': 1, '6': 3}[p[1]], int(p[3]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:  # positioned comment
                c = str(comment.childNodes[0].wholeText)
                yield (float(p[0]), int(p[4]), i, c, 'bilipos', int(p[3]), int(p[2]), 0, 0)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue


def ReadCommentsTudou(f, fontsize):
    """Yield comment tuples parsed from a Tudou JSON comment file.

    Comments are read from the 'comment_list' array. Only 'pos' values 3, 4
    and 6 and 'size' values 0, 1 and 2 are accepted; other items are logged
    as warnings and skipped.

    Args:
        f: Text file object containing the JSON document.
        fontsize: Default font size, multiplied by 0.64, 1 or 1.44 according
            to the comment's 'size'.
    """
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element['comment_list']):
        try:
            # Validate with explicit raises instead of ``assert``: asserts are
            # stripped under ``python -O``, after which an unsupported value
            # reached the dict lookups below and raised an uncaught KeyError.
            if comment['pos'] not in (3, 4, 6):
                raise ValueError('unsupported pos: %r' % (comment['pos'],))
            c = str(comment['data'])
            if comment['size'] not in (0, 1, 2):
                raise ValueError('unsupported size: %r' % (comment['size'],))
            size = {0: 0.64, 1: 1, 2: 1.44}[comment['size']]*fontsize
            # replay_time is multiplied by 0.001 and truncated to an int.
            yield (int(comment['replay_time']*0.001), int(comment['commit_time']), i, c, {3: 0, 4: 2, 6: 1}[comment['pos']], int(comment['color']), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue


def ReadCommentsMioMio(f, fontsize):
    """Yield comment tuples parsed from a MioMio XML comment file.

    Each ``<data>`` element supplies the play time, the submission time and a
    ``<message>`` element holding the text and its 'fontsize', 'mode' and
    'color' attributes. Elements that cannot be parsed are logged as warnings
    and skipped.

    Args:
        f: XML file object (or path) accepted by ``xml.dom.minidom.parse``.
        fontsize: Default font size; the message's own size is scaled by
            ``fontsize/25.0``.
    """
    # The original also built an unused NiconicoColorMap and an unused
    # ``pos`` local here; both have been removed.
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('data')
    for i, comment in enumerate(comment_element):
        try:
            message = comment.getElementsByTagName('message')[0]
            c = str(message.childNodes[0].wholeText)
            size = int(message.getAttribute('fontsize'))*fontsize/25.0
            play_time = float(comment.getElementsByTagName('playTime')[0].childNodes[0].wholeText)
            # The <times> text is parsed as UTC and then shifted by 28800
            # seconds (8 hours).
            submitted = time.strptime(comment.getElementsByTagName('times')[0].childNodes[0].wholeText, '%Y-%m-%d %H:%M:%S')
            timestamp = int(calendar.timegm(submitted))-28800
            yield (play_time, timestamp, i, c, {'1': 0, '4': 2, '5': 1}[message.getAttribute('mode')], int(message.getAttribute('color')), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue


def ReadCommentsSH5V(f, fontsize):
    """Yield comment tuples parsed from an sH5V JSON comment file.

    Comments are read from ``root.bgs``. Type '7' items are positioned
    comments: they are yielded with the marker 'sH5Vpos', their own 'size',
    and seven extra fields (x, y, dur, data1..data4). All other types are
    regular comments using ``fontsize``. Items that cannot be parsed are
    logged as warnings and skipped.

    Args:
        f: Text file object containing the JSON document.
        fontsize: Font size used for regular comments.
    """
    document = json.load(f)
    for i, comment in enumerate(document["root"]["bgs"]):
        try:
            start = str(comment['at'])
            kind = str(comment['type'])
            submitted = str(comment['timestamp'])
            colour = str(comment['color'])
            c = str(comment['text'])
            if kind == '7':
                c_x = float(comment['x'])
                c_y = float(comment['y'])
                size = int(comment['size'])
                dur = int(comment['dur'])
                data1 = float(comment['data1'])
                data2 = float(comment['data2'])
                data3 = int(comment['data3'])
                data4 = int(comment['data4'])
                # The colour string is parsed as hex after dropping its
                # first character.
                yield (float(start), int(submitted), i, c, 'sH5Vpos', int(colour[1:], 16), size, 0, 0, c_x, c_y, dur, data1, data2, data3, data4)
            else:
                size = fontsize
                yield (float(start), int(submitted), i, c, {'0': 0, '1': 0, '4': 2, '5': 1}[kind], int(colour[1:], 16), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue


# Maps a format name returned by ProbeCommentFormat to its reader function;
# None (unrecognised format) maps to None.
CommentFormatMap = {
    None: None,
    'Niconico': ReadCommentsNiconico,
    'Acfun': ReadCommentsAcfun,
    'Bilibili': ReadCommentsBilibili,
    'Tudou': ReadCommentsTudou,
    'MioMio': ReadCommentsMioMio,
    'sH5V': ReadCommentsSH5V,
}


def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
    """Write one Bilibili positioned comment to ``f`` as an ASS Dialogue line.

    Args:
        f: Writable text file; exactly one line is written on success.
        c: Comment tuple. This function reads c[0] (start time), c[3] (JSON
            text holding the positioning arguments), c[5] (color as
            0xRRGGBB) and c[6] (font size).
        width: Target video width, passed to GetZoomFactor.
        height: Target video height, passed to GetZoomFactor.
        styleid: Style name placed in the Dialogue line.

    IndexError and ValueError raised while parsing are caught and reported
    through ``logging.warning``; nothing is written in that case.
    """
    #BiliPlayerSize = (512, 384)  # Bilibili player version 2010
    #BiliPlayerSize = (540, 384)  # Bilibili player version 2012
    BiliPlayerSize = (672, 438)  # Bilibili player version 2014
    # ZoomFactor[0] multiplies coordinates; ZoomFactor[1] and ZoomFactor[2]
    # are added to x and y respectively (see GetPosition below).
    ZoomFactor = GetZoomFactor(BiliPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        """Convert a player coordinate to a coordinate in the output video.

        Ints and floats above 1 are treated as player pixels; floats up to 1
        are treated as a fraction of the player size. Anything else is
        converted to int, or to float if that fails, and processed again.
        """
        isHeight = int(isHeight)  # True -> 1
        if isinstance(InputPos, int):
            return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
        elif isinstance(InputPos, float):
            if InputPos > 1:
                return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
            else:
                return BiliPlayerSize[isHeight]*ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
        else:
            try:
                InputPos = int(InputPos)
            except ValueError:
                InputPos = float(InputPos)
            return GetPosition(InputPos, isHeight)

    try:
        # presumably safe_list wraps a list with a dict-like get(index,
        # default); it is defined outside this view -- verify.
        comment_args = safe_list(json.loads(c[3]))
        text = ASSEscape(str(comment_args[4]).replace('/n', '\n'))
        # Indices 0/1 are the start position; 7/8 the end position, which
        # defaults to the start position.
        from_x = comment_args.get(0, 0)
        from_y = comment_args.get(1, 0)
        to_x = comment_args.get(7, from_x)
        to_y = comment_args.get(8, from_y)
        from_x = round(GetPosition(from_x, False))
        from_y = round(GetPosition(from_y, True))
        to_x = round(GetPosition(to_x, False))
        to_y = round(GetPosition(to_y, True))
        # Index 2 is "from-to" opacity; a single value is used for both ends.
        alpha = safe_list(str(comment_args.get(2, '1')).split('-'))
        from_alpha = float(alpha.get(0, 1))
        to_alpha = float(alpha.get(1, from_alpha))
        # Opacity 0..1 is inverted into the 0..255 value written to \alpha.
        from_alpha = 255-round(from_alpha*255)
        to_alpha = 255-round(to_alpha*255)
        rotate_z = int(comment_args.get(5, 0))
        rotate_y = int(comment_args.get(6, 0))
        # lifetime is added to the start time c[0] and multiplied by 1000 for
        # the fade/move arguments.
        # NOTE(review): the default of 4500 looks like milliseconds while the
        # value is used as seconds -- confirm.
        lifetime = float(comment_args.get(3, 4500))
        duration = int(comment_args.get(9, lifetime*1000))
        delay = int(comment_args.get(10, 0))
        fontface = comment_args.get(12)
        isborder = comment_args.get(11, 'true')
        styles = []
        # A static comment gets \pos, a moving one gets \move.
        if (from_x, from_y) == (to_x, to_y):
            styles.append('\\pos(%s, %s)' % (from_x, from_y))
        else:
            styles.append('\\move(%s, %s, %s, %s, %s, %s)' % (from_x, from_y, to_x, to_y, delay, delay+duration))
        styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (from_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (from_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
        if (from_x, from_y) != (to_x, to_y):
            # For a moving comment, animate the rotation towards the values
            # computed for the end position.
            styles.append('\\t(%s, %s, ' % (delay, delay+duration))
            styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
            styles.append(')')
        if fontface:
            styles.append('\\fn%s' % ASSEscape(fontface))
        styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        if c[5] != 0xffffff:
            # The colour bytes are written lowest byte first (blue, green,
            # red of the 0xRRGGBB value).
            styles.append('\\c&H%02X%02X%02X&' % (c[5] & 0xff, (c[5] >> 8) & 0xff, (c[5] >> 16) & 0xff))
            if c[5] == 0x000000:
                # Black text additionally gets the \3c colour set to white.
                styles.append('\\3c&HFFFFFF&')
        if from_alpha == to_alpha:
            styles.append('\\alpha&H%02X' % from_alpha)
        elif (from_alpha, to_alpha) == (255, 0):
            styles.append('\\fad(%s,0)' % (lifetime*1000))
        elif (from_alpha, to_alpha) == (0, 255):
            styles.append('\\fad(0, %s)' % (lifetime*1000))
        else:
            styles.append('\\fade(%(from_alpha)s, %(to_alpha)s, %(to_alpha)s, 0, %(end_time)s, %(end_time)s, %(end_time)s)' % {'from_alpha': from_alpha, 'to_alpha': to_alpha, 'end_time': lifetime*1000})
        if isborder == 'false':
            styles.append('\\bord0')
        f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(c[0]), 'end': ConvertTimestamp(c[0]+lifetime), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
    except (IndexError, ValueError) as e:
        # Report the raw comment text when available, otherwise the tuple.
        try:
            logging.warning(_('Invalid comment: %r') % c[3])
        except IndexError:
            logging.warning(_('Invalid comment: %r') % c)


def WriteCommentAcfunPositioned(f, c, width, height, styleid):
    """Write one AcFun positioned comment, plus its chained actions, as ASS events.

    ``c[3]`` is read as a mapping of special-comment arguments:
    ``'n'`` text, ``'c'`` anchor index, ``'w'`` font mapping (``'f'`` face,
    ``'b'`` bold), ``'b'`` border flag, ``'p'`` position mapping, ``'e'``/``'f'``
    x/y scale, ``'r'``/``'k'`` z/y rotation, ``'a'`` alpha, ``'t'`` start
    offset, ``'l'`` duration and ``'z'`` a list of follow-up action mappings.
    ``c[0]`` is the base time, ``c[5]`` the colour and ``c[6]`` the font size.

    Each action mapping may carry ``'l'`` (duration), ``'x'``/``'y'`` (target
    position), ``'f'``/``'g'`` (x/y scale), ``'c'`` (colour), ``'t'`` (alpha)
    and ``'d'``/``'e'`` (z/y rotation).

    Malformed comments (IndexError, KeyError, ValueError) are logged as a
    warning and skipped instead of aborting the conversion.
    """
    AcfunPlayerSize = (560, 400)
    ZoomFactor = GetZoomFactor(AcfunPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # Positions are expressed in thousandths of the player size.
        isHeight = int(isHeight)  # True -> 1
        return AcfunPlayerSize[isHeight]*ZoomFactor[0]*InputPos*0.001+ZoomFactor[isHeight+1]

    def GetRotationStyle(rotate_y, rotate_z, x, y):
        # x is normalised against the stage width and y against the stage
        # height; both the initial state and every action share this helper so
        # the two axes cannot get mixed up.
        return '\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2))

    def GetTransformStyles(x=None, y=None, scale_x=None, scale_y=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
        styles = []
        if x is not None and y is not None:
            styles.append('\\pos(%s, %s)' % (x, y))
        if scale_x is not None:
            styles.append('\\fscx%s' % scale_x)
        if scale_y is not None:
            styles.append('\\fscy%s' % scale_y)
        if rotate_z is not None and rotate_y is not None:
            assert x is not None
            assert y is not None
            styles.append(GetRotationStyle(rotate_y, rotate_z, x, y))
        if color is not None:
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                # Black text gets a white outline so it stays readable.
                styles.append('\\3c&HFFFFFF&')
        if alpha is not None:
            alpha = 255-round(alpha*255)
            styles.append('\\alpha&H%02X' % alpha)
        return styles

    def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
        # Zero-length (or negative) segments produce no event.
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})

    try:
        comment_args = c[3]
        text = ASSEscape(str(comment_args['n']).replace('\r', '\n'))
        common_styles = []
        anchor = {0: 7, 1: 8, 2: 9, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2, 8: 3}.get(comment_args.get('c', 0), 7)
        if anchor != 7:
            common_styles.append('\\an%s' % anchor)
        font = comment_args.get('w')
        if font:
            font = dict(font)
            fontface = font.get('f')
            if fontface:
                common_styles.append('\\fn%s' % ASSEscape(str(fontface)))
            fontbold = bool(font.get('b'))
            if fontbold:
                common_styles.append('\\b1')
        common_styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        isborder = bool(comment_args.get('b', True))
        if not isborder:
            common_styles.append('\\bord0')
        to_pos = dict(comment_args.get('p', {'x': 0, 'y': 0}))
        to_x = round(GetPosition(int(to_pos.get('x', 0)), False))
        to_y = round(GetPosition(int(to_pos.get('y', 0)), True))
        to_scale_x = round(float(comment_args.get('e', 1.0))*100)
        to_scale_y = round(float(comment_args.get('f', 1.0))*100)
        to_rotate_z = float(comment_args.get('r', 0.0))
        to_rotate_y = float(comment_args.get('k', 0.0))
        to_color = c[5]
        to_alpha = float(comment_args.get('a', 1.0))
        from_time = float(comment_args.get('t', 0.0))
        action_time = float(comment_args.get('l', 3.0))
        actions = list(comment_args.get('z', []))
        # First event: the comment resting in its initial state.
        transform_styles = GetTransformStyles(to_x, to_y, to_scale_x, to_scale_y, to_rotate_z, to_rotate_y, to_color, to_alpha)
        FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
        for action in actions:
            action = dict(action)
            # The previous target state becomes the start state of this action.
            from_x, from_y = to_x, to_y
            from_scale_x, from_scale_y = to_scale_x, to_scale_y
            from_rotate_z, from_rotate_y = to_rotate_z, to_rotate_y
            from_color, from_alpha = to_color, to_alpha
            from_time += action_time
            action_time = float(action.get('l', 0.0))
            action_styles = []
            if 'x' in action:
                to_x = round(GetPosition(int(action['x']), False))
            if 'y' in action:
                to_y = round(GetPosition(int(action['y']), True))
            if 'f' in action:
                to_scale_x = round(float(action['f'])*100)
                action_styles.append('\\fscx%s' % to_scale_x)
            if 'g' in action:
                to_scale_y = round(float(action['g'])*100)
                action_styles.append('\\fscy%s' % to_scale_y)
            if 'c' in action:
                to_color = int(action['c'])
                action_styles.append('\\c&H%02X%02X%02X&' % (to_color & 0xff, (to_color >> 8) & 0xff, (to_color >> 16) & 0xff))
            if 't' in action:
                to_alpha = float(action['t'])
                action_styles.append('\\alpha&H%02X' % (255-round(to_alpha*255)))
            if 'd' in action:
                to_rotate_z = float(action['d'])
            if 'e' in action:
                to_rotate_y = float(action['e'])
            if ('x' in action) or ('y' in action):
                # Moving: \move replaces \pos, the target rotation is animated.
                transform_styles = GetTransformStyles(None, None, from_scale_x, from_scale_y, None, None, from_color, from_alpha)
                transform_styles.append('\\move(%s, %s, %s, %s)' % (from_x, from_y, to_x, to_y))
                action_styles.append(GetRotationStyle(to_rotate_y, to_rotate_z, to_x, to_y))
            else:
                # Not moving: always rebuild the base styles from the start
                # state so tags from an earlier action are never carried over.
                transform_styles = GetTransformStyles(from_x, from_y, from_scale_x, from_scale_y, from_rotate_z, from_rotate_y, from_color, from_alpha)
                if ('d' in action) or ('e' in action):
                    action_styles.append(GetRotationStyle(to_rotate_y, to_rotate_z, to_x, to_y))
            if action_styles:
                transform_styles.append('\\t(%s)' % (''.join(action_styles)))
            FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
    except (IndexError, KeyError, ValueError):
        try:
            logging.warning(_('Invalid comment: %r') % c[3])
        except IndexError:
            # c itself is too short; wrap it so a tuple is not unpacked by '%'.
            logging.warning(_('Invalid comment: %r') % (c,))


def WriteCommentSH5VPositioned(f, c, width, height, styleid):
    """Write one sH5V positioned comment as a single ASS Dialogue line.

    Fields read from ``c``: ``c[0]`` start time, ``c[3]`` text, ``c[5]``
    colour, ``c[6]`` font size, ``c[9]``/``c[10]`` position as fractions of the
    stage, ``c[11]`` duration in milliseconds, ``c[12]`` alpha and
    ``c[14]``/``c[15]`` z/y rotation. IndexError and ValueError are logged as an
    invalid comment.
    """
    try:
        text = ASSEscape(str(c[3]))
        pos_x = round(float(c[9])*width)
        pos_y = round(float(c[10])*height)
        rotate_z = -int(c[14])
        rotate_y = -int(c[15])
        color = c[5]
        opacity = float(c[12])
        #Note: Alpha transition hasn't been worked out yet.
        font_size = round(int(c[6])*math.sqrt(width*height/307200))
        #Note: sH5V stores an absolute font size, so it is scaled by
        #sqrt(width/640*height/480) as a stop-gap. It seems to work fine.
        start_time = float(c[0])
        duration = float(c[11])/1000

        styles = [
            '\\pos(%s, %s)' % (pos_x, pos_y),
            '\\fs%s' % font_size,
            '\\frz%s' % round(rotate_z),
            '\\fry%s' % round(rotate_y),
        ]
        if color is not None:
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                # Black text gets a white outline.
                styles.append('\\3c&HFFFFFF&')
        styles.append('\\alpha&H%02X' % (255-round(opacity*255)))

        end_time = start_time+duration
        # Comments without a positive duration are dropped silently.
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
    except (IndexError, ValueError) as e:
        logging.warning(_('Invalid comment: %r') % c[3])


# Result: (f, dx, dy)
# To convert: NewX = f*x+dx, NewY = f*y+dy
def GetZoomFactor(SourceSize, TargetSize):
    """Return ``(scale, dx, dy)`` that fits SourceSize inside TargetSize.

    The source keeps its aspect ratio and is centred, so exactly one of
    ``dx``/``dy`` is non-zero when the aspect ratios differ. A zero dimension
    (ZeroDivisionError) yields the identity ``(1, 0, 0)``. The most recent
    request and its answer are remembered on the function object
    (``Cached_Size`` / ``Cached_Result``).
    """
    request = (SourceSize, TargetSize)
    if (hasattr(GetZoomFactor, 'Cached_Size')
            and hasattr(GetZoomFactor, 'Cached_Result')
            and request == GetZoomFactor.Cached_Size):
        return GetZoomFactor.Cached_Result

    GetZoomFactor.Cached_Size = request
    try:
        SourceAspect = SourceSize[0]/SourceSize[1]
        TargetAspect = TargetSize[0]/TargetSize[1]
        if TargetAspect < SourceAspect:
            # Target is narrower: fit the width, pad top and bottom.
            result = (TargetSize[0]/SourceSize[0], 0, (TargetSize[1]-TargetSize[0]/SourceAspect)/2)
        elif TargetAspect > SourceAspect:
            # Target is wider: fit the height, pad left and right.
            result = (TargetSize[1]/SourceSize[1], (TargetSize[0]-TargetSize[1]*SourceAspect)/2, 0)
        else:
            result = (TargetSize[0]/SourceSize[0], 0, 0)
    except ZeroDivisionError:
        result = (1, 0, 0)
    GetZoomFactor.Cached_Result = result
    return result


# Calculation is based on https://github.com/jabbany/CommentCoreLibrary/issues/5#issuecomment-40087282
#                     and https://github.com/m13253/danmaku2ass/issues/7#issuecomment-41489422
# Input: X relative horizontal coordinate: 0 for left edge, 1 for right edge.
#        Y relative vertical coordinate: 0 for top edge, 1 for bottom edge.
# FOV = 1.0/math.tan(100*math.pi/360.0)
# Result: (rotX, rotY, rotZ, shearX, shearY)
def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
    """Convert Flash-style Y/Z rotation into ASS rotation and shear values.

    ``X`` and ``Y`` are relative coordinates (0..1) and are remapped to -1..1
    before use. Returns ``(rotX, rotY, rotZ, shearX, shearY)`` with the
    angles rounded to whole degrees and wrapped into (-180, 180]. Passing
    ``FOV=None`` disables the perspective correction of the Y rotation.
    """
    def WrapAngle(deg):
        return 180-((180-deg)%360)

    def CalcPerspectiveCorrection(alpha, X, FOV=FOV):
        alpha = WrapAngle(alpha)
        if FOV is None:
            return alpha
        sin_alpha = math.sin(alpha*math.pi/180.0)
        cos_alpha = math.cos(alpha*math.pi/180.0)
        facing_front = 0 <= alpha <= 180
        numerator = FOV*cos_alpha-X*sin_alpha
        if facing_front:
            denominator = FOV+max(2, abs(X)+1)*sin_alpha
        else:
            denominator = FOV-max(2, abs(X)+1)*sin_alpha
        costheta = numerator/denominator
        # acos() needs its argument inside [-1, 1]; clip and report otherwise.
        if costheta > 1 or costheta < -1:
            costheta = 1 if costheta > 1 else -1
            logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!' % (alpha, X))
        theta = math.acos(costheta)*180/math.pi
        if not facing_front:
            theta = -theta
        return WrapAngle(theta)

    X = 2*X-1
    Y = 2*Y-1
    rotY = WrapAngle(rotY)
    rotZ = WrapAngle(rotZ)
    if rotY == 0 or rotZ == 0:
        # Single-axis rotation maps directly; only the sign convention flips.
        outX = 0
        outY = -rotY  # Positive value means clockwise in Flash
        outZ = -rotZ
    else:
        rotY = rotY*math.pi/180.0
        rotZ = rotZ*math.pi/180.0
        sin_y, cos_y = math.sin(rotY), math.cos(rotY)
        sin_z, cos_z = math.sin(rotZ), math.cos(rotZ)
        outY = math.atan2(-sin_y*cos_z, cos_y)*180/math.pi
        outZ = math.atan2(-cos_y*sin_z, cos_z)*180/math.pi
        outX = math.asin(sin_y*sin_z)*180/math.pi
    if FOV is not None:
        # Only the Y rotation is perspective-corrected; the X correction
        # (CalcPerspectiveCorrection(outX, -Y, FOV*0.75)) is disabled.
        outY = CalcPerspectiveCorrection(outY, X, FOV)
    shear_y = round(-0.75*Y*math.sin(outY*math.pi/180.0), 3)
    return (WrapAngle(round(outX)), WrapAngle(round(outY)), WrapAngle(round(outZ)), 0, shear_y)


def ProcessComments(comments, f, width, height, bottomReserved, fontface, fontsize, alpha, lifetime, reduced, progress_callback):
    """Write the ASS header and then one or more events per comment to ``f``.

    Comments whose mode ``i[4]`` is an int are laid out on the first free row;
    when no row is free they are dropped if ``reduced`` is true, otherwise they
    are placed on the row chosen by FindAlternativeRow. String modes
    ('bilipos', 'acfunpos', 'sH5Vpos') are handed to the matching positioned
    writer. ``progress_callback(done, total)``, if given, is called every 1000
    comments and once at the end.
    """
    styleid = 'Danmaku2ASS_%04x' % random.randint(0, 0xffff)
    WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid)
    # One occupancy table per integer comment mode (0..3).
    rows = [[None]*(height-bottomReserved+1) for _ in range(4)]
    for index, comment in enumerate(comments):
        if progress_callback and index % 1000 == 0:
            progress_callback(index, len(comments))
        mode = comment[4]
        if isinstance(mode, int):
            row = 0
            last_start_row = height-bottomReserved-comment[7]
            placed = False
            while row <= last_start_row:
                freerows = TestFreeRows(rows, comment, row, width, height, bottomReserved, lifetime)
                if freerows >= comment[7]:
                    placed = True
                    break
                # Skip past the blocked span (always advance at least one row).
                row += freerows or 1
            if not placed:
                if reduced:
                    continue
                row = FindAlternativeRow(rows, comment, height, bottomReserved)
            MarkCommentRow(rows, comment, row)
            WriteComment(f, comment, row, width, height, bottomReserved, fontsize, lifetime, styleid)
        elif mode == 'bilipos':
            WriteCommentBilibiliPositioned(f, comment, width, height, styleid)
        elif mode == 'acfunpos':
            WriteCommentAcfunPositioned(f, comment, width, height, styleid)
        elif mode == 'sH5Vpos':
            WriteCommentSH5VPositioned(f, comment, width, height, styleid)
        else:
            logging.warning(_('Invalid comment: %r') % comment[3])
    if progress_callback:
        progress_callback(len(comments), len(comments))


def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
    """Count how many consecutive rows starting at ``row`` are usable for ``c``.

    Counting stops at ``c[7]`` rows, at ``height-bottomReserved``, or at the
    first row held by a comment that would still collide with ``c``. For modes
    1 and 2 a row is blocked while its occupant is younger than ``lifetime``;
    for the other modes the check also uses the occupant's ``[8]`` value and
    the stage width (presumably the text width of a scrolling comment).
    """
    lane = c[4]
    rowmax = height-bottomReserved

    if lane in (1, 2):
        def collides(other):
            return other[0]+lifetime > c[0]
    else:
        try:
            thresholdTime = c[0]-lifetime*(1-width/(c[8]+width))
        except ZeroDivisionError:
            thresholdTime = c[0]-lifetime

        def collides(other):
            try:
                return other[0] > thresholdTime or other[0]+other[8]*lifetime/(other[8]+width) > c[0]
            except ZeroDivisionError:
                return False

    res = 0
    previous = None
    while row < rowmax and res < c[7]:
        occupant = rows[lane][row]
        # Only re-test when the occupant differs from the one just examined.
        if previous != occupant:
            previous = occupant
            if occupant and collides(occupant):
                break
        row += 1
        res += 1
    return res


def FindAlternativeRow(rows, c, height, bottomReserved):
    """Pick a fallback row for ``c`` when no free span exists.

    Returns the first empty row in the table for mode ``c[4]``; if every
    candidate row is occupied, returns the row whose occupant has the smallest
    ``[0]`` value (row 0 when there are no candidates at all).
    """
    oldest = 0
    candidate_count = height-bottomReserved-math.ceil(c[7])
    for candidate in range(candidate_count):
        occupant = rows[c[4]][candidate]
        if not occupant:
            return candidate
        if occupant[0] < rows[c[4]][oldest][0]:
            oldest = candidate
    return oldest


def MarkCommentRow(rows, c, row):
    """Record ``c`` as the occupant of ``ceil(c[7])`` rows starting at ``row``.

    Rows beyond the end of the table are ignored: marking stops silently at
    the first IndexError.
    """
    span = math.ceil(c[7])
    try:
        for offset in range(span):
            rows[c[4]][row+offset] = c
    except IndexError:
        # The comment sticks out past the last row; keep what was marked.
        pass


def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
    """Write the ASS script header, the single style definition and the
    ``[Events]`` format line to ``f``.

    ``alpha`` is an opacity in 0..1 and is converted to the ASS alpha byte
    (``255-round(alpha*255)``); the outline width is ``round(fontsize/25)``.
    """
    fields = {
        'width': width,
        'height': height,
        'fontface': fontface,
        'fontsize': round(fontsize),
        'alpha': 255-round(alpha*255),
        'outline': round(fontsize/25),
        'styleid': styleid,
    }
    header = '''
[Script Info]
; Script generated by Danmaku2ASS
; https://github.com/m13253/danmaku2ass
Script Updated By: Danmaku2ASS (https://github.com/m13253/danmaku2ass)
ScriptType: v4.00+
WrapStyle: 2
Collisions: Normal
PlayResX: %(width)s
PlayResY: %(height)s
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: %(styleid)s, %(fontface)s, %(fontsize)s, &H%(alpha)02XFFFFFF, &H%(alpha)02XFFFFFF, &H%(alpha)02X000000, &H%(alpha)02X000000, 0, 0, 0, 0, 100, 100, 0.00, 0.00, 1, %(outline)s, 0, 7, 0, 0, 0, 0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
''' % fields
    f.write(header)


def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lifetime, styleid):
    """Write one regular (non-positioned) comment as an ASS Dialogue line.

    Mode ``c[4]`` selects the placement: 1 is centred with a top anchor, 2 is
    centred with a bottom anchor, 3 moves left-to-right, anything else moves
    right-to-left. Font size and colour overrides are only emitted when they
    differ from the style defaults.
    """
    text = ASSEscape(c[3])
    mode = c[4]
    if mode == 1:
        placement = '\\an8\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': row}
    elif mode == 2:
        placement = '\\an2\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': ConvertType2(row, height, bottomReserved)}
    elif mode == 3:
        placement = '\\move(%(neglen)s, %(row)s, %(width)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    else:
        placement = '\\move(%(width)s, %(row)s, %(neglen)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    styles = [placement]

    size_delta = c[6]-fontsize
    if not (-1 < size_delta < 1):
        styles.append('\\fs%s' % round(c[6]))

    color = c[5]
    if color != 0xffffff:
        styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
        if color == 0x000000:
            # Black text gets a white outline.
            styles.append('\\3c&HFFFFFF&')

    event = {
        'start': ConvertTimestamp(c[0]),
        'end': ConvertTimestamp(c[0]+lifetime),
        'styles': ''.join(styles),
        'text': text,
        'styleid': styleid,
    }
    f.write('Dialogue: 2,%(start)s,%(end)s,%(styleid)s,,0000,0000,0000,,{%(styles)s}%(text)s\n' % event)


def ASSEscape(s):
    """Escape ``s`` for use as ASS dialogue text.

    Backslashes and braces are backslash-escaped, newlines become ``\\N`` and
    empty lines are replaced by a single space.
    """
    escaped = str(s)
    # The backslash must be handled first so later escapes are not doubled.
    for raw, safe in (('\\', '\\\\'), ('{', '\\{'), ('}', '\\}')):
        escaped = escaped.replace(raw, safe)
    lines = [line if line else ' ' for line in escaped.split('\n')]
    return '\\N'.join(lines)


def CalculateLength(s):
    """Return the character count of the longest line in ``s``."""
    # May not be accurate: this counts characters, not rendered width.
    return max(len(line) for line in s.split('\n'))


def ConvertTimestamp(timestamp):
    """Format a time in seconds as an ASS timestamp ``H:MM:SS.cc``."""
    total_centis = round(timestamp*100.0)
    # Peel off the smallest unit first; equivalent to dividing by
    # 360000 / 6000 / 100 from the top down.
    total_seconds, centsecond = divmod(total_centis, 100)
    total_minutes, second = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)
    return '%d:%02d:%02d.%02d' % (int(hour), int(minute), int(second), int(centsecond))


def ConvertType2(row, height, bottomReserved):
    """Mirror ``row`` so it is measured up from the bottom of the usable
    area (``height`` minus ``bottomReserved``) instead of down from the top."""
    usable_height = height-bottomReserved
    return usable_height-row


def ConvertToFile(filename_or_file, *args, **kwargs):
    """Return a file object for ``filename_or_file``.

    A ``bytes`` name is decoded as UTF-8 (with replacement) first. A ``str``
    is opened with ``open(name, *args, **kwargs)``; anything else is assumed
    to already be a file-like object and is returned unchanged.
    """
    target = filename_or_file
    if isinstance(target, bytes):
        target = str(bytes(target).decode('utf-8', 'replace'))
    if not isinstance(target, str):
        return target
    return open(target, *args, **kwargs)


def FilterBadChars(f):
    """Read all of ``f`` and return an ``io.StringIO`` in which control
    characters other than TAB, LF and CR are replaced with U+FFFD."""
    cleaned = re.sub('[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]', '\ufffd', f.read())
    return io.StringIO(cleaned)


class safe_list(list):
    """A list with a dict-style ``get`` for out-of-range indexes."""

    def get(self, index, default=None):
        """Return ``self[index]``, or ``default`` when that raises IndexError."""
        try:
            value = self[index]
        except IndexError:
            value = default
        return value


def export(func):
    """Decorator that appends ``func.__name__`` to the module's ``__all__``
    (creating the list on first use) and returns ``func`` unchanged."""
    public_names = globals().setdefault('__all__', [])
    public_names.append(func.__name__)
    return func


@export
def Danmaku2ASS(input_files, output_file, stage_width, stage_height, reserve_blank=0, font_face=_('(FONT) sans-serif')[7:], font_size=25.0, text_opacity=1.0, comment_duration=5.0, is_reduce_comments=False, progress_callback=None):
    """Convert comment files to a single ASS subtitle stream.

    ``input_files`` is passed to ReadComments. ``output_file`` may be a file
    name (opened as UTF-8 with BOM and CRLF newlines, closed on return), an
    already-open file object (left open for the caller), or a falsy value to
    write to ``sys.stdout``. The remaining arguments are forwarded to
    ProcessComments.
    """
    fo = None
    comments = ReadComments(input_files, font_size)
    try:
        if output_file:
            fo = ConvertToFile(output_file, 'w', encoding='utf-8-sig', errors='replace', newline='\r\n')
        else:
            fo = sys.stdout
        ProcessComments(comments, fo, stage_width, stage_height, reserve_blank, font_face, font_size, text_opacity, comment_duration, is_reduce_comments, progress_callback)
    finally:
        # Close only a file opened here. ``fo`` is still None when opening the
        # output failed; calling close() on it would replace the real error
        # with an AttributeError.
        if fo is not None and output_file and fo != output_file:
            fo.close()


@export
def ReadComments(input_files, font_size=25.0, progress_callback=None):
    """Parse every input file and return all comments as one sorted list.

    ``input_files`` may be a single name (``str`` or UTF-8 ``bytes``) or an
    iterable of names / file objects. Raises ValueError when no comment
    processor is found for a file. ``progress_callback(done, total)``, if
    given, is called before each file and once after the last.
    """
    if isinstance(input_files, bytes):
        input_files = str(bytes(input_files).decode('utf-8', 'replace'))
    input_files = [input_files] if isinstance(input_files, str) else list(input_files)

    comments = []
    for position, source in enumerate(input_files):
        if progress_callback:
            progress_callback(position, len(input_files))
        with ConvertToFile(source, 'r', encoding='utf-8', errors='replace') as f:
            CommentProcessor = GetCommentProcessor(f)
            if not CommentProcessor:
                raise ValueError(_('Unknown comment file format: %s') % source)
            parsed = CommentProcessor(FilterBadChars(f), font_size)
            comments.extend(parsed)
    if progress_callback:
        progress_callback(len(input_files), len(input_files))
    comments.sort()
    return comments


@export
def GetCommentProcessor(input_file):
    """Return the reader for ``input_file``'s detected format, or None.

    The format name comes from ProbeCommentFormat. ``dict.get`` is used so an
    unrecognised format yields None rather than a KeyError; ReadComments
    checks for a falsy result and raises its own "Unknown comment file
    format" ValueError.
    """
    return CommentFormatMap.get(ProbeCommentFormat(input_file))


def main():
    """Command-line entry point: parse arguments and run Danmaku2ASS.

    With no arguments at all the help text is shown. Raises ValueError when
    ``--size`` is not of the form ``WIDTHxHEIGHT`` with integer parts.
    """
    if len(sys.argv) == 1:
        sys.argv.append('--help')

    default_font = _('(FONT) sans-serif')[7:]
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('-o', '--output', metavar=_('OUTPUT'), help=_('Output file'))
    add('-s', '--size', metavar=_('WIDTHxHEIGHT'), required=True, help=_('Stage size in pixels'))
    add('-fn', '--font', metavar=_('FONT'), help=_('Specify font face [default: %s]') % default_font, default=default_font)
    add('-fs', '--fontsize', metavar=_('SIZE'), help=(_('Default font size [default: %s]') % 25), type=float, default=25.0)
    add('-a', '--alpha', metavar=_('ALPHA'), help=_('Text opacity'), type=float, default=1.0)
    add('-l', '--lifetime', metavar=_('SECONDS'), help=_('Duration of comment display [default: %s]') % 5, type=float, default=5.0)
    add('-p', '--protect', metavar=_('HEIGHT'), help=_('Reserve blank on the bottom of the stage'), type=int, default=0)
    add('-r', '--reduce', action='store_true', help=_('Reduce the amount of comments if stage is full'))
    add('file', metavar=_('FILE'), nargs='+', help=_('Comment file to be processed'))
    args = parser.parse_args()

    try:
        # Both a wrong number of parts and a non-integer part raise ValueError.
        width, height = map(int, str(args.size).split('x', 1))
    except ValueError:
        raise ValueError(_('Invalid stage size: %r') % args.size)
    Danmaku2ASS(args.file, args.output, width, height, args.protect, args.font, args.fontsize, args.alpha, args.lifetime, args.reduce)


# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: 2020/dmzj/cartoon.py
================================================
import requests
import os
import re
from bs4 import BeautifulSoup
from contextlib import closing
from tqdm import tqdm
import time

"""
    Author:
        Jack Cui
    Wechat:
        https://mp.weixin.qq.com/s/OCWwRVDFNslIuKyiCVUoTA
"""

# Create the directory that holds every downloaded chapter.
save_dir = '妖神记'
os.makedirs(save_dir, exist_ok=True)

target_url = "https://www.dmzj.com/info/yaoshenji.html"

# Collect the chapter links and chapter names from the index page.
r = requests.get(url = target_url)
bs = BeautifulSoup(r.text, 'lxml')
list_con_li = bs.find('ul', class_="list_con_li")
cartoon_list = list_con_li.find_all('a')
# Store the chapters in the reverse of page order.
chapter_names = [cartoon.text for cartoon in reversed(cartoon_list)]
chapter_urls = [cartoon.get('href') for cartoon in reversed(cartoon_list)]

# Download the comic, one chapter at a time.
for i, url in enumerate(tqdm(chapter_urls)):
    # The chapter page is sent as the Referer header of each image request.
    download_header = {
        'Referer': url
    }
    # Strip every '.' from the chapter name before using it as a folder name.
    name = chapter_names[i].replace('.', '')
    chapter_save_dir = os.path.join(save_dir, name)
    os.makedirs(chapter_save_dir, exist_ok=True)
    r = requests.get(url = url)
    html = BeautifulSoup(r.text, 'lxml')
    script_info = str(html.script)
    # Image ids are the 13- or 14-digit numbers in the first <script> tag.
    # 13-digit ids are padded with a trailing '0' so that all ids sort
    # numerically on the same scale.
    pics = re.findall(r'\d{13,14}', script_info)
    pics = sorted((pic + '0' if len(pic) == 13 else pic for pic in pics), key=int)
    # The two URL path segments are the first 5-digit and first 4-digit numbers
    # delimited by '|'; an IndexError here means neither was found in the
    # script tag.
    chapterpic_hou = re.findall(r'\|(\d{5})\|', script_info)[0]
    chapterpic_qian = re.findall(r'\|(\d{4})\|', script_info)[0]
    for idx, pic in enumerate(pics):
        # NOTE(review): this also strips the last digit of a genuine 14-digit
        # id that happens to end in '0' (behaviour kept from the original).
        pic_id = pic[:-1] if pic[-1] == '0' else pic
        pic_url = 'https://images.dmzj.com/img/chapterpic/' + chapterpic_qian + '/' + chapterpic_hou + '/' + pic_id + '.jpg'
        pic_name = '%03d.jpg' % (idx + 1)
        pic_save_path = os.path.join(chapter_save_dir, pic_name)
        with closing(requests.get(pic_url, headers = download_header, stream = True)) as response:
            if response.status_code == 200:
                with open(pic_save_path, "wb") as file:
                    for data in response.iter_content(chunk_size=1024):
                        file.write(data)
            else:
                print('链接异常')
    # Pause between chapters.
    time.sleep(10)

================================================
FILE: 2020/taobao/taobao_login.py
================================================
from selenium import webdriver
import logging
import time
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from retrying import retry
from selenium.webdriver import ActionChains

import pyautogui
pyautogui.PAUSE = 0.5 

logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

"""
微信公众号 JackCui-AI
更多精彩教程、源码尽在微信公众号
"""

class taobao():
    """Selenium-driven helper that logs in to taobao.com and can check out
    the shopping cart."""

    def __init__(self):
        """Start Chrome, maximise the window and set a 5 s implicit wait."""
        # Raw string: in a normal literal "\t" is a TAB and "\y" / "\c" are
        # invalid escapes, which corrupts a Windows-style path. Replace this
        # placeholder with the real chromedriver location.
        self.browser = webdriver.Chrome(r"path\to\your\chromedriver.exe")
        # Maximise the window.
        self.browser.maximize_window()
        self.browser.implicitly_wait(5)
        self.domain = 'http://www.taobao.com'
        self.action_chains = ActionChains(self.browser)

    def login(self, username, password):
        """Keep attempting to log in until get_nickname() returns a nickname.

        The final login click is performed with pyautogui by locating the
        screenshot ``1.png`` on screen, so that image must exist in the working
        directory and the button must be visible.
        """
        while True:
            self.browser.get(self.domain)
            time.sleep(1)

            # Open the login form and fill in the credentials (located by
            # XPath; class-name / id lookups would work as well).
            self.browser.find_element_by_xpath('//*[@id="J_SiteNavLogin"]/div[1]/div[1]/a[1]').click()
            self.browser.find_element_by_xpath('//*[@id="fm-login-id"]').send_keys(username)
            self.browser.find_element_by_xpath('//*[@id="fm-login-password"]').send_keys(password)
            time.sleep(1)

            try:
                # A slider captcha may appear; if so, solve it by dragging.
                slider = self.browser.find_element_by_xpath("//span[contains(@class, 'btn_slide')]")
                if slider.is_displayed():
                    # Drag the slider.
                    self.action_chains.drag_and_drop_by_offset(slider, 258, 0).perform()
                    time.sleep(0.5)
                    # Release the slider, i.e. let go of the mouse button
                    # after the drag.
                    self.action_chains.release().perform()
            except (NoSuchElementException, WebDriverException):
                logger.info('未出现登录验证码')

            # Clicking the login button through Selenium does not log in, so
            # the click is performed with pyautogui instead.
            # '1.png' is the path of the login-button image.
            coords = pyautogui.locateOnScreen('1.png')
            x, y = pyautogui.center(coords)
            pyautogui.leftClick(x, y)

            nickname = self.get_nickname()
            if nickname:
                logger.info('登录成功,呢称为:' + nickname)
                break
            # NOTE(review): the visible basicConfig sets level INFO, so this
            # debug message is not shown; consider a higher level.
            logger.debug('登录出错,5s后继续登录')
            time.sleep(5)

    def get_nickname(self):
        """Reload the home page and return the text of the 'site-nav-user'
        element, or '' when that element is not found."""
        self.browser.get(self.domain)
        time.sleep(0.5)
        try:
            return self.browser.find_element_by_class_name('site-nav-user').text
        except NoSuchElementException:
            return ''

    def clear_cart(self):
        """Open the mini cart, select all items, go to checkout and submit
        the order. Each element is clicked only if it is displayed."""
        cart = self.browser.find_element_by_xpath('//*[@id="J_MiniCart"]')
        if cart.is_displayed():
            cart.click()
        select = self.browser.find_element_by_xpath('//*[@id="J_SelectAll1"]/div/label')
        if select.is_displayed():
            select.click()
        time.sleep(0.5)
        go = self.browser.find_element_by_xpath('//*[@id="J_Go"]')
        if go.is_displayed():
            go.click()
        submit = self.browser.find_element_by_xpath('//*[@id="submitOrderPC_1"]/div/a[2]')
        if submit.is_displayed():
            submit.click()


if __name__ == '__main__':
    # Fill in your own username and password.
    username = 'username'
    password = 'password'
    tb = taobao()
    tb.login(username, password)
    #tb.clear_cart()


================================================
FILE: 2020/xbqg/xbqg_spider.py
================================================
import requests
import time
from tqdm import tqdm
from bs4 import BeautifulSoup

"""
    Author:
        Jack Cui
    Wechat:
        https://mp.weixin.qq.com/s/OCWwRVDFNslIuKyiCVUoTA
"""

def get_content(target):
    """Download the chapter page at ``target`` and return its text as a list
    of pieces, split on runs of four non-breaking spaces.

    The text is taken from the ``<div id="content">`` element and the
    response is decoded as UTF-8.
    """
    response = requests.get(url = target)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    body = soup.find('div', id='content')
    paragraph_break = '\xa0'*4
    return body.text.strip().split(paragraph_break)

if __name__ == '__main__':
    server = 'https://www.xsbiquge.com'
    book_name = '诡秘之主.txt'
    target = 'https://www.xsbiquge.com/15_15338/'

    # Fetch the table of contents and collect every chapter link.
    req = requests.get(url = target)
    req.encoding = 'utf-8'
    html = req.text
    chapter_bs = BeautifulSoup(html, 'lxml')
    chapters = chapter_bs.find('div', id='list').find_all('a')

    for chapter in tqdm(chapters):
        chapter_name = chapter.string
        url = server + chapter.get('href')
        content = get_content(url)
        # Append chapter by chapter so finished chapters are already on disk.
        with open(book_name, 'a', encoding='utf-8') as f:
            f.writelines([chapter_name, '\n', '\n'.join(content), '\n'])

================================================
FILE: 2020/zycjw/video_download.py
================================================
import os
import ffmpy3
import requests
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool

search_keyword = '越狱第一季'
search_url = 'http://www.jisudhw.com/index.php'
serach_params = {
    'm': 'vod-search'
}
serach_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
    'Referer': 'http://www.jisudhw.com/',
    'Origin': 'http://www.jisudhw.com',
    'Host': 'www.jisudhw.com'
}
serach_datas = {
    'wd': search_keyword,
    'submit': 'search'
}


video_dir = ''
# Maps each m3u8 URL to its episode number. Defined up front so the download
# step still works (with nothing to do) when the search returns no results.
serach_res = {}

r = requests.post(url=search_url, params=serach_params, headers=serach_headers, data=serach_datas)
r.encoding = 'utf-8'
server = 'http://www.jisudhw.com'
search_html = BeautifulSoup(r.text, 'lxml')
search_spans = search_html.find_all('span', class_='xing_vb4')
for span in search_spans:
    url = server + span.a.get('href')
    name = span.a.string
    print(name)
    print(url)
    # The episodes are saved into a folder named after the search result.
    video_dir = name
    os.makedirs(name, exist_ok=True)

    detail_url = url
    r = requests.get(url = detail_url)
    r.encoding = 'utf-8'
    detail_bf = BeautifulSoup(r.text, 'lxml')
    num = 1
    # Reset per search result: only the last result's episodes are kept.
    serach_res = {}
    for each_url in detail_bf.find_all('input'):
        value = each_url.get('value')
        # <input> tags without a value attribute yield None; skip them.
        if value and 'm3u8' in value:
            url = value
            # Keep the first episode number seen for a repeated URL.
            serach_res.setdefault(url, num)
            print('第%03d集:' % num)
            print(url)
            num += 1

def downVideo(url):
    """Download one episode with FFmpeg.

    Looks up the episode number of *url* in the module-level ``serach_res``
    mapping and writes the stream to ``<video_dir>/第NNN集.mp4``.
    """
    episode = serach_res[url]
    target = os.path.join(video_dir, '第%03d集.mp4' % episode)
    job = ffmpy3.FFmpeg(inputs={url: None}, outputs={target: None})
    job.run()
            
# Create a pool of 8 worker threads and download every collected m3u8 URL.
# pool.map blocks until all downloads have returned; the pool is then closed
# and joined so the script only exits after the workers have finished.
pool = ThreadPool(8)
results = pool.map(downVideo, serach_res.keys())
pool.close()
pool.join()

================================================
FILE: Netease/Netease.py
================================================
# -*- coding:utf-8 -*-
import requests, hashlib, sys, click, re, base64, binascii, json, os
from Crypto.Cipher import AES
from http import cookiejar

"""
Website:http://cuijiahua.com
Author:Jack Cui
Refer:https://github.com/darknessomi/musicbox
"""

class Encrypyed():
	"""
	Encrypts request payloads before they are posted to the music API.

	The payload is AES-CBC encrypted twice (first with a fixed nonce, then
	with a random per-request key) and the random key is encrypted with a
	raw modular exponentiation against a fixed public exponent and modulus.
	"""
	def __init__(self):
		self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
		self.nonce = '0CoJUm6Qyw8W8jud'
		self.pub_key = '010001'

	# Login encryption scheme, based on https://github.com/stkevintan/nw_musicbox
	def encrypted_request(self, text):
		"""
		Serialize *text* to JSON and encrypt it.
		:params text: JSON-serializable request parameters
		:return: dict with the keys 'params' and 'encSecKey'
		"""
		text = json.dumps(text)
		sec_key = self.create_secret_key(16)
		enc_text = self.aes_encrypt(self.aes_encrypt(text, self.nonce), sec_key.decode('utf-8'))
		enc_sec_key = self.rsa_encrpt(sec_key, self.pub_key, self.modulus)
		return {'params': enc_text, 'encSecKey': enc_sec_key}

	@staticmethod
	def _pkcs7_pad(text):
		"""
		Encode *text* as UTF-8 and pad it to a multiple of 16 bytes.
		The pad length is derived from the encoded byte length, so text
		containing multi-byte characters is padded correctly as well.
		:params text: str to pad
		:return: padded bytes
		"""
		data = text.encode('utf-8')
		pad = 16 - len(data) % 16
		return data + bytes([pad]) * pad

	def aes_encrypt(self, text, secKey):
		"""
		AES-CBC encrypt *text* with *secKey* and a fixed IV.
		:params text: plaintext str
		:params secKey: key as str (encoded as UTF-8 before use)
		:return: base64-encoded ciphertext as str
		"""
		encryptor = AES.new(secKey.encode('utf-8'), AES.MODE_CBC, b'0102030405060708')
		ciphertext = encryptor.encrypt(self._pkcs7_pad(text))
		return base64.b64encode(ciphertext).decode('utf-8')

	def rsa_encrpt(self, text, pubKey, modulus):
		"""
		Compute reversed(text) ** pubKey mod modulus.
		:params text: bytes to encrypt
		:params pubKey: exponent as a hex string
		:params modulus: modulus as a hex string
		:return: result as a hex string, zero-padded to 256 characters
		"""
		text = text[::-1]
		rs = pow(int(binascii.hexlify(text), 16), int(pubKey, 16), int(modulus, 16))
		return format(rs, 'x').zfill(256)

	def create_secret_key(self, size):
		"""
		Create a random key.
		:params size: number of random bytes to draw
		:return: the first 16 hex characters of the random bytes, as bytes
		"""
		return binascii.hexlify(os.urandom(size))[:16]


class Song():
	"""
	Plain record holding the information of one song.
	"""
	def __init__(self, song_id, song_name, song_num, song_url=None):
		# A missing download address is stored as an empty string.
		if song_url is None:
			song_url = ''
		self.song_url = song_url
		self.song_num = song_num
		self.song_name = song_name
		self.song_id = song_id

class Crawler():
	"""
	Client for the NetEase Cloud Music web API: search, resolve and download.
	"""
	def __init__(self, timeout=60, cookie_path='.'):
		"""
		:params timeout: timeout in seconds passed to every HTTP request
		:params cookie_path: file name handed to the LWP cookie jar
		"""
		self.headers = {
			'Accept': '*/*',
			'Accept-Encoding': 'gzip,deflate,sdch',
			'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
			'Connection': 'keep-alive',
			'Content-Type': 'application/x-www-form-urlencoded',
			'Host': 'music.163.com',
			'Referer': 'http://music.163.com/search/',
			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
		}
		# API session: carries the browser-like headers and the cookie jar.
		self.session = requests.Session()
		self.session.headers.update(self.headers)
		self.session.cookies = cookiejar.LWPCookieJar(cookie_path)
		# Separate plain session used only for downloading the audio files.
		self.download_session = requests.Session()
		self.timeout = timeout
		self.ep = Encrypyed()

	def post_request(self, url, params):
		"""
		Encrypt *params* and POST them to *url*.
		:params url: API endpoint
		:params params: request parameters (JSON-serializable)
		:return: decoded JSON dict, or None when the API code is not 200
		"""
		data = self.ep.encrypted_request(params)
		resp = self.session.post(url, data=data, timeout=self.timeout)
		result = resp.json()
		if result['code'] != 200:
			click.echo('post_request error')
			return None
		return result

	def search(self, search_content, search_type, limit=9):
		"""
		Search API.
		:params search_content: text to search for
		:params search_type: search type
		:params limit: number of results to return
		:return: dict, or None when the request failed
		"""
		url = 'http://music.163.com/weapi/cloudsearch/get/web?csrf_token='
		params = {'s': search_content, 'type': search_type, 'offset': 0, 'sub': 'false', 'limit': limit}
		return self.post_request(url, params)

	def search_song(self, song_name, song_num, quiet=True, limit=9):
		"""
		Search for a song by name.
		:params song_name: song name
		:params song_num: sequence number of the song being downloaded
		:params quiet: pick the first search hit automatically
		:params limit: number of results to return
		:return: Song object, or None when nothing was found or selected
		"""
		result = self.search(song_name, search_type=1, limit=limit)
		if result is None:
			# post_request has already reported the failure.
			return None

		if result['result']['songCount'] <= 0:
			click.echo('Song {} not existed.'.format(song_name))
			return None

		songs = result['result']['songs']
		if quiet:
			song_id, song_name = songs[0]['id'], songs[0]['name']
			return Song(song_id=song_id, song_name=song_name, song_num=song_num)
		return None

	def get_song_url(self, song_id, bit_rate=320000):
		"""
		Resolve the download address of a song.
		:params song_id: song ID <int>
		:params bit_rate: {'MD 128k': 128000, 'HD 320k': 320000}
		:return: download address, or None when it is unavailable
		"""
		url = 'http://music.163.com/weapi/song/enhance/player/url?csrf_token='
		csrf = ''
		params = {'ids': [song_id], 'br': bit_rate, 'csrf_token': csrf}
		result = self.post_request(url, params)
		if result is None:
			# post_request has already reported the failure.
			return None
		song_url = result['data'][0]['url']

		# The API returns a null address when the song cannot be served.
		if song_url is None:
			click.echo('Song {} is not available due to copyright issue.'.format(song_id))
			return None
		return song_url

	def get_song_by_url(self, song_url, song_name, song_num, folder):
		"""
		Download a song to disk; an already existing target file is kept.
		:params song_url: download address
		:params song_name: song name
		:params song_num: sequence number, used as file name prefix
		:params folder: target directory (created when missing)
		"""
		if not os.path.exists(folder):
			os.makedirs(folder)
		fpath = os.path.join(folder, str(song_num) + '_' + song_name + '.mp3')
		if sys.platform == 'win32' or sys.platform == 'cygwin':
			# Drop the characters that Windows does not allow in file names.
			valid_name = re.sub(r'[<>:"/\\|?*]', '', song_name)
			if valid_name != song_name:
				click.echo('{} will be saved as: {}.mp3'.format(song_name, valid_name))
				fpath = os.path.join(folder, str(song_num) + '_' + valid_name + '.mp3')

		if os.path.exists(fpath):
			return

		resp = self.download_session.get(song_url, timeout=self.timeout, stream=True)
		try:
			# The header may be absent (e.g. chunked transfer); fall back to 0
			# instead of crashing on int(None).
			raw_length = resp.headers.get('content-length')
			length = int(raw_length) if raw_length is not None else 0
			label = 'Downloading {} {}kb'.format(song_name, int(length/1024))

			with click.progressbar(length=length, label=label) as progressbar:
				with open(fpath, 'wb') as song_file:
					for chunk in resp.iter_content(chunk_size=1024):
						if chunk:
							song_file.write(chunk)
							# Advance by what was actually received; the
							# last chunk is usually shorter than 1024.
							progressbar.update(len(chunk))
		finally:
			# Release the streamed connection even when the download fails.
			resp.close()


class Netease():
	"""
	High-level downloader: search a song by name and save it to a folder.
	"""
	def __init__(self, timeout, folder, quiet, cookie_path):
		"""
		:params timeout: HTTP timeout in seconds, passed to the Crawler
		:params folder: target directory; None means the current directory
		:params quiet: pick the first search hit automatically
		:params cookie_path: cookie file name, passed to the Crawler
		"""
		self.crawler = Crawler(timeout, cookie_path)
		self.folder = '.' if folder is None else folder
		self.quiet = quiet

	def download_song_by_search(self, song_name, song_num):
		"""
		Search for a song by name and download the match.
		:params song_name: song name
		:params song_num: sequence number of the song being downloaded
		"""
		try:
			song = self.crawler.search_song(song_name, song_num, self.quiet)
		except Exception:
			click.echo('download_song_by_serach error')
			# Without this return `song` would be unbound below.
			return
		# Download only when the search produced a song.
		if song is not None:
			self.download_song_by_id(song.song_id, song.song_name, song.song_num, self.folder)

	def download_song_by_id(self, song_id, song_name, song_num, folder='.'):
		"""
		Download a song by its ID.
		:params song_id: song ID
		:params song_name: song name
		:params song_num: sequence number of the song being downloaded
		:params folder: target directory
		"""
		try:
			url = self.crawler.get_song_url(song_id)
			if url is None:
				# No download address could be resolved; nothing to fetch.
				return
			# Strip characters that are not wanted in the file name.
			song_name = song_name.replace('/', '').replace('.', '')
			self.crawler.get_song_by_url(url, song_name, song_num, folder)
		except Exception:
			click.echo('download_song_by_id error')


if __name__ == '__main__':
	timeout = 60
	output = 'Musics'
	quiet = True
	cookie_path = 'Cookie'
	netease = Netease(timeout, output, quiet, cookie_path)
	music_list_name = 'music_list.txt'
	# Start downloading when the song list file exists.
	if os.path.exists(music_list_name):
		# Read as UTF-8 explicitly (tolerating a BOM) instead of relying on
		# the platform default, and skip blank lines so that an empty song
		# name is never searched.
		with open(music_list_name, 'r', encoding='utf-8-sig') as f:
			music_list = [line.strip() for line in f if line.strip()]
		for song_num, song_name in enumerate(music_list):
			netease.download_song_by_search(song_name, song_num + 1)
	else:
		click.echo('music_list.txt not exist.')

================================================
FILE: Netease/music_list.txt
================================================
風見鶏
外婆的话【不才】
We Don't Talk Anymore
【电吉他】《青鸟》
小棋童
千本桜(古筝版)
妄为
借我
你到底有没有爱过我
七月上


================================================
FILE: README.md
================================================
# 注:2020年最新连载教程请移步:[Python Spider 2020](https://github.com/Jack-Cherish/python-spider/tree/master/2020 "Python Spider 2020")

免责声明:

大家请以学习为目的使用本仓库,爬虫违法违规的案件:https://github.com/HiddenStrawberry/Crawler_Illegal_Cases_In_China

本仓库的所有内容仅供学习和参考之用,禁止用于商业用途。任何人或组织不得将本仓库的内容用于非法用途或侵犯他人合法权益。本仓库所涉及的爬虫技术仅用于学习和研究,不得用于对其他平台进行大规模爬虫或其他非法行为。对于因使用本仓库内容而引起的任何法律责任,本仓库不承担任何责任。使用本仓库的内容即表示您同意本免责声明的所有条款和条件。

# Python Spider

原创文章每周最少两篇,**后续最新文章**会在[【公众号】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)首发,视频[【B站】](https://space.bilibili.com/331507846)首发,大家可以加我[【微信】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)进**交流群**,技术交流或提意见都可以,欢迎**Star**!

<p align="center">
  <a href="https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg" target="_blank"><img src="https://img.shields.io/badge/weChat-微信群-blue.svg" alt="微信群"></a>
  <a href="https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg" target="_blank"><img src="https://img.shields.io/badge/%E5%85%AC%E4%BC%97%E5%8F%B7-Jack%20Cui-lightgrey.svg" alt="公众号"></a>
  <a href="https://space.bilibili.com/331507846"><img src="https://img.shields.io/badge/bilibili-哔哩哔哩-critical" alt="B站"></a>
  <a href="https://www.zhihu.com/people/Jack--Cui" target="_blank"><img src="https://img.shields.io/badge/zhihu-知乎-informational" alt="知乎"></a>
  <a href="https://blog.csdn.net/c406495762" target="_blank"><img src="https://img.shields.io/badge/csdn-CSDN-red.svg" alt="CSDN"></a>
  <a href="https://www.toutiao.com/c/user/token/MS4wLjABAAAA5gJtmezUJ6vli2hZvnN13iLnzKLpuF8gGHeS0iVlmNs/" target="_blank"><img src="https://img.shields.io/badge/toutiao-%E5%A4%B4%E6%9D%A1-important.svg" alt="头条"></a>
  <a href="https://juejin.im/user/5ea2ca74e51d4546b50d5f9f" target="_blank"><img src="https://img.shields.io/badge/juejin-掘金-blue.svg" alt="掘金"></a>
</p>

## 声明

* 代码、教程**仅限于学习交流,请勿用于任何商业用途!**

## 目录

* [爬虫小工具](#爬虫小工具)
    * [文件下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/downloader.py "悬停显示")
* [爬虫实战](#爬虫实战)
    * [笔趣看小说下载](https://github.com/Jack-Cherish/python-spider/blob/master/biqukan.py "悬停显示")
    * [百度文库免费文章下载助手_rev1](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku.py "悬停显示")
    * [百度文库免费文章下载助手_rev2](https://github.com/Jack-Cherish/python-spider/blob/master/baiduwenku_pro_1.py "悬停显示")
    * [《帅啊》网帅哥图片下载](https://github.com/Jack-Cherish/python-spider/blob/master/shuaia.py "悬停显示")
    * [构建代理IP池](https://github.com/Jack-Cherish/python-spider/blob/master/daili.py "悬停显示")
    * [《火影忍者》漫画下载](https://github.com/Jack-Cherish/python-spider/tree/master/cartoon "悬停显示")
    * [财务报表下载小助手](https://github.com/Jack-Cherish/python-spider/blob/master/financical.py "悬停显示")
    * [一小时入门网络爬虫](https://github.com/Jack-Cherish/python-spider/tree/master/one_hour_spider "悬停显示")
    * [抖音App视频下载](https://github.com/Jack-Cherish/python-spider/tree/master/douyin "悬停显示")
    * [GEETEST验证码识别](https://github.com/Jack-Cherish/python-spider/blob/master/geetest.py "悬停显示")
    * [12306抢票小助手](https://github.com/Jack-Cherish/python-spider/blob/master/12306.py "悬停显示")
    * [百万英雄答题辅助系统](https://github.com/Jack-Cherish/python-spider/tree/master/baiwan "悬停显示")   
    * [网易云音乐免费音乐批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/Netease "悬停显示")
    * [B站免费视频和弹幕批量下载](https://github.com/Jack-Cherish/python-spider/tree/master/bilibili "悬停显示")
    * [京东商品晒单图下载](https://github.com/Jack-Cherish/python-spider/tree/master/dingdong "悬停显示")
    * [正方教务管理系统个人信息查询](https://github.com/Jack-Cherish/python-spider/tree/master/zhengfang_system_spider "悬停显示")
* [其它](#其它)

## 爬虫小工具

* downloader.py:文件下载小助手

	一个可以用于下载图片、视频、文件的小工具,有下载进度显示功能。稍加修改即可添加到自己的爬虫中。
	
	动态示意图:
	
	![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/9.gif)

## 爬虫实战
 
 * biqukan.py:《笔趣看》盗版小说网站,爬取小说工具

	第三方依赖库安装:

		pip3 install beautifulsoup4

	使用方法:

		python biqukan.py

 * baiduwenku.py: 百度文库word文章爬取
	
	原理说明:http://blog.csdn.net/c406495762/article/details/72331737
	
	代码不完善,没有进行打包,不具通用性,纯属娱乐。
	
 * shuaia.py: 爬取《帅啊》网,帅哥图片

	《帅啊》网URL:http://www.shuaia.net/index.html

	原理说明:http://blog.csdn.net/c406495762/article/details/72597755
	
	第三方依赖库安装:
	
		pip3 install requests beautifulsoup4
		
 * daili.py: 构建代理IP池

	原理说明:http://blog.csdn.net/c406495762/article/details/72793480
	
	
 * cartoon: 使用Scrapy爬取《火影忍者》漫画

	代码可以爬取整个《火影忍者》漫画所有章节的内容,保存到本地。更改地址,可以爬取其他漫画。保存地址可以在settings.py中修改。
	
	动漫网站:http://comic.kukudm.com/
	
	原理说明:http://blog.csdn.net/c406495762/article/details/72858983
	
 * hero.py: 《王者荣耀》推荐出装查询小助手

	网页爬取已经会了,想过爬取手机APP里的内容吗?
	
	原理说明:http://blog.csdn.net/c406495762/article/details/76850843
	
 * financical.py: 财务报表下载小助手

	爬取的数据存入数据库会吗?《跟股神巴菲特学习炒股之财务报表入库(MySQL)》也许能给你一些思路。
	
	原理说明:http://blog.csdn.net/c406495762/article/details/77801899
	
	动态示意图:
	
	![image](https://raw.githubusercontent.com/Jack-Cherish/Pictures/master/10.gif)
	
 * one_hour_spider:一小时入门Python3网络爬虫。

	原理说明:
	
	 * 知乎:https://zhuanlan.zhihu.com/p/29809609
	 * CSDN:http://blog.csdn.net/c406495762/article/details/78123502
	
	本次实战内容有:
	
	 * 网络小说下载(静态网站)-biqukan
	 * 优美壁纸下载(动态网站)-unsplash
	 * 视频下载
	 
 * douyin.py:抖音App视频下载
 
	抖音App的视频下载,就是普通的App爬取。

	原理说明:
	
	 * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html
	
 * douyin_pro:抖音App视频下载(升级版)
 
	抖音App的视频下载,添加视频解析网站,支持无水印视频下载,使用第三方平台解析。

	原理说明:
	
	 * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html
	 
 * douyin:抖音App视频下载(升级版2)
 
	抖音App的视频下载,添加视频解析网站,支持无水印视频下载,通过url解析,无需第三方平台。
	
	原理说明:
	
	 * 个人网站:http://cuijiahua.com/blog/2018/03/spider-5.html
	 
	动态示意图:
	
	![image](https://github.com/Jack-Cherish/Pictures/blob/master/14.gif)
	
 * geetest.py:GEETEST验证码识别
 
 	原理说明:
	
	 无
	
 * 12306.py:用Python抢火车票简单代码
 
	可以自己慢慢丰富,蛮简单,有爬虫基础很好操作,没有原理说明。
	
 * baiwan:百万英雄辅助答题
 
	效果图:
	
	![image](https://github.com/Jack-Cherish/Pictures/blob/master/11.gif)
	
	原理说明:
	
	* 个人网站:http://cuijiahua.com/blog/2018/01/spider_3.html
	
  	功能介绍:
	
	服务器端,使用Python(baiwan.py)通过抓包获得的接口获取答题数据,解析之后通过百度知道搜索接口匹配答案,将最终匹配的结果写入文件(file.txt)。
	
	手机抓包不会的朋友,可以看下我的早期[手机APP抓包教程](http://blog.csdn.net/c406495762/article/details/76850843 "悬停显示")。
	
	Node.js(app.js)每隔1s读取一次file.txt文件,并将读取结果通过socket.io推送给客户端(index.html)。
	
	亲测答题延时在3s左右。
	
	声明:没做过后端和前端,花了一天时间,现学现卖弄好的,javascript也是现看现用,百度的程序,调试调试而已。可能有很多用法比较low的地方,用法不对,请勿见怪,有大牛感兴趣,可以自行完善。

 * Netease:根据歌单下载网易云音乐
 	
	效果图:
	
	![image](https://github.com/Jack-Cherish/Pictures/blob/master/13.gif)
	
	原理说明:
	
	暂无
	
	功能介绍:
	
	根据music_list.txt文件里的歌单的信息下载网易云音乐,将自己喜欢的音乐进行批量下载。

 * bilibili:B站视频和弹幕批量下载
 	
	原理说明:
	
	暂无
	
	使用说明:
	
        python bilibili.py -d 猫 -k 猫 -p 10

        三个参数:
        -d	保存视频的文件夹名
        -k	B站搜索的关键字
        -p	下载搜索结果前多少页
	
 * jingdong:京东商品晒单图下载
 
 	效果图:
	
	![image](https://github.com/Jack-Cherish/Pictures/blob/master/jd.gif)
 	
	原理说明:
	
	暂无
	
	使用说明:
	
        python jd.py -k 芒果
	
         三个参数:
        -d	保存图片的路径,默认为jd.py文件所在文件夹
        -k	搜索关键词
        -n  	下载商品的晒单图个数,即n个商店的晒单图

 * zhengfang_system_spider:对正方教务管理系统个人课表,个人学生成绩,绩点等简单爬取
 
 	效果图:
	
	![image](/zhengfang_system_spider/screenshot/zf.png)
 	
	原理说明:
	
	暂无
	
	使用说明:
	
        cd zhengfang_system_spider
        pip install -r requirements.txt
        python spider.py

## 其它

 * 欢迎 Pull requests,感谢贡献。
 
 更多精彩,敬请期待!

<a name="微信"></a>  <a name="公众号"></a>

<img src="https://ftp.bmp.ovh/imgs/2020/07/112254f0199e3d4f.jpg" alt="wechat" width="400" height="200" align="bottom" />


================================================
FILE: baiduwenku.py
================================================
# -*- coding:UTF-8 -*-
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import time

if __name__ == '__main__':
	# Print the text of a Baidu Wenku document by driving Chrome with a
	# mobile user agent and paging through the reader.

	options = webdriver.ChromeOptions()
	options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"')
	# Raw string: the path contains backslashes that are not valid escapes.
	driver = webdriver.Chrome(r'J:\迅雷下载\chromedriver.exe', chrome_options=options)
	try:
		driver.get('https://wenku.baidu.com/view/aa31a84bcf84b9d528ea7a2c.html')

		# Read the title and the page count from the first rendered page.
		html = driver.page_source
		bf1 = BeautifulSoup(html, 'lxml')
		result = bf1.find_all(class_='rtcspage')
		bf2 = BeautifulSoup(str(result[0]), 'lxml')
		title = bf2.div.div.h1.string
		pagenum = bf2.find_all(class_='size')
		pagenum = BeautifulSoup(str(pagenum), 'lxml').span.string
		pagepattern = re.compile(r'页数:(\d+)页')
		num = int(pagepattern.findall(pagenum)[0])
		print('文章标题:%s' % title)
		print('文章页数:%d' % num)


		while True:
			# NOTE(review): num is divided by 5 on EVERY pass, so the number
			# of passes grows with log5(pages), not pages / 5 — confirm this
			# matches how many pages each "next" click loads.
			num = num / 5.0
			html = driver.page_source
			bf1 = BeautifulSoup(html, 'lxml')
			result = bf1.find_all(class_='rtcspage')
			for each_result in result:
				bf2 = BeautifulSoup(str(each_result), 'lxml')
				texts = bf2.find_all('p')
				for each_text in texts:
					main_body = BeautifulSoup(str(each_text), 'lxml')
					for each in main_body.find_all(True):
						if each.name == 'span':
							# .string is None for a span that wraps other
							# tags; those children are visited on their own.
							if each.string is not None:
								print(each.string.replace('\xa0', ''), end='')
						elif each.name == 'br':
							print('')
				print('\n')
			if num > 1:
				page = driver.find_elements_by_xpath("//div[@class='page']")
				# Scroll the last page into view so the "next" link is clickable.
				driver.execute_script('arguments[0].scrollIntoView();', page[-1])
				nextpage = driver.find_element_by_xpath("//a[@data-fun='next']")
				nextpage.click()
				time.sleep(3)
			else:
				break
	finally:
		# Always shut the browser down, including when parsing fails.
		driver.quit()

================================================
FILE: baiduwenku_pro_1.py
================================================
import requests
import re
import json
import os

# Module-wide HTTP session shared by fetch_url() and parse_other().
session = requests.session()


def fetch_url(url):
    """Fetch *url* through the shared session and return the body decoded as GBK."""
    response = session.get(url)
    raw = response.content
    return raw.decode('gbk')


def get_doc_id(url):
    """Return the document id found between ``view/`` and ``.html`` in *url*.

    The dot is escaped and the group is non-greedy, so a query string that
    itself contains ``.html`` does not leak into the id.

    Raises:
        IndexError: if *url* contains no ``view/<id>.html`` part.
    """
    return re.findall(r'view/(.*?)\.html', url)[0]


def parse_type(content):
    """Return the first quoted ``docType`` value found in the page source.

    Raises:
        IndexError: if no ``docType`` entry is present.
    """
    matches = re.findall(r"docType.*?\:.*?\'(.*?)\'\,", content)
    return matches[0]


def parse_title(content):
    """Return the first quoted ``title`` value found in the page source.

    Raises:
        IndexError: if no ``title`` entry is present.
    """
    matches = re.findall(r"title.*?\:.*?\'(.*?)\'\,", content)
    return matches[0]


def parse_doc(content):
    """Collect the text of a "doc" type document.

    Extracts the per-page JSON addresses from *content*, fetches all but the
    last five of them, and concatenates every text fragment. A newline is
    inserted whenever the fragment's "y" value differs from the previous one.
    """
    page_urls = [
        addr.replace("\\\\\\/", "/")
        for addr in re.findall('(https.*?0.json.*?)\\\\x22}', content)
    ]
    pieces = []
    for page_url in page_urls[:-5]:
        page = fetch_url(page_url)
        last_y = 0
        for text, y_pos in re.findall('"c":"(.*?)".*?"y":(.*?),', page):
            # A changed "y" value starts a new output line.
            if last_y != y_pos:
                last_y = y_pos
                pieces.append('\n')
            pieces.append(text.encode('utf-8').decode('unicode_escape', 'ignore'))
    return ''.join(pieces)


def parse_txt(doc_id):
    """Download the text of a "txt" type document.

    Reads md5sum, totalPageNum and rsign from the document-info endpoint,
    builds the text address from them, and joins the "c" field of every
    paragraph, turning the literal ``\\r`` / ``\\n`` sequences into real
    line breaks.
    """
    info_url = 'https://wenku.baidu.com/api/doc/getdocinfo?callback=cb&doc_id=' + doc_id
    info = fetch_url(info_url)
    md5 = re.findall('"md5sum":"(.*?)"', info)[0]
    pn = re.findall('"totalPageNum":"(.*?)"', info)[0]
    rsign = re.findall('"rsign":"(.*?)"', info)[0]
    text_url = ''.join([
        'https://wkretype.bdimg.com/retype/text/', doc_id,
        '?rn=', pn,
        '&type=txt', md5,
        '&rsign=', rsign,
    ])
    pages = json.loads(fetch_url(text_url))
    return ''.join(
        parag['c'].replace('\\r', '\r').replace('\\n', '\n')
        for page in pages
        for parag in page['parags']
    )


def parse_other(doc_id):
    """Save every page of a non-text document as a JPEG image.

    The images are written to a directory named after *doc_id* (created when
    missing) as ``0.jpg``, ``1.jpg``, ... in listing order.
    """
    listing_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + doc_id + "&pn=1&rn=99999&type=ppt"
    listing = fetch_url(listing_url)
    # The addresses are backslash-escaped in the response; strip the escapes.
    image_urls = [raw.replace("\\", '') for raw in re.findall('{"zoom":"(.*?)","page"', listing)]
    if not os.path.exists(doc_id):
        os.mkdir(doc_id)
    for index, image_url in enumerate(image_urls):
        image = session.get(image_url).content
        with open(os.path.join(doc_id, str(index) + '.jpg'), 'wb') as f:
            f.write(image)
    print("图片保存在" + doc_id + "文件夹")


def save_file(filename, content):
    """Write *content* to *filename* as UTF-8 and report the saved file name."""
    with open(filename, 'w', encoding='utf8') as out:
        out.write(content)
        print('已保存为:' + filename)


# test_txt_url = 'https://wenku.baidu.com/view/cbb4af8b783e0912a3162a89.html?from=search'
# test_ppt_url = 'https://wenku.baidu.com/view/2b7046e3f78a6529657d5376.html?from=search'
# test_pdf_url = 'https://wenku.baidu.com/view/dd6e15c1227916888586d795.html?from=search'
# test_xls_url = 'https://wenku.baidu.com/view/eb4a5bb7312b3169a551a481.html?from=search'
def main():
    """Ask for a Wenku address and save the document according to its type.

    "doc" and "txt" documents are saved as ``<title>.txt``; every other type
    is saved as page images.
    """
    url = input('请输入要下载的文库URL地址')
    content = fetch_url(url)
    doc_id = get_doc_id(url)
    doc_type = parse_type(content)
    title = parse_title(content)
    if doc_type not in ('doc', 'txt'):
        parse_other(doc_id)
        return
    result = parse_doc(content) if doc_type == 'doc' else parse_txt(doc_id)
    save_file(title + '.txt', result)


# Run the interactive downloader only when executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: baiwan/app.js
================================================
// HTTP server that answers every request with index.html, plus the shared
// state used by messageGet() and the socket handlers further down.
var http = require('http');
var fs = require('fs');
var schedule = require("node-schedule"); 
// Latest content of file.txt, keyed 'line1', 'line2', ...
var message = {};
// Running line counter used by messageGet().
var count = 0;
// NOTE(review): the readFile error argument is ignored, so a missing
// index.html still produces a 200 response.
var server = http.createServer(function (req,res){
    fs.readFile('./index.html',function(error,data){
        res.writeHead(200,{'Content-Type':'text/html'});
        res.end(data,'utf-8');
    });
}).listen(80);
console.log('Server running!');
var lineReader = require('line-reader');
// Reads file.txt line by line into `message` ('line1', 'line2', ...) and
// marks the remaining slots up to 'line25' with 'f'.
// NOTE(review): if eachLine invokes its callback asynchronously, the
// `count` check below runs before any line of THIS call has been read, so
// it acts on the counter left over from the previous call — confirm.
function messageGet(){
    lineReader.eachLine('file.txt', function(line, last) {
        count++;
        var name = 'line' + count;
        console.log(name);
	console.log(line);
        message[name] = line;
    });  
    if(count == 25){
    	count = 0;
    }
    else{
        // Blank out every slot after the last line that was counted.
    	for(var i = count+1; i <= 25; i++){
  	    var name = 'line' + i;
            message[name] = 'f';
	}
  	count = 0;
    }
}
// Attach socket.io to the HTTP server and re-read file.txt on a schedule.
var io = require('socket.io').listen(server);
var rule = new schedule.RecurrenceRule();
// NOTE(review): the list holds 1..1799 and is assigned to rule.second;
// presumably only 1..59 can ever match and second 0 is never scheduled —
// confirm against node-schedule.
var times = [];
for(var i=1; i<1800; i++){
    times.push(i);
}
rule.second = times;
schedule.scheduleJob(rule, function(){
        messageGet();
});
// The current message is pushed when a client connects: to that client and,
// via broadcast, to all other connected clients.
io.sockets.on('connection',function(socket){
       // console.log('User connected' + count + 'user(s) present');
        socket.emit('users',message);
        socket.broadcast.emit('users',message);

    socket.on('disconnect',function(){
        console.log('User disconnected');
        //socket.broadcast.emit('users',message);  
    });
});


================================================
FILE: baiwan/baiwan.py
================================================
# -*-coding:utf-8 -*-
import requests
from lxml import etree
from bs4 import BeautifulSoup
import urllib
import time, re, types, os


"""
代码写的匆忙,本来想再重构下,完善好注释再发,但是比较忙,想想算了,所以自行完善吧!写法很不规范,勿见怪。

作者:  Jack Cui
Website:http://cuijiahua.com
注:     本软件仅用于学习交流,请勿用于任何商业用途!
"""

class BaiWan():
	"""
	Quiz helper: polls the quiz API for the current question, searches
	Baidu Zhidao for it and writes the findings to file.txt.
	"""
	def __init__(self):
		# Baidu Zhidao search endpoint.
		self.baidu = 'http://zhidao.baidu.com/search?'
		# Quiz API endpoint. It differs per user and embeds device
		# information, so it is not published here; capture your own.
		self.api = 'https://api-spe-ttl.ixigua.com/xxxxxxx={}'.format(int(time.time()*1000))

	def get_question(self):
		"""
		Poll the quiz API, extract the question and its options, and hand
		them to search(). The last seen question is kept in question.txt so
		that the same question is not processed twice.
		"""
		to = True
		while to:
			list_dir = os.listdir('./')
			if 'question.txt' not in list_dir:
				with open('question.txt', 'w') as fw:
					fw.write('百万英雄尚未出题请稍后!')
			go = True
			# NOTE(review): the branches that set `to = False` leave `go`
			# untouched, so this inner loop keeps polling (without any
			# delay) until a new question shows up — confirm intent.
			while go:
				req = requests.get(self.api, verify=False)
				req.encoding = 'utf-8'
				html = req.text

				print(html)
				if '*' in html:
					question_start = html.index('*')
					# NOTE(review): both attempts look up the same
					# character; presumably one of them was meant to be
					# the full-width question mark — confirm.
					try:
						question_end = html.index('?')
					except ValueError:
						question_end = html.index('?')
					question = html[question_start:question_end][2:]
					if question is not None:
						with open('question.txt', 'r') as fr:
							text = fr.readline()
						if text != question:
							# A new question: remember it and process it.
							print(question)
							go = False
							with open('question.txt', 'w') as f:
								f.write(question)
						else:
							time.sleep(1)
					else:
						to = False
				else:
					to = False

			# Keep only CJK characters, letters, digits and + - * /.
			temp = re.findall(r'[\u4e00-\u9fa5a-zA-Z0-9\+\-\*/]', html[question_end+1:])
			b_index = []
			print(temp)

			# Collect the positions of the markers that separate the options.
			for index, each in enumerate(temp):
				if each == 'B':
					b_index.append(index)
				elif each == 'P' and (len(temp) - index) <= 3 :
					b_index.append(index)
					break

			if len(b_index) == 4:
				a = ''.join(temp[b_index[0] + 1:b_index[1]])
				b = ''.join(temp[b_index[1] + 1:b_index[2]])
				c = ''.join(temp[b_index[2] + 1:b_index[3]])
				alternative_answers = [a,b,c]

				# "Which of the following" questions are searched with the
				# options put in front of the question text.
				if '下列' in question:
					question = a + ' ' + b + ' ' + c + ' ' + question.replace('下列', '')
				elif '以下' in question:
					question = a + ' ' + b + ' ' + c + ' ' + question.replace('以下', '')
			else:
				alternative_answers = []
			# Search for the answer using the question and its options.
			self.search(question, alternative_answers)
			time.sleep(1)

	@staticmethod
	def _recommend(question, alternative_answers, answer_texts):
		"""
		Pick the recommended option.
		:params question: question text
		:params alternative_answers: non-empty list of option strings
		:params answer_texts: texts of the search hits
		:return: the recommended option, or '' when none can be derived
		"""
		best = []
		print('\n')
		# For every search hit, count the first option it mentions.
		for text in answer_texts:
			for each_answer in alternative_answers:
				if each_answer in text:
					best.append(each_answer)
					print(each_answer,end=' ')
					print('\n')
					break
		statistics = {}
		for each in best:
			statistics[each] = statistics.get(each, 0) + 1
		# Words that turn the question into "which one is NOT ...".
		errors = ['没有', '不是', '不对', '不正确','错误','不包括','不包含','不在','错']
		error_list = list(map(lambda x: x in question, errors))
		print(error_list)
		recommend = ''
		if sum(error_list) >= 1:
			# Negated question: recommend the first option that no search
			# hit mentioned. The membership test is against the dict keys;
			# testing against .items() never matched a plain string.
			for each_answer in alternative_answers:
				if each_answer not in statistics:
					recommend = each_answer
					print('推荐答案:', recommend)
					break
		elif statistics != {}:
			recommend = sorted(statistics.items(), key=lambda e:e[1], reverse=True)[0][0]
			print('推荐答案:', recommend)
		return recommend

	def search(self, question, alternative_answers):
		"""
		Search Baidu Zhidao for *question* and write the result to file.txt.
		:params question: question text
		:params alternative_answers: list of option strings, may be empty
		"""
		print(question)
		print(alternative_answers)
		infos = {"word":question}
		# Call the Baidu search endpoint.
		url = self.baidu + 'lm=0&rn=10&pn=0&fr=search&ie=gbk&' + urllib.parse.urlencode(infos, encoding='GB2312')
		print(url)
		headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
		}
		sess = requests.Session()
		req = sess.get(url = url, headers=headers, verify=False)
		req.encoding = 'gbk'
		bf = BeautifulSoup(req.text, 'lxml')
		answers = bf.find_all('dd',class_='dd answer')
		for answer in answers:
			print(answer.text)

		# Recommended answer; stays empty when there are no options.
		recommend = ''
		if alternative_answers != []:
			recommend = self._recommend(question, alternative_answers, [answer.text for answer in answers])

		# Write the result file.
		with open('file.txt', 'w') as f:
			f.write('问题:' + question)
			f.write('\n')
			f.write('*' * 50)
			f.write('\n')
			if alternative_answers != []:
				f.write('选项:')
				for each_answer in alternative_answers:
					f.write(each_answer)
					f.write('  ')
			f.write('\n')
			f.write('*' * 50)
			f.write('\n')
			f.write('参考答案:\n')
			for answer in answers:
				f.write(answer.text)
				f.write('\n')
			f.write('*' * 50)
			f.write('\n')
			if recommend != '':
				f.write('最终答案请自行斟酌!\t')
				# Write the value that was actually recommended.
				f.write('推荐答案:' + recommend)


# Script entry point: start polling for questions.
if __name__ == '__main__':
	bw = BaiWan()
	bw.get_question()

================================================
FILE: baiwan/file.txt
================================================
⣺Ǽ¼
**************************************************
ѡ723  81  101  
**************************************************
ο𰸣

Ƽ
81 ÿİһйžգҲСһڡ August 1, anniversary of the founding of the Chinese People's Liberation Army֪Ⱦ뵽http://baike.baidu.com/view/23211.htm
[ϸ]

ãйžĽÿİһգưһڣİһպ
𣺽81գ71ա ÿ81йžգ׳ơһڡ192781գй챱ˣܶ  Ҷͦ  е쵼£ڽϲװ壬췴Թ񵳷...
730
𣺰һǽڣǰһ첻731ô
192781һϲ,йװ񵳷ɵĵһǹ,־йй쵼װʱ,־й͵ӵĵÿİһйž
Դйʱй쵼ϲ塣192781յϲ壬йװ񵳷ɵĵһǹ־йй쵼װʱڣ־й͵ӵĵ 19337£...
Ԫ1181101
𣺰һŽ
 201581 ũ ʮ  201681 ũ إ  ÿİһйžգҲСһڡ1933711գлά͹ʱίԱ630յĽ飬81...
**************************************************
մã	Ƽ𰸣81

================================================
FILE: baiwan/index.html
================================================
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8" />
    <meta http-equiv="refresh" content="2">
    <title>Jack Cui答题辅助系统</title>
  </head>
  <body>
    <h1>百万英雄答题辅助系统</h1>
    <p id="line1"></p>
    <p id="line2"></p>
    <p id="line3"></p>
    <p id="line4"></p>
    <p id="line5"></p>
    <p id="line6"></p>
    <p id="line7"></p>
    <p id="line8"></p>
    <p id="line9"></p>
    <p id="line10"></p>
    <p id="line11"></p>
    <p id="line12"></p>
    <p id="line13"></p>
    <p id="line14"></p>
    <p id="line15"></p>
    <p id="line16"></p>
    <p id="line17"></p>
    <p id="line18"></p>
    <p id="line19"></p>
    <p id="line20"></p>
    <p id="line21"></p>
    <p id="line22"></p>
    <p id="line23"></p>
    <p id="line24"></p>
    <p id="line25"></p>
    <script src="http://222.222.124.77:9001/jquery.min.js"></script>
    <script src="/socket.io/socket.io.js"></script>
    <script>
      // Address of the socket.io server; replace the placeholder with the
      // real host and port.
      var socket = io.connect('http://你的IP:端口');
      // Number of <p id="lineN"> placeholders in the page.
      var LINE_COUNT = 25;
      // Each 'users' event carries an object keyed 'line1'..'line25'.
      // The value 'f' marks an unused line; a missing key is treated the
      // same way so the page never shows the text "undefined".
      socket.on('users', function (data) {
        for (var i = 1; i <= LINE_COUNT; i++) {
          var key = 'line' + i;
          var element = document.getElementById(key);
          var value = data[key];
          // NOTE: innerHTML is kept as in the original; the value is
          // interpreted as markup, not as plain text.
          element.innerHTML = (value === undefined || value == 'f') ? '' : value;
        }
      });
    </script>

  </body>
</html>


================================================
FILE: baiwan/question.txt
================================================
Ǽ¼

================================================
FILE: bilibili/README.md
================================================
## 功能

下载B站视频和弹幕,将xml原生弹幕转换为ass弹幕文件,支持PotPlayer等播放器的弹幕播放。

## 作者

* Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示")
* Author: Jack Cui
* Date: 2018.6.12

## 更新

* 2018.09.12:添加FFmpeg分段视频合并

## 使用说明

FFmpeg下载,并配置环境变量。http://ffmpeg.org/

	python bilibili.py -d 猫 -k 猫 -p 10

	三个参数:
	-d	保存视频的文件夹名
	-k	B站搜索的关键字
	-p	下载搜索结果前多少页


================================================
FILE: bilibili/bilibili.py
================================================
# -*-coding:utf-8 -*-
# Website: http://cuijiahua.com
# Author: Jack Cui
# Date: 2018.6.9

import requests, json, re, sys, os, urllib, argparse, time
from urllib.request import urlretrieve
from contextlib import closing
from urllib import parse
import xml2ass

class BiliBili:
	"""
	Search Bilibili by keyword and download the matching videos together with
	their danmaku (bullet comments), which are converted from XML to ASS.
	"""
	def __init__(self, dirname, keyword):
		"""
		Prepare the request headers, the HTTP session and the output directory name.
		Parameters:
			dirname: directory the videos and danmaku files are saved to
			keyword: search keyword, only used here to build the Referer header
		"""
		user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

		# Headers for the video file download itself (needs a search-page Referer).
		self.dn_headers = {'User-Agent': user_agent,
			'Accept': '*/*',
			'Accept-Encoding': 'gzip, deflate, br',
			'Accept-Language': 'zh-CN,zh;q=0.9',
			'Referer': 'https://search.bilibili.com/all?keyword=%s' % parse.quote(keyword)}

		# Headers for the JSON search API.
		self.search_headers = {'User-Agent': user_agent,
			'Accept-Language': 'zh-CN,zh;q=0.9',
			'Accept-Encoding': 'gzip, deflate, br',
			'Accept': 'application/json, text/plain, */*'}

		# Headers for the HTML video page.
		self.video_headers = {'User-Agent': user_agent,
			'Accept-Language': 'zh-CN,zh;q=0.9',
			'Accept-Encoding': 'gzip, deflate, br',
			'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'}

		# Headers for the danmaku XML download.
		self.danmu_header = {'User-Agent': user_agent,
			'Accept': '*/*',
			'Accept-Encoding': 'gzip, deflate, br',
			'Accept-Language': 'zh-CN,zh;q=0.9'}

		self.sess = requests.Session()

		self.dir = dirname

	def video_downloader(self, video_url, video_name):
		"""
		Download one video file into self.dir, printing progress to stdout.
		Parameters:
			video_url: direct URL of the video (segment) file
			video_name: file name to save as, relative to self.dir
		Returns:
			None
		"""
		chunk_size = 1024
		size = 0
		with closing(self.sess.get(video_url, headers=self.dn_headers, stream=True, verify=False)) as response:
			# Check the status first: error responses need not carry a Content-Length.
			if response.status_code != 200:
				print('链接异常')
				return
			# A missing or zero Content-Length means the total size is unknown;
			# the file is still downloaded, only the progress display is skipped.
			content_size = int(response.headers.get('content-length', 0))
			if content_size:
				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
			video_name = os.path.join(self.dir, video_name)
			with open(video_name, 'wb') as file:
				for data in response.iter_content(chunk_size = chunk_size):
					file.write(data)
					size += len(data)
					if content_size:
						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
			if content_size and size == content_size:
				# Terminate the '\r'-rewritten progress line.
				print('\n')

	def search_video(self, search_url):
		"""
		Query the search API once and extract titles and play-page URLs.
		Parameters:
			search_url: fully built search API URL (one result page)
		Returns:
			titles: list of video titles with the keyword highlight markup removed
			arcurls: list of video play-page URLs, parallel to titles
		"""
		req = self.sess.get(url=search_url, headers=self.search_headers, verify=False)
		html = json.loads(req.text)
		# A page without hits may carry no 'result' entry at all; treat it as empty.
		videos = (html.get('data') or {}).get('result') or []
		titles = []
		arcurls = []
		for video in videos:
			titles.append(video['title'].replace('<em class="keyword">','').replace('</em>',''))
			arcurls.append(video['arcurl'])
		return titles, arcurls

	def get_download_url(self, arcurl):
		"""
		Resolve the downloadable segment URLs and the danmaku oid of a video page.
		Parameters:
			arcurl: video play-page URL
		Returns:
			download_url: list of segment URLs, in playback order
			oid: danmaku id, taken from a path component of the first segment URL
			Both are '' when the page does not expose the expected play info.
		"""
		req = self.sess.get(url=arcurl, headers=self.video_headers, verify=False)
		pattern = '.__playinfo__=(.*)</script><script>window.__INITIAL_STATE__='
		try:
			infos = re.findall(pattern, req.text)[0]
			durl = json.loads(infos)['durl']
			download_url = [segment['url'] for segment in durl]
			url = download_url[0]
			parts = url.split('/')
			if 'mirrork' in url:
				oid = parts[6]
			else:
				oid = parts[7]
				# A long 8th component is not the id; fall back to the 7th.
				if len(oid) >= 10:
					oid = parts[6]
		except (IndexError, KeyError, TypeError, ValueError):
			# No play info, malformed JSON, no 'durl' list or an unexpected URL layout.
			return '',''
		return download_url, oid


	def download_xml(self, danmu_url, danmu_name):
		"""
		Download the raw XML danmaku of a video.
		Parameters:
			danmu_url: danmaku API URL
			danmu_name: path of the XML file to write
		Returns:
			None
		"""
		with closing(self.sess.get(danmu_url, headers=self.danmu_header, stream=True, verify=False)) as response:
			if response.status_code == 200:
				with open(danmu_name, 'wb') as file:
					# iter_content() defaults to 1-byte chunks; read in blocks instead.
					for data in response.iter_content(chunk_size = 1024):
						file.write(data)
			else:
				print('链接异常')

	def get_danmu(self, oid, filename):
		"""
		Download the danmaku of a video and convert it to an ASS subtitle file.
		Parameters:
			oid: danmaku oid
			filename: file name prefix (without extension) inside self.dir
		Returns:
			None
		"""
		danmu_url = 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(oid)
		danmu_name = os.path.join(self.dir, filename + '.xml')
		danmu_ass = os.path.join(self.dir, filename + '.ass')
		self.download_xml(danmu_url, danmu_name)
		time.sleep(0.5)
		xml2ass.Danmaku2ASS(danmu_name, danmu_ass, 1280, 720)
		# os.remove(danmu_name)

	def _merge_segments(self, movies, output_name):
		"""
		Concatenate downloaded segments into one file with FFmpeg's concat demuxer.
		Parameters:
			movies: segment file names (relative to self.dir), in playback order
			output_name: merged file name (relative to self.dir)
		Returns:
			True if FFmpeg exited successfully, otherwise False
		"""
		import subprocess

		filelist_fname = os.path.join(self.dir, 'filelist.txt')
		# Quote every entry so names with spaces survive; a single quote inside a
		# quoted concat entry is written as '\''.
		with open(filelist_fname, 'w', encoding='utf-8') as f:
			for flv in movies:
				f.write("file '%s'\n" % flv.replace("'", "'\\''"))
		try:
			# Argument list + cwd instead of a shell string: behaves the same on
			# every platform and is immune to shell metacharacters in the title.
			result = subprocess.run(['ffmpeg', '-f', 'concat', '-safe', '0', '-i', 'filelist.txt', '-c', 'copy', output_name], cwd=self.dir)
			merged = result.returncode == 0
		except OSError:
			# The ffmpeg executable could not be started.
			print('请安装FFmpeg,并配置环境变量 http://ffmpeg.org/')
			merged = False
		finally:
			os.remove(filelist_fname)
		return merged

	def search_videos(self, keyword, pages):
		"""
		Search for videos and download each hit (video segments plus danmaku).
		Parameters:
			keyword: search keyword
			pages: number of result pages to download
		Returns:
			None
		"""
		os.makedirs(self.dir, exist_ok=True)
		# Characters removed from titles before they are used as file names.
		strip_table = str.maketrans('', '', '´☆❤◦\\/:*?"<>|')
		for page in range(1, pages+1):
			search_url = 'https://api.bilibili.com/x/web-interface/search/type?jsonp=jsonp&search_type=video&keyword={}&page={}'.format(parse.quote(keyword), page)
			titles, arcurls = self.search_video(search_url)
			for title, arcurl in zip(titles, arcurls):
				title = title.translate(strip_table)
				if os.path.exists(os.path.join(self.dir, title + '.flv')):
					continue
				download_url, oid = self.get_download_url(arcurl)
				if not download_url or oid == '':
					# Nothing could be resolved for this page; there is no video
					# and no usable oid to fetch danmaku with.
					print('视频地址解析失败,已跳过:%s' % title)
					continue
				movies = []
				for i, segment_url in enumerate(download_url, 1):
					if segment_url == '':
						continue
					fname = title + '_' + str(i) + '.flv'
					movies.append(fname)
					print('第[ %d ]页:视频[ %s ]下载中:' % (page, fname))
					self.video_downloader(segment_url, fname)
					print('视频下载完成!')
				if len(movies) > 1:
					# Only delete the segments once the merged file really exists.
					if self._merge_segments(movies, title + '.flv'):
						for movie in movies:
							os.remove(os.path.join(self.dir, movie))
						print('视频合并完成!')
				self.get_danmu(oid, title)
				print('弹幕下载完成!')

if __name__ == '__main__':
	# With no arguments at all, show the usage text instead of an argparse error.
	if len(sys.argv) == 1:
		sys.argv.append('--help')

	# NOTE: `_` is not defined in this script. It is the gettext translation
	# function that importing xml2ass installs into builtins via gettext.install().
	parser = argparse.ArgumentParser()
	parser.add_argument('-d', '--dir', required=True, help=_('download path'))
	parser.add_argument('-k', '--keyword', required=True, help=_('search content'))
	# Optional: the declared default of one page can only apply when the option
	# is not marked as required.
	parser.add_argument('-p', '--pages', help=_('the number of pages for downloading'), type=int, default=1)

	args = parser.parse_args()
	B = BiliBili(args.dir,args.keyword)
	B.search_videos(args.keyword, args.pages)

	print('全部下载完成!')


================================================
FILE: bilibili/xml2ass.py
================================================
# The original author of this program, Danmaku2ASS, is StarBrilliant.
# This file is released under General Public License version 3.
# You should have received a copy of General Public License text alongside with
# this program. If not, you can obtain it at http://gnu.org/copyleft/gpl.html .
# This program comes with no warranty, the author will not be responsible for
# any damage or problems caused by this program.

import argparse
import calendar
import gettext
import io
import json
import logging
import math
import os
import random
import re
import sys
import time
import xml.dom.minidom


# This module relies on Python 3 semantics; refuse to run on anything older.
if sys.version_info[0] < 3:
    raise RuntimeError('at least Python 3.0 is required')

# Install the translation function `_` into builtins.  Message catalogs are
# looked up in a 'locale' directory next to the running script (or next to
# the current directory's 'locale' placeholder when sys.argv[0] is empty).
_script_path = os.path.realpath(sys.argv[0] or 'locale')
_locale_dir = os.path.join(os.path.dirname(os.path.abspath(_script_path)), 'locale')
gettext.install('danmaku2ass', _locale_dir)

def SeekZero(function):
    """Decorator: rewind the file argument before and after calling *function*.

    The wrapped function receives a single file-like object.  The file is
    positioned at offset 0 before the call and again afterwards, even when
    the call raises.
    """
    from functools import wraps

    @wraps(function)  # keep the wrapped function's name and docstring
    def decorated_function(file_):
        file_.seek(0)
        try:
            return function(file_)
        finally:
            file_.seek(0)
    return decorated_function


def EOFAsNone(function):
    """Decorator: turn an EOFError raised by *function* into a None result.

    All arguments are passed through unchanged; any other exception
    propagates to the caller.
    """
    from functools import wraps

    @wraps(function)  # keep the wrapped function's name and docstring
    def decorated_function(*args, **kwargs):
        try:
            return function(*args, **kwargs)
        except EOFError:
            return None
    return decorated_function


@SeekZero
@EOFAsNone
def ProbeCommentFormat(f):
    """Guess the comment file format from the first characters of *f*.

    Returns one of the keys of CommentFormatMap ('Acfun', 'Tudou', 'sH5V',
    'Niconico', 'Bilibili', 'MioMio'), or None when the leading characters
    match no known format.
    """
    # 14 characters following an opening '{'.
    json_prefixes = {
        '"status_code":': 'Tudou',
        '"root":{"total': 'sH5V',
    }
    # 38 characters following '<?': the XML declaration plus what comes next.
    xml_prologs = {
        'xml version="1.0" encoding="UTF-8"?><p': 'Niconico',
        'xml version="1.0" encoding="UTF-8"?><i': 'Bilibili',
        'xml version="1.0" encoding="utf-8"?><i': 'Bilibili',  # tucao.cc, with the same file format as Bilibili
        'xml version="1.0" encoding="Utf-8"?>\n<': 'Bilibili',  # Komica, with the same file format as Bilibili
        'xml version="1.0" encoding="UTF-8"?>\n<': 'MioMio',
    }

    first = f.read(1)
    if first == '[':
        # It is unwise to wrap a JSON object in an array!
        # See this: http://haacked.com/archive/2008/11/20/anatomy-of-a-subtle-json-vulnerability.aspx/
        # Never follow what the Acfun developers did!
        return 'Acfun'
    if first == '{':
        return json_prefixes.get(f.read(14))
    if first != '<':
        return None

    second = f.read(1)
    if second == 'p':
        return 'Niconico'  # Himawari Douga, with the same file format as Niconico Douga
    if second == '?':
        return xml_prologs.get(f.read(38))
    return None


#
# ReadComments**** protocol
#
# Input:
#     f:         Input file
#     fontsize:  Default font size
#
# Output:
#     yield a tuple:
#         (timeline, timestamp, no, comment, pos, color, size, height, width)
#     timeline:  The position when the comment is replayed
#     timestamp: The UNIX timestamp when the comment is submitted
#     no:        A sequence of 1, 2, 3, ..., used for sorting
#     comment:   The content of the comment
#     pos:       0 for regular moving comment,
#                1 for top centered comment,
#                2 for bottom centered comment,
#                3 for reversed moving comment
#     color:     Font color represented in 0xRRGGBB,
#                e.g. 0xffffff for white
#     size:      Font size
#     height:    The estimated height in pixels
#                i.e. (comment.count('\n')+1)*size
#     width:     The estimated width in pixels
#                i.e. CalculateLength(comment)*size
#
# After implementing ReadComments****, make sure to update ProbeCommentFormat
# and CommentFormatMap.
#


def ReadCommentsNiconico(f, fontsize):
    """Yield comment tuples parsed from a Niconico XML comment file.

    f:        file (or file name) accepted by xml.dom.minidom.parse
    fontsize: default font size; 'big' / 'small' styles scale it

    Each <chat> element becomes one tuple in the ReadComments**** layout.
    Comments starting with '/' are skipped; elements that cannot be parsed
    are logged as invalid and skipped.
    """
    NiconicoColorMap = {'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffcc00, 'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff, 'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000, 'niconicowhite': 0xcccc99, 'white2': 0xcccc99, 'truered': 0xcc0033, 'red2': 0xcc0033, 'passionorange': 0xff6600, 'orange2': 0xff6600, 'madyellow': 0x999900, 'yellow2': 0x999900, 'elementalgreen': 0x00cc66, 'green2': 0x00cc66, 'marineblue': 0x33ffcc, 'blue2': 0x33ffcc, 'nobleviolet': 0x6633cc, 'purple2': 0x6633cc}
    # Style words of the 'mail' attribute that select a position or a size factor.
    position_words = {'ue': 1, 'shita': 2}
    scale_words = {'big': 1.44, 'small': 0.64}

    document = xml.dom.minidom.parse(f)
    for chat in document.getElementsByTagName('chat'):
        try:
            text = str(chat.childNodes[0].wholeText)
            if text.startswith('/'):
                continue  # ignore advanced comments
            pos, color, size = 0, 0xffffff, fontsize
            for word in str(chat.getAttribute('mail')).split():
                if word in position_words:
                    pos = position_words[word]
                elif word in scale_words:
                    size = fontsize*scale_words[word]
                elif word in NiconicoColorMap:
                    color = NiconicoColorMap[word]
            # vpos is converted with a 0.01 factor; negative values are clamped to 0.
            timeline = max(int(chat.getAttribute('vpos')), 0)*0.01
            timestamp = int(chat.getAttribute('date'))
            number = int(chat.getAttribute('no'))
            yield (timeline, timestamp, number, text, pos, color, size, (text.count('\n')+1)*size, CalculateLength(text)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % chat.toxml())
            continue


def ReadCommentsAcfun(f, fontsize):
    """Yield comment tuples parsed from an Acfun JSON comment file.

    f:        file object containing a JSON array of comment objects
    fontsize: default font size; each comment's own size is scaled by fontsize/25

    Each comment's 'c' field is a comma separated parameter list and 'm' is
    the message.  Mode '7' comments are yielded with pos 'acfunpos' and the
    message decoded from JSON into a dict.  Comments that are malformed or
    use an unknown mode are logged as invalid and skipped.
    """
    position_by_mode = {'1': 0, '2': 0, '4': 2, '5': 1}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment['c']).split(',')
            # Explicit checks instead of assert: they must survive `python -O`.
            if len(p) < 6:
                raise ValueError('too few comment parameters')
            if p[2] != '7' and p[2] not in position_by_mode:
                raise ValueError('unsupported comment mode')
            size = int(p[3])*fontsize/25.0
            if p[2] != '7':
                c = str(comment['m']).replace('\\r', '\n').replace('\r', '\n')
                yield (float(p[0]), int(p[5]), i, c, position_by_mode[p[2]], int(p[1]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                c = dict(json.loads(comment['m']))
                yield (float(p[0]), int(p[5]), i, c, 'acfunpos', int(p[1]), size, 0, 0)
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            # KeyError: a comment object without the 'c' or 'm' field.
            logging.warning(_('Invalid comment: %r') % comment)
            continue


def ReadCommentsBilibili(f, fontsize):
    """Yield comment tuples parsed from a Bilibili XML comment file.

    f:        file (or file name) accepted by xml.dom.minidom.parse
    fontsize: default font size; each comment's own size is scaled by fontsize/25

    Every <d> element carries its parameters in the comma separated 'p'
    attribute.  Mode '7' (positioned) comments are yielded with pos 'bilipos'
    and their unscaled size.  Elements that are malformed or use an unknown
    mode are logged as invalid and skipped.
    """
    position_by_mode = {'1': 0, '4': 2, '5': 1, '6': 3}
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('d')
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment.getAttribute('p')).split(',')
            # Explicit checks instead of assert: they must survive `python -O`.
            if len(p) < 5:
                raise ValueError('too few comment parameters')
            if p[1] == '7':  # positioned comment
                c = str(comment.childNodes[0].wholeText)
                yield (float(p[0]), int(p[4]), i, c, 'bilipos', int(p[3]), int(p[2]), 0, 0)
            elif p[1] in position_by_mode:
                # A literal '/n' in the text stands for a line break.
                c = str(comment.childNodes[0].wholeText).replace('/n', '\n')
                size = int(p[2])*fontsize/25.0
                yield (float(p[0]), int(p[4]), i, c, position_by_mode[p[1]], int(p[3]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                raise ValueError('unsupported comment mode')
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue


def ReadCommentsTudou(f, fontsize):
    """Yield comment tuples parsed from a Tudou JSON comment file.

    f:        file object containing a JSON object with a 'comment_list' array
    fontsize: default font size; the comment's size class (0, 1, 2) scales it

    Comments that are malformed, lack a field, or use an unknown position or
    size class are logged as invalid and skipped.
    """
    position_by_pos = {3: 0, 4: 2, 6: 1}
    scale_by_size = {0: 0.64, 1: 1, 2: 1.44}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element['comment_list']):
        try:
            # Explicit checks instead of assert: they must survive `python -O`.
            if comment['pos'] not in position_by_pos:
                raise ValueError('unsupported comment position')
            c = str(comment['data'])
            if comment['size'] not in scale_by_size:
                raise ValueError('unsupported comment size class')
            size = scale_by_size[comment['size']]*fontsize
            yield (int(comment['replay_time']*0.001), int(comment['commit_time']), i, c, position_by_pos[comment['pos']], int(comment['color']), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            # KeyError: a comment object missing one of the expected fields.
            logging.warning(_('Invalid comment: %r') % comment)
            continue


def ReadCommentsMioMio(f, fontsize):
    """Yield comment tuples parsed from a MioMio XML comment file.

    f:        file (or file name) accepted by xml.dom.minidom.parse
    fontsize: default font size; each comment's own size is scaled by fontsize/25

    Every <data> element holds <playTime>, <times> and a <message> whose
    attributes give font size, mode and color.  Elements that are malformed
    or use an unknown mode are logged as invalid and skipped.
    """
    position_by_mode = {'1': 0, '4': 2, '5': 1}
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('data')
    for i, comment in enumerate(comment_element):
        try:
            message = comment.getElementsByTagName('message')[0]
            c = str(message.childNodes[0].wholeText)
            size = int(message.getAttribute('fontsize'))*fontsize/25.0
            timeline = float(comment.getElementsByTagName('playTime')[0].childNodes[0].wholeText)
            submitted = comment.getElementsByTagName('times')[0].childNodes[0].wholeText
            # The wall-clock time is read as UTC and then shifted back by
            # 28800 seconds (8 hours).
            timestamp = int(calendar.timegm(time.strptime(submitted, '%Y-%m-%d %H:%M:%S')))-28800
            yield (timeline, timestamp, i, c, position_by_mode[message.getAttribute('mode')], int(message.getAttribute('color')), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            # KeyError: a mode that is not one of the supported ones.
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue


def ReadCommentsSH5V(f, fontsize):
    """Yield comment tuples parsed from an sH5V JSON comment file.

    f:        file object containing a JSON object with comments under root.bgs
    fontsize: font size used for every non-positioned comment

    Type '7' comments are yielded with pos 'sH5Vpos', their own size and seven
    extra fields (x, y, dur, data1..data4) appended to the usual tuple.
    Comments that are malformed, lack a field or use an unknown type are
    logged as invalid and skipped.
    """
    position_by_type = {'0': 0, '1': 0, '4': 2, '5': 1}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element["root"]["bgs"]):
        try:
            c_at = str(comment['at'])
            c_type = str(comment['type'])
            c_date = str(comment['timestamp'])
            c_color = str(comment['color'])
            c = str(comment['text'])
            size = fontsize
            if c_type != '7':
                # The color string is parsed as hex after dropping its first character.
                yield (float(c_at), int(c_date), i, c, position_by_type[c_type], int(c_color[1:], 16), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                c_x = float(comment['x'])
                c_y = float(comment['y'])
                size = int(comment['size'])
                dur = int(comment['dur'])
                data1 = float(comment['data1'])
                data2 = float(comment['data2'])
                data3 = int(comment['data3'])
                data4 = int(comment['data4'])
                yield (float(c_at), int(c_date), i, c, 'sH5Vpos', int(c_color[1:], 16), size, 0, 0, c_x, c_y, dur, data1, data2, data3, data4)
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            # KeyError: a missing field or a type outside the supported set.
            logging.warning(_('Invalid comment: %r') % comment)
            continue


# Maps a format name returned by ProbeCommentFormat to its reader generator;
# the None entry covers files whose format could not be identified.
CommentFormatMap = {
    None: None,
    'Niconico': ReadCommentsNiconico,
    'Acfun': ReadCommentsAcfun,
    'Bilibili': ReadCommentsBilibili,
    'Tudou': ReadCommentsTudou,
    'MioMio': ReadCommentsMioMio,
    'sH5V': ReadCommentsSH5V,
}


def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
    """Write one Bilibili positioned comment to *f* as an ASS Dialogue line.

    f:       writable text file the Dialogue line is appended to
    c:       comment tuple; c[0] is the start time, c[3] the JSON encoded
             argument list, c[5] the color and c[6] the font size
    width:   target video width in pixels
    height:  target video height in pixels
    styleid: ASS style name put into the Dialogue line

    The JSON argument list is read by index: 0/1 start x/y, 2 alpha as
    'from-to', 3 lifetime, 4 text, 5 Z rotation, 6 Y rotation, 7/8 end x/y,
    9 move duration, 10 move delay, 11 border flag, 12 font face.
    An IndexError or ValueError while converting is logged as an invalid
    comment and nothing is written.
    """
    #BiliPlayerSize = (512, 384)  # Bilibili player version 2010
    #BiliPlayerSize = (540, 384)  # Bilibili player version 2012
    BiliPlayerSize = (672, 438)  # Bilibili player version 2014
    # (scale, x offset, y offset) mapping player coordinates onto the target size.
    ZoomFactor = GetZoomFactor(BiliPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # Convert one coordinate to target pixels.  Integers and floats above 1
        # are scaled directly; floats up to 1 are first multiplied by the
        # player dimension; anything else is converted to int (or float) and
        # retried.
        isHeight = int(isHeight)  # True -> 1
        if isinstance(InputPos, int):
            return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
        elif isinstance(InputPos, float):
            if InputPos > 1:
                return ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
            else:
                return BiliPlayerSize[isHeight]*ZoomFactor[0]*InputPos+ZoomFactor[isHeight+1]
        else:
            try:
                InputPos = int(InputPos)
            except ValueError:
                InputPos = float(InputPos)
            return GetPosition(InputPos, isHeight)

    try:
        # NOTE(review): safe_list is defined elsewhere in this file; it is used
        # here as a list offering .get(index, default) -- confirm there.
        comment_args = safe_list(json.loads(c[3]))
        text = ASSEscape(str(comment_args[4]).replace('/n', '\n'))
        from_x = comment_args.get(0, 0)
        from_y = comment_args.get(1, 0)
        # Without an end position the comment stays where it starts.
        to_x = comment_args.get(7, from_x)
        to_y = comment_args.get(8, from_y)
        from_x = round(GetPosition(from_x, False))
        from_y = round(GetPosition(from_y, True))
        to_x = round(GetPosition(to_x, False))
        to_y = round(GetPosition(to_y, True))
        alpha = safe_list(str(comment_args.get(2, '1')).split('-'))
        from_alpha = float(alpha.get(0, 1))
        to_alpha = float(alpha.get(1, from_alpha))
        # Convert opacity (0..1) to the inverted 0..255 value used by the alpha tags.
        from_alpha = 255-round(from_alpha*255)
        to_alpha = 255-round(to_alpha*255)
        rotate_z = int(comment_args.get(5, 0))
        rotate_y = int(comment_args.get(6, 0))
        lifetime = float(comment_args.get(3, 4500))
        duration = int(comment_args.get(9, lifetime*1000))
        delay = int(comment_args.get(10, 0))
        fontface = comment_args.get(12)
        isborder = comment_args.get(11, 'true')
        styles = []
        if (from_x, from_y) == (to_x, to_y):
            styles.append('\\pos(%s, %s)' % (from_x, from_y))
        else:
            styles.append('\\move(%s, %s, %s, %s, %s, %s)' % (from_x, from_y, to_x, to_y, delay, delay+duration))
        # Rotation is computed from the position relative to the video area.
        styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (from_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (from_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
        if (from_x, from_y) != (to_x, to_y):
            # For a moving comment, animate the rotation towards the value at the end position.
            styles.append('\\t(%s, %s, ' % (delay, delay+duration))
            styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
            styles.append(')')
        if fontface:
            styles.append('\\fn%s' % ASSEscape(fontface))
        styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        if c[5] != 0xffffff:
            # The color tag takes the bytes in blue, green, red order.
            styles.append('\\c&H%02X%02X%02X&' % (c[5] & 0xff, (c[5] >> 8) & 0xff, (c[5] >> 16) & 0xff))
            if c[5] == 0x000000:
                # Black text gets a white outline color.
                styles.append('\\3c&HFFFFFF&')
        if from_alpha == to_alpha:
            styles.append('\\alpha&H%02X' % from_alpha)
        elif (from_alpha, to_alpha) == (255, 0):
            styles.append('\\fad(%s,0)' % (lifetime*1000))
        elif (from_alpha, to_alpha) == (0, 255):
            styles.append('\\fad(0, %s)' % (lifetime*1000))
        else:
            styles.append('\\fade(%(from_alpha)s, %(to_alpha)s, %(to_alpha)s, 0, %(end_time)s, %(end_time)s, %(end_time)s)' % {'from_alpha': from_alpha, 'to_alpha': to_alpha, 'end_time': lifetime*1000})
        if isborder == 'false':
            styles.append('\\bord0')
        f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(c[0]), 'end': ConvertTimestamp(c[0]+lifetime), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
    except (IndexError, ValueError) as e:
        try:
            logging.warning(_('Invalid comment: %r') % c[3])
        except IndexError:
            # c itself is too short to have a text field; log the whole tuple.
            logging.warning(_('Invalid comment: %r') % c)


def WriteCommentAcfunPositioned(f, c, width, height, styleid):
    """Write one Acfun positioned comment to *f* as ASS Dialogue lines.

    f:       writable text file the Dialogue lines are appended to
    c:       comment tuple; c[0] is the start time, c[3] the decoded argument
             dict, c[5] the color and c[6] the font size
    width:   target video width in pixels
    height:  target video height in pixels
    styleid: ASS style name put into each Dialogue line

    One line is written for the initial state and one more for every entry
    of the 'z' action list; lines with a non-positive duration are omitted.
    An IndexError, KeyError or ValueError while converting is logged as an
    invalid comment.
    """
    AcfunPlayerSize = (560, 400)
    # (scale, x offset, y offset) mapping player coordinates onto the target size.
    ZoomFactor = GetZoomFactor(AcfunPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # Input positions are in thousandths of the player dimension.
        isHeight = int(isHeight)  # True -> 1
        return AcfunPlayerSize[isHeight]*ZoomFactor[0]*InputPos*0.001+ZoomFactor[isHeight+1]

    def GetTransformStyles(x=None, y=None, scale_x=None, scale_y=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
        # Build the override tags for every property that is given (not None).
        styles = []
        if x is not None and y is not None:
            styles.append('\\pos(%s, %s)' % (x, y))
        if scale_x is not None:
            styles.append('\\fscx%s' % scale_x)
        if scale_y is not None:
            styles.append('\\fscy%s' % scale_y)
        if rotate_z is not None and rotate_y is not None:
            assert x is not None
            assert y is not None
            styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
        if color is not None:
            # The color tag takes the bytes in blue, green, red order.
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                styles.append('\\3c&HFFFFFF&')
        if alpha is not None:
            alpha = 255-round(alpha*255)
            styles.append('\\alpha&H%02X' % alpha)
        return styles

    def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
        # Zero-length (or negative) segments produce no Dialogue line.
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})

    try:
        comment_args = c[3]
        text = ASSEscape(str(comment_args['n']).replace('\r', '\n'))
        common_styles = []
        # Map the comment's anchor index (row-major from top-left) to the
        # numpad-style alignment number; 7 (top-left) is the default.
        anchor = {0: 7, 1: 8, 2: 9, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2, 8: 3}.get(comment_args.get('c', 0), 7)
        if anchor != 7:
            common_styles.append('\\an%s' % anchor)
        font = comment_args.get('w')
        if font:
            font = dict(font)
            fontface = font.get('f')
            if fontface:
                common_styles.append('\\fn%s' % ASSEscape(str(fontface)))
            fontbold = bool(font.get('b'))
            if fontbold:
                common_styles.append('\\b1')
        common_styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        isborder = bool(comment_args.get('b', True))
        if not isborder:
            common_styles.append('\\bord0')
        to_pos = dict(comment_args.get('p', {'x': 0, 'y': 0}))
        to_x = round(GetPosition(int(to_pos.get('x', 0)), False))
        to_y = round(GetPosition(int(to_pos.get('y', 0)), True))
        to_scale_x = round(float(comment_args.get('e', 1.0))*100)
        to_scale_y = round(float(comment_args.get('f', 1.0))*100)
        to_rotate_z = float(comment_args.get('r', 0.0))
        to_rotate_y = float(comment_args.get('k', 0.0))
        to_color = c[5]
        to_alpha = float(comment_args.get('a', 1.0))
        from_time = float(comment_args.get('t', 0.0))
        action_time = float(comment_args.get('l', 3.0))
        actions = list(comment_args.get('z', []))
        transform_styles = GetTransformStyles(to_x, to_y, to_scale_x, to_scale_y, to_rotate_z, to_rotate_y, to_color, to_alpha)
        FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
        for action in actions:
            action = dict(action)
            # The end state of the previous segment is the start state of this one.
            from_x, from_y = to_x, to_y
            from_scale_x, from_scale_y = to_scale_x, to_scale_y
            from_rotate_z, from_rotate_y = to_rotate_z, to_rotate_y
            from_color, from_alpha = to_color, to_alpha
            from_time += action_time
            action_time = float(action.get('l', 0.0))
            action_styles = []
            if 'x' in action:
                to_x = round(GetPosition(int(action['x']), False))
            if 'y' in action:
                to_y = round(GetPosition(int(action['y']), True))
            if 'f' in action:
                to_scale_x = round(float(action['f'])*100)
                action_styles.append('\\fscx%s' % to_scale_x)
            if 'g' in action:
                to_scale_y = round(float(action['g'])*100)
                action_styles.append('\\fscy%s' % to_scale_y)
            if 'c' in action:
                to_color = int(action['c'])
                action_styles.append('\\c&H%02X%02X%02X&' % (to_color & 0xff, (to_color >> 8) & 0xff, (to_color >> 16) & 0xff))
            if 't' in action:
                to_alpha = float(action['t'])
                action_styles.append('\\alpha&H%02X' % (255-round(to_alpha*255)))
            if 'd' in action:
                to_rotate_z = float(action['d'])
            if 'e' in action:
                to_rotate_y = float(action['e'])
            if ('x' in action) or ('y' in action):
                transform_styles = GetTransformStyles(None, None, from_scale_x, from_scale_y, None, None, from_color, from_alpha)
                transform_styles.append('\\move(%s, %s, %s, %s)' % (from_x, from_y, to_x, to_y))
                # The relative Y position is measured against the height, as in GetTransformStyles.
                action_styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(to_rotate_y, to_rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
            elif ('d' in action) or ('e' in action):
                action_styles.append('\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(to_rotate_y, to_rotate_z, (to_x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (to_y-ZoomFactor[2])/(height-ZoomFactor[2]*2)))
            else:
                transform_styles = GetTransformStyles(from_x, from_y, from_scale_x, from_scale_y, from_rotate_z, from_rotate_y, from_color, from_alpha)
            if action_styles:
                # Animate from the segment's start state to the collected end-state tags.
                transform_styles.append('\\t(%s)' % (''.join(action_styles)))
            FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
    except (IndexError, KeyError, ValueError) as e:
        # KeyError: the argument dict has no 'n' (text) entry.
        logging.warning(_('Invalid comment: %r') % c[3])


def WriteCommentSH5VPositioned(f, c, width, height, styleid):
    """Write one sH5V positioned comment to *f* as an ASS Dialogue line.

    f:       writable text file the Dialogue line is appended to
    c:       extended comment tuple: c[0] start time, c[3] text, c[5] color,
             c[6] size, c[9]/c[10] relative x/y, c[11] duration in
             milliseconds, c[12] alpha, c[14]/c[15] Z/Y rotation
    width:   target video width in pixels
    height:  target video height in pixels
    styleid: ASS style name put into the Dialogue line

    Nothing is written when the duration is not positive.  An IndexError or
    ValueError while converting is logged as an invalid comment.
    """
    try:
        escaped_text = ASSEscape(str(c[3]))
        pos_x = round(float(c[9])*width)
        pos_y = round(float(c[10])*height)
        angle_z = -int(c[14])
        angle_y = -int(c[15])
        color = c[5]
        opacity = float(c[12])
        #Note: Alpha transition hasn't been worked out yet.
        font_size = round(int(c[6])*math.sqrt(width*height/307200))
        #Note: sH5V stores an absolute font size, so for now it is scaled by
        #sqrt(width/640*height/480).  It seems to be working fine...
        start_time = float(c[0])
        end_time = start_time+float(c[11])/1000

        tags = [
            '\\pos(%s, %s)' % (pos_x, pos_y),
            '\\fs%s' % font_size,
            '\\frz%s' % round(angle_z),
            '\\fry%s' % round(angle_y),
        ]
        if color is not None:
            # The color tag takes the bytes in blue, green, red order.
            tags.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                tags.append('\\3c&HFFFFFF&')
        tags.append('\\alpha&H%02X' % (255-round(opacity*255)))

        if end_time > start_time:
            fields = {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(tags), 'text': escaped_text, 'styleid': styleid}
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % fields)
    except (IndexError, ValueError) as e:
        logging.warning(_('Invalid comment: %r') % c[3])


# Result: (f, dx, dy)
# To convert: NewX = f*x+dx, NewY = f*y+dy
def GetZoomFactor(SourceSize, TargetSize):
    """Return (f, dx, dy) that fits SourceSize into TargetSize, keeping aspect ratio.

    SourceSize: (width, height) of the original player area
    TargetSize: (width, height) of the output video

    A source point (x, y) maps to (f*x+dx, f*y+dy).  The scaled source is
    centered along the axis that has room to spare.  When a height is zero
    the identity (1, 0, 0) is returned.

    The most recent result is cached on the function object in the
    Cached_Size / Cached_Result attributes.
    """
    size_key = (SourceSize, TargetSize)
    try:
        if size_key == GetZoomFactor.Cached_Size:
            return GetZoomFactor.Cached_Result
    except AttributeError:
        pass  # nothing cached yet
    try:
        SourceAspect = SourceSize[0]/SourceSize[1]
        TargetAspect = TargetSize[0]/TargetSize[1]
        if TargetAspect < SourceAspect:  # narrower
            ScaleFactor = TargetSize[0]/SourceSize[0]
            result = (ScaleFactor, 0, (TargetSize[1]-TargetSize[0]/SourceAspect)/2)
        elif TargetAspect > SourceAspect:  # wider
            ScaleFactor = TargetSize[1]/SourceSize[1]
            result = (ScaleFactor, (TargetSize[0]-TargetSize[1]*SourceAspect)/2, 0)
        else:
            result = (TargetSize[0]/SourceSize[0], 0, 0)
    except ZeroDivisionError:
        result = (1, 0, 0)
    # Store key and result together, only after the computation succeeded, so
    # a failing call can never leave a new key paired with an old result.
    GetZoomFactor.Cached_Size = size_key
    GetZoomFactor.Cached_Result = result
    return result


# Calculation is based on https://github.com/jabbany/CommentCoreLibrary/issues/5#issuecomment-40087282
#                     and https://github.com/m13253/danmaku2ass/issues/7#issuecomment-41489422
# Input: X relative horizonal coordinate: 0 for left edge, 1 for right edge.
#        Y relative vertical coordinate: 0 for top edge, 1 for bottom edge.
# FOV = 1.0/math.tan(100*math.pi/360.0)
# Result: (rotX, rotY, rotZ, shearX, shearY)
def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
    """Convert a Flash Y/Z rotation into rotation and shear values.

    X and Y are relative coordinates (0 = left/top edge, 1 = right/bottom
    edge).  Returns (rotX, rotY, rotZ, shearX, shearY): the three angles are
    whole degrees wrapped by WrapAngle, shearX is always 0 and shearY is
    rounded to three decimals.  Passing FOV=None skips the perspective
    correction of the Y rotation.
    """
    def WrapAngle(deg):
        return 180-((180-deg)%360)

    def CalcPerspectiveCorrection(alpha, X, FOV=FOV):
        alpha = WrapAngle(alpha)
        if FOV is None:
            return alpha
        radians = alpha*math.pi/180.0
        numerator = FOV*math.cos(radians)-X*math.sin(radians)
        reach = max(2, abs(X)+1)*math.sin(radians)
        # The two half-circles use mirrored denominators and mirrored signs.
        forward = 0 <= alpha <= 180
        if forward:
            costheta = numerator/(FOV+reach)
        else:
            costheta = numerator/(FOV-reach)
        if costheta > 1 or costheta < -1:
            # acos() needs [-1, 1]; clamp and report, as this should not happen.
            costheta = 1 if costheta > 1 else -1
            logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!' % (alpha, X))
        theta = math.acos(costheta)*180/math.pi
        if not forward:
            theta = -theta
        return WrapAngle(theta)

    # Re-centre the relative coordinates from [0, 1] to [-1, 1].
    X = 2*X-1
    Y = 2*Y-1
    rotY = WrapAngle(rotY)
    rotZ = WrapAngle(rotZ)
    if rotY != 0 and rotZ != 0:
        yaw = rotY*math.pi/180.0
        roll = rotZ*math.pi/180.0
        sin_yaw, cos_yaw = math.sin(yaw), math.cos(yaw)
        sin_roll, cos_roll = math.sin(roll), math.cos(roll)
        outY = math.atan2(-sin_yaw*cos_roll, cos_yaw)*180/math.pi
        outZ = math.atan2(-cos_yaw*sin_roll, cos_roll)*180/math.pi
        outX = math.asin(sin_yaw*sin_roll)*180/math.pi
    else:
        # A single-axis rotation only needs its sign flipped:
        # positive value means clockwise in Flash.
        outX = 0
        outY = -rotY
        outZ = -rotZ
    if FOV is not None:
        # Only the Y rotation receives the perspective correction.
        outY = CalcPerspectiveCorrection(outY, X, FOV)
    shearY = round(-0.75*Y*math.sin(outY*math.pi/180.0), 3)
    return (WrapAngle(round(outX)), WrapAngle(round(outY)), WrapAngle(round(outZ)), 0, shearY)


def ProcessComments(comments, f, width, height, bottomReserved, fontface, fontsize, alpha, lifetime, reduced, progress_callback):
    """Write the ASS header and one event per comment to f.

    Comments whose mode (c[4]) is an int are laid out on the row grid: the
    first position with enough free rows is used; when none exists the
    comment is dropped if `reduced` is true, otherwise it is placed on the
    row chosen by FindAlternativeRow.  Comments whose mode is 'bilipos',
    'acfunpos' or 'sH5Vpos' go to their dedicated writer; anything else is
    logged as invalid.  progress_callback, when given, is called as
    (done, total) every 1000 comments and once more at the end.
    """
    styleid = 'Danmaku2ASS_%04x' % random.randint(0, 0xffff)
    WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid)
    # One occupancy lane per integer mode 0..3.
    lane_length = height-bottomReserved+1
    rows = [[None]*lane_length for _ in range(4)]
    for idx, comment in enumerate(comments):
        if progress_callback and idx % 1000 == 0:
            progress_callback(idx, len(comments))
        kind = comment[4]
        if not isinstance(kind, int):
            if kind == 'bilipos':
                WriteCommentBilibiliPositioned(f, comment, width, height, styleid)
            elif kind == 'acfunpos':
                WriteCommentAcfunPositioned(f, comment, width, height, styleid)
            elif kind == 'sH5Vpos':
                WriteCommentSH5VPositioned(f, comment, width, height, styleid)
            else:
                logging.warning(_('Invalid comment: %r') % comment[3])
            continue
        row = 0
        last_start = height-bottomReserved-comment[7]
        placed = False
        while row <= last_start:
            freerows = TestFreeRows(rows, comment, row, width, height, bottomReserved, lifetime)
            if freerows >= comment[7]:
                placed = True
                break
            # Jump past the rows just inspected (always advance at least one).
            row += freerows or 1
        if not placed:
            if reduced:
                continue
            row = FindAlternativeRow(rows, comment, height, bottomReserved)
        MarkCommentRow(rows, comment, row)
        WriteComment(f, comment, row, width, height, bottomReserved, fontsize, lifetime, styleid)
    if progress_callback:
        progress_callback(len(comments), len(comments))


def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
    """Count the consecutive usable rows for comment c, starting at `row`.

    Counting stops at the bottom of the usable area, once c[7] rows have
    been found, or at the first row whose occupant would still collide
    with c.  For modes 1 and 2 an occupant collides while its own lifetime
    has not elapsed at c[0]; for the other modes the comparison also takes
    the widths (index 8) of both comments and the stage width into account.
    """
    limit = height-bottomReserved
    if c[4] in (1, 2):
        def collides(other):
            return other[0]+lifetime > c[0]
    else:
        try:
            thresholdTime = c[0]-lifetime*(1-width/(c[8]+width))
        except ZeroDivisionError:
            thresholdTime = c[0]-lifetime

        def collides(other):
            try:
                return other[0] > thresholdTime or other[0]+other[8]*lifetime/(other[8]+width) > c[0]
            except ZeroDivisionError:
                # A degenerate width never blocks the row.
                return False

    free = 0
    occupant = None
    while row < limit and free < c[7]:
        current = rows[c[4]][row]
        # Only re-test when the occupant changes between adjacent rows.
        if occupant != current:
            occupant = current
            if occupant and collides(occupant):
                break
        row += 1
        free += 1
    return free


def FindAlternativeRow(rows, c, height, bottomReserved):
    """Choose a fallback row for comment c when no free slot was found.

    Scans the start rows that leave room for ceil(c[7]) rows above the
    reserved bottom area.  Returns the first empty row, or otherwise the
    row whose occupant has the smallest timestamp (index 0).
    """
    oldest = 0
    for candidate in range(height-bottomReserved-math.ceil(c[7])):
        occupant = rows[c[4]][candidate]
        if not occupant:
            return candidate
        if occupant[0] < rows[c[4]][oldest][0]:
            oldest = candidate
    return oldest


def MarkCommentRow(rows, c, row):
    """Record comment c as the occupant of ceil(c[7]) rows starting at `row`.

    Rows are marked in the lane selected by c[4].  Running off the end of
    the lane is tolerated: the rows that do exist stay marked.
    """
    try:
        stop = row+math.ceil(c[7])
        for slot in range(row, stop):
            rows[c[4]][slot] = c
    except IndexError:
        # The comment sticks out below the last tracked row; nothing more to mark.
        pass


def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
    """Write the [Script Info], [V4+ Styles] and [Events] headers to f.

    width/height become PlayResX/PlayResY.  The single style is named
    styleid and uses fontface at round(fontsize); `alpha` is an opacity in
    0..1 written as the inverted two-digit hex byte 255-round(alpha*255),
    and the outline width is round(fontsize/25).
    """
    fields = {
        'width': width,
        'height': height,
        'fontface': fontface,
        'fontsize': round(fontsize),
        'alpha': 255-round(alpha*255),
        'outline': round(fontsize/25),
        'styleid': styleid,
    }
    template = '''
[Script Info]
; Script generated by Danmaku2ASS
; https://github.com/m13253/danmaku2ass
Script Updated By: Danmaku2ASS (https://github.com/m13253/danmaku2ass)
ScriptType: v4.00+
WrapStyle: 2
Collisions: Normal
PlayResX: %(width)s
PlayResY: %(height)s
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: %(styleid)s, %(fontface)s, %(fontsize)s, &H%(alpha)02XFFFFFF, &H%(alpha)02XFFFFFF, &H%(alpha)02X000000, &H%(alpha)02X000000, 0, 0, 0, 0, 100, 100, 0.00, 0.00, 1, %(outline)s, 0, 7, 0, 0, 0, 0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'''
    f.write(template % fields)


def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lifetime, styleid):
    """Write one Dialogue line for a row-placed comment.

    Mode c[4] selects the placement: 1 is centred with \\an8, 2 is centred
    with \\an2 using the row converted by ConvertType2, 3 moves from
    -ceil(c[8]) to `width`, and every other value moves from `width` to
    -ceil(c[8]).  A font size override is added when c[6] differs from
    fontsize by at least one, and a colour override when c[5] is not
    0xffffff (black text additionally gets a white outline).  The event
    runs from c[0] to c[0]+lifetime.
    """
    text = ASSEscape(c[3])
    mode = c[4]
    if mode == 1:
        placement = '\\an8\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': row}
    elif mode == 2:
        placement = '\\an2\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': ConvertType2(row, height, bottomReserved)}
    elif mode == 3:
        placement = '\\move(%(neglen)s, %(row)s, %(width)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    else:
        placement = '\\move(%(width)s, %(row)s, %(neglen)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    styles = [placement]
    if not (-1 < c[6]-fontsize < 1):
        styles.append('\\fs%s' % round(c[6]))
    colour = c[5]
    if colour != 0xffffff:
        # The override is written low byte first: (c & 0xff), then the middle and high bytes.
        low_to_high = (colour & 0xff, (colour >> 8) & 0xff, (colour >> 16) & 0xff)
        styles.append('\\c&H%02X%02X%02X&' % low_to_high)
        if colour == 0x000000:
            styles.append('\\3c&HFFFFFF&')
    fields = {
        'start': ConvertTimestamp(c[0]),
        'end': ConvertTimestamp(c[0]+lifetime),
        'styles': ''.join(styles),
        'text': text,
        'styleid': styleid,
    }
    f.write('Dialogue: 2,%(start)s,%(end)s,%(styleid)s,,0000,0000,0000,,{%(styles)s}%(text)s\n' % fields)


def ASSEscape(s):
    """Escape s for use as ASS dialogue text.

    Backslashes and curly braces are backslash-escaped, line breaks become
    the \\N marker, and empty lines are replaced by a single space.
    """
    escaped = str(s)
    # The backslash must be handled first so later escapes are not doubled.
    for raw, quoted in (('\\', '\\\\'), ('{', '\\{'), ('}', '\\}')):
        escaped = escaped.replace(raw, quoted)
    lines = [line if line else ' ' for line in escaped.split('\n')]
    return '\\N'.join(lines)


def CalculateLength(s):
    """Return the character count of the longest line in s.

    This is a character count, so it may not be accurate as a display width.
    """
    return max(len(line) for line in s.split('\n'))


def ConvertTimestamp(timestamp):
    """Format a time in seconds as the ASS timestamp H:MM:SS.cc."""
    # Work in whole centiseconds and peel the units off from the small end.
    centiseconds = round(timestamp*100.0)
    whole_seconds, centsecond = divmod(centiseconds, 100)
    whole_minutes, second = divmod(whole_seconds, 60)
    hour, minute = divmod(whole_minutes, 60)
    return '%d:%02d:%02d.%02d' % (int(hour), int(minute), int(second), int(centsecond))


def ConvertType2(row, height, bottomReserved):
    """Mirror a row index counted from the top into one counted up from the usable bottom edge."""
    usable_height = height-bottomReserved
    return usable_height-row


def ConvertToFile(filename_or_file, *args, **kwargs):
    """Return a file object for filename_or_file.

    A str (or UTF-8 encoded bytes) argument is treated as a path and opened
    with open(path, *args, **kwargs).  Any other object is assumed to be a
    file object already and is returned unchanged.
    """
    target = filename_or_file
    if isinstance(target, bytes):
        target = str(bytes(target).decode('utf-8', 'replace'))
    if not isinstance(target, str):
        return target
    return open(target, *args, **kwargs)


def FilterBadChars(f):
    """Read all of f and return a StringIO copy with control characters replaced.

    Every character in U+0000-U+001F except tab, line feed and carriage
    return is replaced by U+FFFD.
    """
    control_chars = re.compile('[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]')
    cleaned = control_chars.sub('\ufffd', f.read())
    return io.StringIO(cleaned)


class safe_list(list):
    """A list with a dict-style get() that tolerates out-of-range indexes."""

    def get(self, index, default=None):
        """Return self[index], or `default` when that raises IndexError."""
        try:
            value = self[index]
        except IndexError:
            value = default
        return value


def export(func):
    """Decorator that adds func's name to the module's __all__ and returns func unchanged.

    __all__ is created on first use when the module has not defined it yet.
    """
    global __all__
    public_name = func.__name__
    try:
        __all__.append(public_name)
    except NameError:
        # First exported name: the list does not exist yet.
        __all__ = [public_name]
    return func


@export
def Danmaku2ASS(input_files, output_file, stage_width, stage_height, reserve_blank=0, font_face=_('(FONT) sans-serif')[7:], font_size=25.0, text_opacity=1.0, comment_duration=5.0, is_reduce_comments=False, progress_callback=None):
    """Read the comment files and write them as one ASS subtitle script.

    Parameters:
        input_files: passed to ReadComments (a path or an iterable of inputs).
        output_file: path or writable file object; a falsy value writes to
            sys.stdout.  Paths are opened as UTF-8 with BOM and CRLF newlines.
        stage_width, stage_height: stage size handed to ProcessComments.
        reserve_blank: height kept free at the bottom of the stage.
        font_face, font_size, text_opacity: default style of the script.
        comment_duration: display time of a comment in seconds.
        is_reduce_comments: drop comments when the stage is full.
        progress_callback: optional callable(done, total).

    Raises whatever ReadComments, opening the output, or ProcessComments raise.
    """
    comments = ReadComments(input_files, font_size)
    fo = None
    try:
        if output_file:
            fo = ConvertToFile(output_file, 'w', encoding='utf-8-sig', errors='replace', newline='\r\n')
        else:
            fo = sys.stdout
        ProcessComments(comments, fo, stage_width, stage_height, reserve_blank, font_face, font_size, text_opacity, comment_duration, is_reduce_comments, progress_callback)
    finally:
        # Close only a handle opened here, never a caller-supplied file object
        # or stdout.  `fo` is still None when opening the output failed; without
        # this guard None.close() would raise AttributeError and hide the real error.
        if fo is not None and output_file and fo != output_file:
            fo.close()


@export
def ReadComments(input_files, font_size=25.0, progress_callback=None):
    """Parse every input and return all comments as one sorted list.

    input_files may be a single path (str or UTF-8 bytes) or an iterable of
    inputs accepted by ConvertToFile.  Each input is probed for its format;
    ValueError is raised when no processor is available for it.
    progress_callback, when given, is called as (index, total) before each
    input and once more as (total, total) at the end.
    """
    if isinstance(input_files, bytes):
        input_files = str(bytes(input_files).decode('utf-8', 'replace'))
    sources = [input_files] if isinstance(input_files, str) else list(input_files)
    comments = []
    for position, source in enumerate(sources):
        if progress_callback:
            progress_callback(position, len(sources))
        with ConvertToFile(source, 'r', encoding='utf-8', errors='replace') as handle:
            processor = GetCommentProcessor(handle)
            if not processor:
                raise ValueError(_('Unknown comment file format: %s') % source)
            comments.extend(processor(FilterBadChars(handle), font_size))
    if progress_callback:
        progress_callback(len(sources), len(sources))
    comments.sort()
    return comments


@export
def GetCommentProcessor(input_file):
    """Return the CommentFormatMap entry for the format probed from input_file."""
    detected_format = ProbeCommentFormat(input_file)
    return CommentFormatMap[detected_format]


def main():
    """Command-line entry point: parse the arguments and run Danmaku2ASS.

    Raises ValueError when --size is not of the form WIDTHxHEIGHT.
    """
    # Invoked without any argument: show the help text.
    if len(sys.argv) == 1:
        sys.argv.append('--help')
    default_font = _('(FONT) sans-serif')[7:]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-o', '--output',
        metavar=_('OUTPUT'),
        help=_('Output file'))
    parser.add_argument(
        '-s', '--size',
        metavar=_('WIDTHxHEIGHT'),
        required=True,
        help=_('Stage size in pixels'))
    parser.add_argument(
        '-fn', '--font',
        metavar=_('FONT'),
        help=_('Specify font face [default: %s]') % default_font,
        default=default_font)
    parser.add_argument(
        '-fs', '--fontsize',
        metavar=_('SIZE'),
        help=(_('Default font size [default: %s]') % 25),
        type=float,
        default=25.0)
    parser.add_argument(
        '-a', '--alpha',
        metavar=_('ALPHA'),
        help=_('Text opacity'),
        type=float,
        default=1.0)
    parser.add_argument(
        '-l', '--lifetime',
        metavar=_('SECONDS'),
        help=_('Duration of comment display [default: %s]') % 5,
        type=float,
        default=5.0)
    parser.add_argument(
        '-p', '--protect',
        metavar=_('HEIGHT'),
        help=_('Reserve blank on the bottom of the stage'),
        type=int,
        default=0)
    parser.add_argument(
        '-r', '--reduce',
        action='store_true',
        help=_('Reduce the amount of comments if stage is full'))
    parser.add_argument(
        'file',
        metavar=_('FILE'),
        nargs='+',
        help=_('Comment file to be processed'))
    args = parser.parse_args()
    try:
        # Both a missing 'x' and a non-numeric part surface as ValueError here.
        width, height = (int(part) for part in str(args.size).split('x', 1))
    except ValueError:
        raise ValueError(_('Invalid stage size: %r') % args.size)
    Danmaku2ASS(args.file, args.output, width, height, args.protect, args.font, args.fontsize, args.alpha, args.lifetime, args.reduce)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


================================================
FILE: bilibili_luckyman/README.md
================================================
## 说明

B 站 30 万粉丝抽奖,自己写了一个转发抽奖助手。

上次活动:

https://t.bilibili.com/675922191916728342


================================================
FILE: bilibili_luckyman/bilibili_luckyman.py
================================================
# -*- coding:utf-8 -*-
import requests
import json
import re
import random
import time

def get_dynamic_id(url):
    """Return every run of digits found in url, in order, as a list of strings.

    For a link of the form https://t.bilibili.com/<id> the result is a
    one-element list holding the id.
    """
    digit_runs = re.compile(r'\d+').findall(url)
    return digit_runs

def get_data(detail_url, params, timeout=10):
    """Fetch one page of the forward list and return (offset, items).

    Parameters:
        detail_url: endpoint URL to query.
        params: query parameters sent with the GET request.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns the 'offset' and 'items' entries of the response's 'data' object.
    Raises requests exceptions on network failure and KeyError/TypeError
    when the response does not have the expected structure.
    """
    req = requests.get(url = detail_url, params = params, timeout = timeout)
    data = json.loads(req.text)['data']
    return data['offset'], data['items']

def get_uses(dynamic_id):
    """Collect name, mid and forward text of every user who forwarded the dynamic.

    Pages are requested until the server returns an empty offset.

    Returns three parallel lists: (all_user_name, all_user_mid, all_user_text).
    """
    detail_url = "https://api.bilibili.com/x/polymer/web-dynamic/v1/detail/forward"

    all_user_name = []
    all_user_text = []
    all_user_mid = []

    params = {'id': dynamic_id}
    while True:
        offset, items = get_data(detail_url, params)
        # Consume the items BEFORE looking at the offset: the response that
        # carries the empty (end-of-list) offset may still contain forwards,
        # and a single-page result arrives with an empty offset right away.
        for item in items or ():
            all_user_name.append(item['user']['name'])
            all_user_mid.append(item['user']['mid'])
            all_user_text.append(item['desc']['text'])
        if offset == "":
            break
        params = {
            'id': dynamic_id,
            'offset': offset
        }

    return all_user_name, all_user_mid, all_user_text

def get_lucky_man(num, lucky_num):
    """Draw up to lucky_num distinct indexes from range(num).

    The indexes 0..num-1 are shuffled with the module-level random
    generator (so a prior random.seed() makes the draw reproducible) and
    the first lucky_num of them are returned.
    """
    candidates = list(range(num))
    random.shuffle(candidates)
    return candidates[:lucky_num]

def get_local_time():
    """Return the current local time formatted as "[HH:MM:SS]"."""
    stamp = time.strftime('%H:%M:%S', time.localtime())
    return "[{}]".format(stamp)

if __name__ == "__main__":
    banner_rule = "+----------------------------------------+"
    print (banner_rule)
    print ("      |动态转发抽奖助手 by Jack Cui|")
    print (banner_rule)
    # Link of the dynamic to draw from; change it to your own
    url = "https://t.bilibili.com/675922191916728342"
    print (get_local_time() + " 正在获取转发数据中......")

    # Six prizes, five copies each
    prize_names = [
        "动手深度学习",
        "机器学习公式详解",
        "Easy RL 强化学习教程",
        "数学之美",
        "浪潮之巅 第四版",
        "C Primer Plus(第6版)中文版"
    ]
    awards = prize_names * 5

    # Fix the random seed so the draw is reproducible; the seed is
    # forwards + comments + likes
    random.seed(1462 + 213 + 399)
    random.shuffle(awards)

    dynamic_id = get_dynamic_id(url)
    all_user_name, all_user_mid, all_user_text = get_uses(dynamic_id)

    top30_shuffle_id = get_lucky_man(len(all_user_name), 30)
    print (get_local_time() + " 中奖用户信息:\n")
    # Winners and the shuffled prizes are paired in order
    for id_, award in zip(top30_shuffle_id, awards):
        homepage = "https://space.bilibili.com/" + str(all_user_mid[id_])
        print("用户名:{}".format(all_user_name[id_]))
        print("用户主页:{}".format(homepage))
        print("转发内容:{}".format(all_user_text[id_]))
        print("获得奖品:{}".format(award))
        print("*" * 50)


================================================
FILE: biqukan.py
================================================
# -*- coding:UTF-8 -*-
from urllib import request
from bs4 import BeautifulSoup
import collections
import re
import os
import time
import sys
import types

"""
类说明:下载《笔趣看》网小说: url:https://www.biqukan.com/

Parameters:
	target - 《笔趣看》网指定的小说目录地址(string)

Returns:
	无

Modify:
	2017-05-06
"""
class download(object):
	"""Downloader for a novel hosted on https://www.biqukan.com/.

	Parameters:
		target - URL of the novel's table-of-contents page (string)
	"""
	def __init__(self, target):
		self.__target_url = target
		self.__head = {'User-Agent':'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166  Safari/535.19',}

	def _fetch_html(self, url):
		"""Request url with the configured headers and return the body decoded as GBK."""
		page_req = request.Request(url = url, headers = self.__head)
		# The with-block closes the HTTP response instead of leaking the connection
		with request.urlopen(page_req) as page_response:
			return page_response.read().decode('gbk','ignore')

	def get_download_url(self):
		"""Collect the chapter links listed on the table-of-contents page.

		Only links that appear after the "正文卷" heading and whose title
		matches "第...章" are kept; they are renumbered consecutively from 1.

		Returns:
			novel_name + '.txt' - file name the novel is saved under (string)
			numbers - number of chapters collected (int)
			download_dict - ordered mapping of chapter title to chapter URL (dict)
		"""
		charter = re.compile(u'[第弟](.+)章', re.IGNORECASE)
		target_html = self._fetch_html(self.__target_url)
		listmain_soup = BeautifulSoup(target_html,'lxml')
		chapters = listmain_soup.find_all('div',class_ = 'listmain')
		download_soup = BeautifulSoup(str(chapters), 'lxml')
		novel_name = str(download_soup.dl.dt).split("》")[0][5:]
		flag_name = "《" + novel_name + "》" + "正文卷"
		download_dict = collections.OrderedDict()
		begin_flag = False
		for child in download_soup.dl.children:
			if child == '\n':
				continue
			if child.string == flag_name:
				begin_flag = True
			if begin_flag and child.a is not None:
				download_url = "https://www.biqukan.com" + child.a.get('href')
				names = str(child.string).split('章')
				if charter.findall(names[0] + '章'):
					# Titles without a literal '章' have no second part
					title = names[1] if len(names) > 1 else ''
					chapter_no = len(download_dict) + 1
					download_dict['第' + str(chapter_no) + '章 ' + title] = download_url
		# Report the real chapter count (the running counter used to end one too high)
		return novel_name + '.txt', len(download_dict), download_dict

	def Downloader(self, url):
		"""Download one chapter page and return its text.

		Parameters:
			url - chapter URL (string)

		Returns:
			soup_text - chapter text with non-breaking spaces removed (string)
		"""
		download_html = self._fetch_html(url)
		soup_texts = BeautifulSoup(download_html, 'lxml')
		texts = soup_texts.find_all(id = 'content', class_ = 'showtxt')
		soup_text = BeautifulSoup(str(texts), 'lxml').div.text.replace('\xa0','')
		return soup_text

	def Writer(self, name, path, text):
		"""Append one chapter to the novel file.

		Parameters:
			name - chapter title (string)
			path - file the novel is saved to (string)
			text - chapter text (string)

		Spaces are dropped, each carriage return is followed by a line
		feed, and everything from the first 'h' onwards is discarded.
		"""
		pieces = [name + '\n\n']
		for each in text:
			if each == 'h':
				# NOTE(review): presumably cuts off a trailing "http..." page link - confirm
				break
			if each != ' ':
				pieces.append(each)
			if each == '\r':
				pieces.append('\n')
		pieces.append('\n\n')
		# One buffered write instead of one write call per character
		with open(path, 'a', encoding='utf-8') as f:
			f.write(''.join(pieces))

if __name__ == "__main__":
	print("\n\t\t欢迎使用《笔趣看》小说下载小工具\n\n\t\t作者:Jack-Cui\t时间:2017-05-06\n")
	print("*************************************************************************")
	
	# URL of the novel's table-of-contents page
	target_url = str(input("请输入小说目录下载地址:\n"))

	# Create the downloader and collect the chapter links
	d = download(target = target_url)
	name, numbers, url_dict = d.get_download_url()
	# Chapters are appended to the file, so remove any earlier download first
	if name in os.listdir():
		os.remove(name)
	# Measure progress against the chapters actually queued
	total = len(url_dict)

	# Download chapter by chapter
	print("《%s》下载中:" % name[:-4])
	for index, (key, value) in enumerate(url_dict.items(), 1):
		d.Writer(key, name, d.Downloader(value))
		# Scale to a real percentage; the ratio alone printed e.g. "0.500%"
		sys.stdout.write("已下载:%.3f%%" % (100.0 * index / total) + '\r')
		sys.stdout.flush()

	print("《%s》下载完成!" % name[:-4])

	


================================================
FILE: cartoon/cartoon/__init__.py
================================================


================================================
FILE: cartoon/cartoon/items.py
================================================
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy

class ComicItem(scrapy.Item):
	# Chapter name; the download pipeline uses it as the chapter's sub-directory
	dir_name = scrapy.Field()
	# URL of the comic page this item was scraped from
	link_url = scrapy.Field()
	# List of image URLs found on that page
	img_url = scrapy.Field()
	# Local paths of the saved images, filled in by the download pipeline
	image_paths = scrapy.Field()

================================================
FILE: cartoon/cartoon/middlewares.py
================================================
# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/spider-middleware.html

from scrapy import signals


class CartoonSpiderMiddleware(object):
    """Pass-through spider middleware.

    Every hook returns its input unchanged.  The hooks are instance
    methods and therefore take `self`; without it, calling them on the
    instance created by from_crawler() raises TypeError.
    """
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Response, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


================================================
FILE: cartoon/cartoon/pipelines.py
================================================
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from cartoon import settings
from scrapy import Request
import requests
import os


class ComicImgDownloadPipeline(object):
	"""Item pipeline that saves every image of a ComicItem to disk.

	Images are stored under settings.IMAGES_STORE/<dir_name>/ and named
	after the page number taken from the item's link_url.
	"""

	def process_item(self, item, spider):
		"""Download the item's images and record their paths in item['image_paths'].

		Files that already exist are not fetched again.  A failed download
		is logged and skipped, and its path is left out of image_paths.
		"""
		# Only items that carry image links need any work
		if 'img_url' in item:
			images = []
			# Directory of the chapter
			dir_path = '%s/%s' % (settings.IMAGES_STORE, item['dir_name'])
			# Create the directory if it does not exist yet
			if not os.path.exists(dir_path):
				os.makedirs(dir_path)
			for image_url in item['img_url']:
				# Extension comes from the image URL, page number from the page URL
				houzhui = image_url.split('/')[-1].split('.')[-1]
				qianzhui = item['link_url'].split('/')[-1].split('.')[0]
				image_file_name = '第' + qianzhui + '页.' + houzhui
				file_path = '%s/%s' % (dir_path, image_file_name)
				if not os.path.exists(file_path):
					# Fetch BEFORE opening the file: opening first left an empty
					# file behind when the request failed, and that file was then
					# treated as "already downloaded" on every later run.
					try:
						response = requests.get(url = image_url, timeout = 30)
						response.raise_for_status()
					except requests.RequestException as exc:
						spider.logger.warning('Image download failed: %s (%s)' % (image_url, exc))
						continue
					with open(file_path, 'wb') as handle:
						handle.write(response.content)
				images.append(file_path)
			# Hand the saved paths back on the item
			item['image_paths'] = images
		return item

================================================
FILE: cartoon/cartoon/settings.py
================================================
# -*- coding: utf-8 -*-

# Scrapy settings for cartoon project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'cartoon'

SPIDER_MODULES = ['cartoon.spiders']
NEWSPIDER_MODULE = 'cartoon.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'cartoon (+http://www.yourdomain.com)'

# robots.txt rules are NOT obeyed by this project (set to True to honour them)
ROBOTSTXT_OBEY = False

# Send every scraped item through the image download pipeline
ITEM_PIPELINES = {
	'cartoon.pipelines.ComicImgDownloadPipeline': 1,
}

# Root directory under which the pipeline creates one folder per chapter.
# NOTE(review): hard-coded drive path - adjust it for the machine running the crawl.
IMAGES_STORE = 'H:/火影忍者'

# Cookies are switched off for this crawl
COOKIES_ENABLED = False

DOWNLOAD_DELAY = 0.25    # 250 ms of delay

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'cartoon.middlewares.CartoonSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'cartoon.middlewares.MyCustomDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    'cartoon.pipelines.CartoonPipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'


================================================
FILE: cartoon/cartoon/spiders/__init__.py
================================================
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.


================================================
FILE: cartoon/cartoon/spiders/comic_spider.py
================================================
# -*- coding: utf-8 -*-

import re
import scrapy
from scrapy import Selector
from cartoon.items import ComicItem

class ComicSpider(scrapy.Spider):
	"""Spider that walks the chapter list of a comic and emits one item per page.

	parse1 reads the chapter list, parse2 handles the first page of a
	chapter and schedules the remaining pages, parse3 handles those pages.
	"""
	name = 'comic'

	def __init__(self, *args, **kwargs):
		# Let scrapy.Spider run its own initialisation (and accept -a arguments)
		super(ComicSpider, self).__init__(*args, **kwargs)
		# Host that serves the images
		self.server_img = 'http://n.1whour.com/'
		# Host that serves the chapter pages
		self.server_link = 'http://comic.kukudm.com'
		self.allowed_domains = ['comic.kukudm.com']
		self.start_urls = ['http://comic.kukudm.com/comiclist/3/']
		# Regular expression that extracts the image path from the page script
		self.pattern_img = re.compile(r'\+"(.+)\'><span')

	def start_requests(self):
		"""Start the crawl at the chapter list page."""
		yield scrapy.Request(url = self.start_urls[0], callback = self.parse1)

	def parse1(self, response):
		"""Read chapter links and names and request the first page of each chapter."""
		hxs = Selector(response)
		# Chapter links
		urls = hxs.xpath('//dd/a[1]/@href').extract()
		# Chapter names
		dir_names = hxs.xpath('//dd/a[1]/text()').extract()
		for url, dir_name in zip(urls, dir_names):
			item = ComicItem()
			item['link_url'] = self.server_link + url
			item['dir_name'] = dir_name
			# Pass the item along so later callbacks know the chapter name
			yield scrapy.Request(url = item['link_url'], meta = {'item':item}, callback = self.parse2)

	def _page_item(self, response):
		"""Build a fresh item for the page in response.

		A new item is created for every page: the item carried in
		response.meta is shared by all page requests of a chapter, so
		mutating it would change items that were already yielded.
		"""
		chapter = response.meta['item']
		pre_img_url = Selector(response).xpath('//script/text()').extract()
		# The pipeline iterates img_url, so it must be a list
		img_url = [self.server_img + re.findall(self.pattern_img, pre_img_url[0])[0]]
		return ComicItem(dir_name = chapter['dir_name'], link_url = response.url, img_url = img_url)

	def parse2(self, response):
		"""Emit the first page of a chapter and request its remaining pages."""
		item = self._page_item(response)
		# Hand the first page to the item pipeline
		yield item
		# Number of pages in this chapter
		page_num = Selector(response).xpath('//td[@valign="top"]/text()').re(u'共(\\d+)页')[0]
		# Page URLs end in "<n>.htm"; strip the five-character "1.htm" suffix
		pre_link = item['link_url'][:-5]
		for each_link in range(2, int(page_num) + 1):
			new_link = pre_link + str(each_link) + '.htm'
			yield scrapy.Request(url = new_link, meta = {'item':item}, callback = self.parse3)

	def parse3(self, response):
		"""Emit the item for one of the remaining pages of a chapter."""
		yield self._page_item(response)
		

================================================
FILE: cartoon/scrapy.cfg
================================================
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html

[settings]
default = cartoon.settings

[deploy]
#url = http://localhost:6800/
project = cartoon


================================================
FILE: daili.py
================================================
# -*- coding:UTF-8 -*-
from bs4 import BeautifulSoup
from selenium import webdriver
import subprocess as sp
from lxml import etree
import requests
import random
import re

"""
函数说明:获取IP代理
Parameters:
	page - 高匿代理页数,默认获取第一页
Returns:
	proxys_list - 代理列表
Modify:
	2017-05-27
"""
def get_proxys(page = 1, timeout = 10):
	"""Scrape one page of high-anonymity proxies from xicidaili.

	Parameters:
		page - page number of the proxy list, defaults to the first page
		timeout - seconds to wait for the server before giving up
	Returns:
		proxys_list - list of 'protocol#ip#port' strings; empty when the
			page does not contain the proxy table
	"""
	# URL of the high-anonymity proxy list
	target_url = 'http://www.xicidaili.com/nn/%d' % page
	# Browser-like request headers
	target_headers = {'Upgrade-Insecure-Requests':'1',
		'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
		'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
		'Referer':'http://www.xicidaili.com/nn/',
		'Accept-Encoding':'gzip, deflate, sdch',
		'Accept-Language':'zh-CN,zh;q=0.8',
	}
	# A Session keeps cookies by itself; the with-block releases its
	# connections, and the timeout keeps a stalled server from hanging us
	with requests.Session() as S:
		target_response = S.get(url = target_url, headers = target_headers, timeout = timeout)
	target_response.encoding = 'utf-8'
	target_html = target_response.text
	# Locate the table with id "ip_list"
	bf1_ip_list = BeautifulSoup(target_html, 'lxml')
	bf2_ip_list = BeautifulSoup(str(bf1_ip_list.find_all(id = 'ip_list')), 'lxml')
	if bf2_ip_list.table is None:
		# Blocked or changed page: report "no proxies" instead of crashing
		return []
	ip_list_info = bf2_ip_list.table.contents
	proxys_list = []
	# Only the odd positions from 3 onwards are used, as before: position 1 is skipped
	for row in ip_list_info[3::2]:
		dom = etree.HTML(str(row))
		ip = dom.xpath('//td[2]')
		port = dom.xpath('//td[3]')
		protocol = dom.xpath('//td[6]')
		proxys_list.append(protocol[0].text.lower() + '#' + ip[0].text + '#' + port[0].text)
	return proxys_list

"""
函数说明:检查代理IP的连通性
Parameters:
	ip - 代理的ip地址
	lose_time - 匹配丢包数
	waste_time - 匹配平均时间
Returns:
	average_time - 代理ip平均耗时
Modify:
	2017-05-27
"""
def check_ip(ip, lose_time, waste_time):
	"""
	Ping a proxy host three times and report its average round-trip time.

	The command uses the ping flags "-n" (number of echo requests) and
	"-w" (per-reply timeout in milliseconds), and its output is decoded as GBK.

	Parameters:
		ip - address of the proxy to ping
		lose_time - compiled pattern whose first group captures the lost-packet count
		waste_time - compiled pattern whose first group captures the average time
	Returns:
		average_time - average round-trip time in ms; 1000 when all three
			packets are lost, the output cannot be parsed, or ping cannot be run
	"""
	# Pass an argument list and no shell: "ip" comes from a scraped web page,
	# so it must never be interpolated into a shell command line.
	cmd = ['ping', '-n', '3', '-w', '3', str(ip)]
	try:
		p = sp.Popen(cmd, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
		# communicate() drains both pipes and waits for the process, so a full
		# stderr pipe cannot block ping and no zombie process is left behind.
		raw_out, _ = p.communicate()
	except OSError:
		# ping could not be started: treat like a host that never answered.
		return 1000
	# Undecodable bytes must not abort the check; the patterns simply won't match.
	out = raw_out.decode("gbk", errors="replace")
	# No loss figure in the output is treated as all three packets lost.
	lost = lose_time.findall(out)
	lose = int(lost[0]) if lost else 3
	# More than two lost packets counts as a timeout.
	if lose > 2:
		return 1000
	# No average figure in the output is treated as a severe timeout.
	average = waste_time.findall(out)
	if not average:
		return 1000
	return int(average[0])

"""
函数说明:初始化正则表达式
Parameters:
	无
Returns:
	lose_time - 匹配丢包数
	waste_time - 匹配平均时间
Modify:
	2017-05-27
"""
def initpattern():
	"""
	Compile the patterns used to read the lost-packet count and the average
	time out of Chinese-language ping output.

	Returns:
		lose_time - pattern capturing the digits that follow "丢失 = "
		waste_time - pattern capturing the digits between "平均 = " and "ms"
	"""
	# The backslash is doubled so "\d" reaches the regex engine without relying
	# on an invalid string escape (a SyntaxWarning on recent Python versions);
	# the resulting pattern text is unchanged.
	lose_time = re.compile(u"丢失 = (\\d+)", re.IGNORECASE)
	waste_time = re.compile(u"平均 = (\\d+)ms", re.IGNORECASE)
	return lose_time, waste_time

if __name__ == '__main__':
	# Compile the ping-output patterns once.
	lose_time, waste_time = initpattern()
	# Scrape the first listing page of proxies.
	proxys_list = get_proxys(1)

	# Draw random candidates until one answers with an average under 200ms.
	proxy = None
	while proxys_list:
		candidate = random.choice(proxys_list)
		# A tested candidate always leaves the pool: it is either unusable
		# or about to be used.
		proxys_list.remove(candidate)
		split_proxy = candidate.split('#')
		# split_proxy is [protocol, ip, port]
		ip = split_proxy[1]
		average_time = check_ip(ip, lose_time, waste_time)
		if average_time < 200:
			proxy = candidate
			break
		print("ip连接超时, 重新获取中!")

	# Stop cleanly instead of crashing in random.choice on an empty pool.
	if proxy is None:
		raise SystemExit("没有可用的代理IP!")

	proxy_dict = {split_proxy[0]:split_proxy[1] + ':' + split_proxy[2]}
	print("使用代理:", proxy_dict)


================================================
FILE: dingdong/README.md
================================================
## 功能

下载京东商品的晒单图。

## 作者

* Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示")
* Author: Jack Cui
* Date: 2018.7.7

## 效果图:

![image](https://github.com/Jack-Cherish/Pictures/blob/master/jd.gif)

## 使用说明

	python jd.py -k 芒果

	三个参数:
	-d	保存图片的路径,默认为jd.py文件所在文件夹
	-k	搜索关键词
	-n  	下载商品的晒单图个数,即n个商店的晒单图


================================================
FILE: dingdong/jd.py
================================================
# -*-coding:utf-8 -*-
# Author:Jack Cui
# Website:http://cuijiahua.com
# Date:2018-7-7
import os
import re
import sys
import bs4
import json
import math
import time
import math
import argparse
import requests
from contextlib import closing

def search_goods(keyword, pages):
	"""
	Collect product links from JD search results.

	Every result page is loaded with two requests: the Search page itself,
	then the s_new.php request, which needs the log_id found in the first
	response.

	Parameters:
		keyword - str, search keyword
		pages - int, number of result pages to walk through
	Returns:
		goods_urls - list of de-duplicated product links, each prefixed with 'http:'
	"""
	found = []

	def harvest(markup):
		# Append the link of every 'gl-item' entry in markup, skipping adverts.
		for entry in bs4.BeautifulSoup(markup, 'lxml').find_all('li', class_='gl-item'):
			link = entry.div.div.a.get('href')
			if 'ccc-x.jd.com' not in link:
				found.append(link)

	sess = requests.Session()
	# Headers shared by both requests of a page.
	base_headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
		'Accept-Encoding': 'gzip, deflate, br',
		'Accept-Language': 'zh-CN,zh;q=0.9',
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
		'Host': 'search.jd.com'}
	for page in range(pages):
		# Query parameters shared by both requests of a page.
		query = {'keyword':keyword,
			'enc':'utf-8',
			'qrst':'1',
			'rt':'1',
			'stop':'1',
			'vt':'2',
			'wq':keyword,
			'stock':'1'}

		# First request: the search page itself.
		first_params = dict(query, page=page*2+1, s=page*28 or 1, click='0')
		first = sess.get(url='https://search.jd.com/Search', params=first_params, headers=base_headers, verify=False)
		first.encoding = 'utf-8'
		harvest(first.text)
		# log_id of the first response is required by the follow-up request.
		log_id = re.findall("log_id:'(.*)',", first.text)[0]

		# Second request: the follow-up "scrolling" load.
		more_headers = dict(base_headers, Referer=first.url)
		more_params = dict(query, page=(1+page)*2, s=(1+page)*25, log_id=log_id, scrolling='y', tpl='1_M')
		more = sess.get(url='https://search.jd.com/s_new.php', params=more_params, headers=more_headers, verify=False)
		more.encoding = 'utf-8'
		harvest(more.text)

	# De-duplicate, then prefix each link with 'http:'.
	return ['http:' + link for link in set(found)]

def goods_images(goods_url):
	"""
	Collect the URLs of the buyer show-off images of one product.

	Parameters:
		goods_url - str, product page link
	Returns:
		image_urls - list of de-duplicated image links, each prefixed with 'http:'
	"""
	# The product id is the file name of the product page without its extension.
	productId = goods_url.split('/')[-1].split('.')[0]
	request_headers = {'Accept': '*/*',
		'Accept-Encoding': 'gzip, deflate, br',
		'Accept-Language': 'zh-CN,zh;q=0.9',
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
		'Referer':goods_url,
		'Host': 'sclub.jd.com'}

	# The comment summary reports how many show-off images exist.
	summary_req = requests.get(
		url='https://sclub.jd.com/comment/productPageComments.action',
		params={'productId':productId,
			'score':'0',
			'sortType':'5',
			'page':'0',
			'pageSize':'10',
			'isShadowSku':'0',
			'fold':'1'},
		headers=request_headers,
		verify=False)
	summary = json.loads(summary_req.text)
	# Ten images per page, rounded up.
	page_total = math.ceil(summary['imageListCount'] / 10)

	collected = []
	for page in range(1, page_total+1):
		# The '_' parameter is built from the digits of the current time:
		# whole seconds followed by up to three fractional digits.
		pieces = str(time.time()).split('.')
		stamp = pieces[0] + pieces[-1][:3]
		listing_req = requests.get(
			url='https://club.jd.com/discussion/getProductPageImageCommentList.action',
			params={'productId':productId,
				'isShadowSku':'0',
				'page':page,
				'pageSize':'10',
				'_':stamp},
			headers=request_headers,
			verify=False)
		listing = json.loads(listing_req.text)
		for img in listing['imgComments']['imgList']:
			collected.append(img['imageUrl'])

	# De-duplicate, then prefix each link with 'http:'.
	return ['http:' + link for link in set(collected)]

def download_image(path, image_url):
	"""
	Download one image into a directory, printing progress to stdout.

	Nothing is written when the server does not answer with status 200.

	Parameters:
		path - str, directory the image is saved in
		image_url - str, address of the image; its last path segment is the file name
	Returns:
		None
	"""
	print(image_url)
	filename = image_url.split('/')[-1]
	image_path = os.path.join(path, filename)
	download_headers = {'Accept': '*/*',
		'Accept-Encoding': 'gzip, deflate, br',
		'Accept-Language': 'zh-CN,zh;q=0.9',
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'}
	chunk_size = 1024
	size = 0
	with closing(requests.get(image_url, headers=download_headers, stream=True)) as response:
		# Check the status before touching the headers: an error response
		# need not carry a Content-Length at all.
		if response.status_code != 200:
			return
		# A missing Content-Length is treated as "size unknown" (0); the
		# size and percentage lines are then skipped instead of dividing by zero.
		content_size = int(response.headers.get('content-length', 0))
		sys.stdout.write(filename+'下载中:\n')
		if content_size:
			sys.stdout.write('    [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

		with open(image_path, 'wb') as file:
			for data in response.iter_content(chunk_size = chunk_size):
				file.write(data)
				size += len(data)
				if content_size:
					sys.stdout.write('    [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
					sys.stdout.flush()

def run(path, keyword, num):
	"""
	Search for a keyword and download the show-off images of the first num goods.

	Parameters:
		path - str, base directory; images are stored in a sub-directory
			named after the keyword
		keyword - str, search keyword
		num - int, number of goods whose images are downloaded
	Returns:
		None
	"""
	pages = 1
	goods_urls = search_goods(keyword, pages)
	# Widen the search one page at a time until enough goods are known, but
	# give up once an extra page brings nothing new, so a keyword with fewer
	# than num results cannot loop forever.
	while len(goods_urls) < num:
		pages += 1
		wider = search_goods(keyword, pages)
		if len(wider) <= len(goods_urls):
			break
		goods_urls = wider

	# Create the directory the files are actually written to (path/keyword),
	# not a directory of the same name in the current working directory.
	path = os.path.join(path, keyword)
	os.makedirs(path, exist_ok=True)
	for goods_url in goods_urls[:num]:
		for image_url in goods_images(goods_url):
			download_image(path, image_url)

if __name__ == '__main__':
	# Started without any argument: behave as if --help had been given.
	if len(sys.argv) == 1:
		sys.argv.append('--help')
	cli = argparse.ArgumentParser()
	# -d defaults to the directory that contains this script.
	cli.add_argument('-d', '--dir', type=str, default=os.path.dirname(__file__), help='store path')
	cli.add_argument('-k', '--keyword', required=True, help='search content')
	cli.add_argument('-n', '--num', type=int, default=1, help='the number of goods to download images')
	options = cli.parse_args()
	run(options.dir, options.keyword, options.num)


================================================
FILE: douyin/README.md
================================================
## 功能

下载指定用户的抖音视频。

## 作者

* Author: [Jack Cui](http://cuijiahua.com "悬停显示")、[steven7851](https://github.com/steven7851 "悬停显示")

## 运行效果

![image](https://github.com/Jack-Cherish/Pictures/blob/master/14.gif)

## 使用说明

	python douyin.py

签名服务来源:https://github.com/coder-fly/douyin-signature<br />
也可以使用 pyppeteer 模拟浏览器来取得签名,如此就不必依赖服务<br />
要是以后服务器关了再来弄吧。


================================================
FILE: douyin/douyin.py
================================================
# -*- coding:utf-8 -*-
from contextlib import closing
import requests, json, re, os, sys
import urllib

class DouYin(object):
	"""Download the videos a Douyin user liked or posted, using the iesdouyin.com web API."""

	def __init__(self, width = 500, height = 300):
		"""
		Prepare the header sets used for API calls and for video downloads.

		Parameters:
			width, height: kept for backward compatibility; not used
		"""
		# Headers for the share page and the JSON API.
		self.headers = {
			'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
			'sec-fetch-mode': 'cors',
			'sec-fetch-site': 'same-origin',
			'accept': 'application/json',
			'accept-encoding': 'gzip, deflate, br',
			'accept-language': 'zh-CN,zh;q=0.9',
		}
		# Mobile-browser headers for fetching the video files.
		self.headers1 = {
			'User-Agent': 'Mozilla/5.0 (Linux; U; Android 5.1.1; zh-cn; MI 4S Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.146 Mobile Safari/537.36 XiaoMi/MiuiBrowser/9.1.3',
		}

	@staticmethod
	def _clean_name(name):
		"""Return name with every backslash and forward slash removed."""
		return name.replace('\\', '').replace('/', '')

	def get_video_urls(self, user_id, type_flag='f'):
		"""
		Collect names and addresses of a user's liked or posted videos.

		Parameters:
			user_id: UID of the user to query
			type_flag: 'f' lists the videos the user liked, any other value
				lists the videos the user posted
		Returns:
			video_names: list of file names ('<aweme_id>[-<description>].mp4')
			video_urls: list of play URLs
			share_urls: list of share-page URLs
			nickname: nickname read from the user's share page
		"""
		video_names = []
		video_urls = []
		share_urls = []
		max_cursor = 0
		has_more = 1
		sign_api = 'http://49.233.200.77:5001'
		play_url = 'https://aweme.snssdk.com/aweme/v1/play/?video_id=%s&line=0&ratio=720p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1&is_support_h265=0&source=PackSourceEnum_PUBLISH'
		share_user_url = 'https://www.iesdouyin.com/share/user/%s' % user_id
		# The share page is requested again until it answers with status 200.
		share_user = requests.get(share_user_url, headers=self.headers)
		while share_user.status_code != 200:
			share_user = requests.get(share_user_url, headers=self.headers)
		_tac_re = re.compile(r"tac='([\s\S]*?)'</script>")
		tac = _tac_re.search(share_user.text).group(1)
		_dytk_re = re.compile(r"dytk\s*:\s*'(.+)'")
		dytk = _dytk_re.search(share_user.text).group(1)
		_nickname_re = re.compile(r'<p class="nickname">(.+?)<\/p>')
		nickname = _nickname_re.search(share_user.text).group(1)
		# The nickname becomes a directory name: on Windows drop the reserved
		# characters once here, even when the user has no videos at all.
		if os.name == 'nt':
			for c in r'\/:*?"<>|':
				nickname = nickname.replace(c, '').strip().strip('\\.')
		# Ask the signing service for the request signature.
		data = {
			'tac': tac.split('|')[0],
			'user_id': user_id,
		}
		req = requests.post(sign_api, data=data)
		while req.status_code != 200:
			req = requests.post(sign_api, data=data)
		sign = req.json().get('signature')
		user_url_prefix = 'https://www.iesdouyin.com/web/api/v2/aweme/like' if type_flag == 'f' else 'https://www.iesdouyin.com/web/api/v2/aweme/post'
		print('解析视频链接中')
		# Page through the listing until the API reports has_more == 0.
		while has_more != 0:
			user_url = user_url_prefix + '/?user_id=%s&sec_uid=&count=21&max_cursor=%s&aid=1128&_signature=%s&dytk=%s' % (user_id, max_cursor, sign, dytk)
			req = requests.get(user_url, headers=self.headers)
			while req.status_code != 200:
				req = requests.get(user_url, headers=self.headers)
			html = json.loads(req.text)
			for each in html['aweme_list']:
				try:
					video_url = play_url % each['video']['vid']
				except (KeyError, TypeError):
					# Entry without a usable video id: skip it.
					continue
				share_desc = each['desc']
				if os.name == 'nt':
					for c in r'\/:*?"<>|':
						share_desc = share_desc.replace(c, '').strip()
				share_id = each['aweme_id']
				# Placeholder or empty descriptions are left out of the file name.
				if share_desc in ['抖音-原创音乐短视频社区', 'TikTok', '']:
					video_names.append(share_id + '.mp4')
				else:
					video_names.append(share_id + '-' + share_desc + '.mp4')
				share_url = 'https://www.iesdouyin.com/share/video/%s' % share_id
				share_urls.append(share_url)
				video_urls.append(video_url)
			max_cursor = html['max_cursor']
			has_more = html['has_more']

		return video_names, video_urls, share_urls, nickname

	def get_download_url(self, video_url, watermark_flag):
		"""
		Turn a play URL into its watermarked or watermark-free variant.

		Parameters:
			video_url: play URL containing '/play/' or '/playwm/'
			watermark_flag: True selects '/playwm/', anything else selects '/play/'
		Returns:
			download_url: the rewritten URL
		"""
		if watermark_flag == True:
			download_url = video_url.replace('/play/', '/playwm/')
		else:
			download_url = video_url.replace('/playwm/', '/play/')

		return download_url

	def video_downloader(self, video_url, video_name, watermark_flag=False):
		"""
		Download one video to a file, printing progress to stdout.

		Nothing is written when the server does not answer with status 200.

		Parameters:
			video_url: play URL of the video
			video_name: path of the file to write
			watermark_flag: whether to download the watermarked variant
		Returns:
			None
		"""
		size = 0
		chunk_size = 1024
		video_url = self.get_download_url(video_url, watermark_flag=watermark_flag)
		with closing(requests.get(video_url, headers=self.headers1, stream=True)) as response:
			# Check the status first: an error response need not carry a
			# Content-Length header.
			if response.status_code != 200:
				return
			# A missing Content-Length is treated as "size unknown" (0); the
			# size and percentage lines are then skipped.
			content_size = int(response.headers.get('content-length', 0))
			if content_size:
				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

			with open(video_name, 'wb') as file:
				for data in response.iter_content(chunk_size = chunk_size):
					file.write(data)
					size += len(data)
					if content_size:
						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
						sys.stdout.flush()

	def run(self):
		"""
		Ask for the download options on the console and download every video.

		Parameters:
			None
		Returns:
			None
		"""
		self.hello()
		print('UID取得方式:\n分享用户页面,用浏览器打开短链接,原始链接中/share/user/后的数字即是UID')
		user_id = input('请输入UID (例如60388937600):')
		user_id = user_id if user_id else '60388937600'
		watermark_flag = input('是否下载带水印的视频 (0-否(默认), 1-是):')
		watermark_flag = watermark_flag if watermark_flag!='' else '0'
		watermark_flag = bool(int(watermark_flag))
		type_flag = input('f-收藏的(默认), p-上传的:')
		type_flag = type_flag if type_flag!='' else 'f'
		save_dir = input('保存路径 (例如"E:/Download/", 默认"./Download/"):')
		save_dir = save_dir if save_dir else "./Download/"
		video_names, video_urls, share_urls, nickname = self.get_video_urls(user_id, type_flag)
		nickname_dir = os.path.join(save_dir, nickname)
		# Liked videos go into a 'favorite' sub-directory of the user's directory.
		target_dir = os.path.join(nickname_dir, 'favorite') if type_flag == 'f' else nickname_dir
		os.makedirs(target_dir, exist_ok=True)
		print('视频下载中:共有%d个作品!\n' % len(video_urls))
		for num, (name, url, share_url) in enumerate(zip(video_names, video_urls, share_urls), 1):
			print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, share_url))
			# Remove both kinds of path separator so the name stays inside target_dir.
			video_path = os.path.join(target_dir, self._clean_name(name))
			if os.path.isfile(video_path):
				print('视频已存在')
			else:
				self.video_downloader(url, video_path, watermark_flag)
			print('\n')
		print('下载完成!')

	def hello(self):
		"""
		Print the welcome banner.

		Parameters:
			None
		Returns:
			None
		"""
		print('*' * 100)
		print('\t\t\t\t抖音App视频下载小助手')
		print('\t\t作者:Jack Cui、steven7851')
		print('*' * 100)


if __name__ == '__main__':
	# Script entry point: create the downloader and start its console session.
	douyin = DouYin()
	douyin.run()


================================================
FILE: douyin/fuck-byted-acrawler.js
================================================
// Referer:https://raw.githubusercontent.com/loadchange/amemv-crawler/master/fuck-byted-acrawler.js
function generateSignature(userId) {
    this.navigator = {
        userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
    }
    var e = {}

    var r = (function () {
        // Apply the binary operator spelled by the string `e` to (r, a).
        // The operator function `x <op> y` is built through `t` (bound to
        // this.Function further down) and cached in `b` under the operator text.
        function e(e, a, r) {
            return (b[e] || (b[e] = t("x,y", "return x " + e + " y")))(r, a)
        }

        // Construct `new e[a](e[a+1], ..., e[a+r])`: element `a` of `e` is the
        // constructor and the following `r` elements are its arguments.
        // One generated function per argument count `r` is cached in the
        // outer `k` object.
        function a(e, a, r) {
            return (k[r] || (k[r] = t("x,y", "return new x[y](" + Array(r + 1).join(",x[++y]").substr(1) + ")")))(e, a)
        }

        // Run the program string `e` with arguments `a` in a fresh scope object.
        // `r` is the parent scope (falsy for the outermost call): the new scope
        // records its depth in `d`, references itself as "$<depth>", copies the
        // parent's "$0".."$<depth-1>" entries, then stores the arguments at
        // indices 0..length-1 before handing over to the interpreter `c`.
        function r(e, a, r) {
            var n, t, s = {}, b = s.d = r ? r.d + 1 : 0;
            for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n];
            for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t];
            return c(e, 0, s)
        }

        // Interpreter loop: executes the program string `t` starting at
        // offset `b`, with `k` as the current scope object.
        // Each instruction is one character; its opcode is the character code
        // minus 32. `v` is the operand stack and `x` the stack pointer.
        function c(t, b, k) {
            // Push a value onto the operand stack.
            function u(e) {
                v[x++] = e
            }

            // Read a length-prefixed string operand: one character gives the
            // length (code - 32), followed by that many characters.
            function f() {
                return g = t.charCodeAt(b++) - 32, t.substring(b, b += g)
            }

            // Run the interpreter recursively from the current offset; a thrown
            // exception is kept in `h` and signalled by setting `y` to `l`.
            function l() {
                try {
                    y = c(t, b, k)
                } catch (e) {
                    h = e, y = l
                }
            }

            for (var h, y, d, g, v = [], x = 0; ;) switch (g = t.charCodeAt(b++) - 32) {
                case 1:
                    u(!v[--x]);
                    break;
                case 4:
                    v[x++] = f();
                    break;
                case 5:
                    u(function (e) {
                        var a = 0, r = e.length;
                        return function () {
                            var c = a < r;
                            return c && u(e[a++]), c
                        }
                    }(v[--x]));
                    break;
                case 6:
                    y = v[--x], u(v[--x](y));
                    break;
                case 8:
                    if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y;
                    break;
                case 9:
                    v[x++] = c;
                    break;
                case 10:
                    u(s(v[--x]));
                    break;
                case 11:
                    y = v[--x], u(v[--x] + y);
                    break;
                case 12:
                    // Decode a string operand by XOR-ing each character code
                    // with (index + length), then push the result.
                    for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length;
                    u(String.fromCharCode.apply(null, d));
                    break;
                case 13:
                    y = v[--x], h = delete v[--x][y];
                    break;
                case 14:
                    v[x++] = t.charCodeAt(b++) - 32;
                    break;
                case 59:
                    u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []);
                    break;
                case 61:
                    u(v[--x][t.charCodeAt(b++) - 32]);
                    break;
                case 62:
                    g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0;
                    break;
                case 65:
                    h = v[--x], y = v[--x], v[--x][y] = h;
                    break;
                case 66:
                    u(e(t[b++], v[--x], v[--x]));
                    break;
                case 67:
                    // Call: functions created by opcode 89 (marked with x === c)
                    // are run through r(); anything else is invoked with apply.
                    y = v[--x], d = v[--x], u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y));
                    break;
                case 68:
                    u(e((g = t[b++]) < "<" ? (b--, f()) : g + g, v[--x], v[--x]));
                    break;
                case 70:
                    u(!1);
                    break;
                case 71:
                    v[x++] = n;
                    break;
                case 72:
                    v[x++] = +f();
                    break;
                case 73:
                    u(parseInt(f(), 36));
                    break;
                case 75:
                    // Conditional jump: a truthy popped value skips the offset
                    // operand; otherwise control falls through into case 74.
                    if (v[--x]) {
                        b++;
                        break
                    }
                case 74:
                    // Relative jump by the signed 16-bit offset operand.
                    g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g;
                    break;
                case 76:
                    u(k[t.charCodeAt(b++) - 32]);
                    break;
                case 77:
                    y = v[--x], u(v[--x][y]);
                    break;
                case 78:
                    g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g));
                    break;
                case 79:
                    g = t.charCodeAt(b++) - 32, u(k["$" + g]);
                    break;
                case 81:
                    h = v[--x], v[--x][f()] = h;
                    break;
                case 82:
                    u(v[--x][f()]);
                    break;
                case 83:
                    h = v[--x], k[t.charCodeAt(b++) - 32] = h;
                    break;
                case 84:
                    v[x++] = !0;
                    break;
                case 85:
                    v[x++] = void 0;
                    break;
                case 86:
                    u(v[x - 1]);
                    break;
                case 88:
                    h = v[--x], y = v[--x], v[x++] = h, v[x++] = y;
                    break;
                case 89:
                    // Push a function whose body is the string operand; it is
                    // tagged with x = c so that opcode 67 can recognise it.
                    u(function () {
                        function e() {
                            return r(e.y, arguments, k)
                        }

                        return e.y = f(), e.x = c, e
                    }());
                    break;
                case 90:
                    v[x++] = null;
                    break;
                case 91:
                    v[x++] = h;
                    break;
                case 93:
                    h = v[--x];
                    break;
                case 0:
                    // Return the value on top of the stack.
                    return v[--x];
                default:
                    // Any other opcode pushes a small integer literal.
                    u((g << 16 >> 16) - 16)
            }
        }

        var n = this, t = n.Function, s = Object.keys || function (e) {
            var a = {}, r = 0;
            for (var c in e) a[r++] = c;
            return a.length = r, a
        }, b = {}, k = {};
        return r
    })()
    ('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb<k7l l!r&lengthb%^l$1+s$jl  s#i$1ek1s$gr#tack4)zgr#tac$! +0o![#cj?o ]!l$b%s"o ]!l"l$b*b^0d#>>>s!0s%yA0s"l"l!r&lengthb<k+l"^l"1+s"jl  s&l&z0l!$ +["cs\'(0l#i\'1ps9wxb&s() &{s)/s(gr&Stringr,fromCharCodes)0s*yWl ._b&s o!])l l Jb<k$.aj;l .Tb<k$.gj/l .^b<k&i"-4j!+& s+yPo!]+s!l!l Hd>&l!l Bd>&+l!l <d>&+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld<l 4d#>>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd<l 6d#>>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d<l"b|&+l-l(l!b^&+l-l&zl\'g,)gk}ejo{cm,)|yn~Lij~em["cl$b%@d<l&zl\'l $ +["cl$b%b|&+l-l%8d<@b|l!b^&+ q$sign ', [e])
    return e.sign(userId)
}

// Command-line entry: everything from index 2 of process.argv onward is taken
// as the arguments, and the signature for the first one is printed to stdout.
var _ = process.argv.splice(2)

console.log(generateSignature(_[0]))


================================================
FILE: douyin.py
================================================
# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
from contextlib import closing
import requests, json, time, re, os, sys, time

class DouYin(object):
	"""Download every video posted by one Douyin user through the amemv.com / douyin.com API."""

	def __init__(self):
		"""
		The downloader keeps no state.
		"""
		pass

	@staticmethod
	def _clean_name(name):
		"""Return name with every backslash and forward slash removed."""
		return name.replace('\\', '').replace('/', '')

	def get_video_urls(self, user_id):
		"""
		Look a user up and list the share pages of the user's videos.

		Parameters:
			user_id: the user's unique id, used as the search keyword
		Returns:
			video_names: list of video file names
			video_urls: list of share-page URLs
			nickname: the user's nickname
		"""
		video_names = []
		video_urls = []
		unique_id = ''
		# The search is repeated until its first result is the requested user.
		while unique_id != user_id:
			search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
			req = requests.get(url = search_url, verify = False)
			html = json.loads(req.text)
			user_info = html['user_list'][0]['user_info']
			aweme_count = user_info['aweme_count']
			uid = user_info['uid']
			nickname = user_info['nickname']
			unique_id = user_info['unique_id']
		# Request all of the user's posts in one call.
		user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
		req = requests.get(url = user_url, verify = False)
		html = json.loads(req.text)
		i = 1
		for each in html['aweme_list']:
			share_desc = each['share_info']['share_desc']
			# Videos carrying only the default description are numbered instead.
			if '抖音-原创音乐短视频社区' == share_desc:
				video_names.append(str(i) + '.mp4')
				i += 1
			else:
				video_names.append(share_desc + '.mp4')
			video_urls.append(each['share_info']['share_url'])

		return video_names, video_urls, nickname

	def get_download_url(self, video_url):
		"""
		Read the download address out of a video's share page.

		Parameters:
			video_url: share-page URL of the video
		Returns:
			download_url: first play address found in the page's 'var data' script
		"""
		req = requests.get(url = video_url, verify = False)
		bf = BeautifulSoup(req.text, 'lxml')
		# The data is read from the last <script> element of the page.
		script = bf.find_all('script')[-1]
		video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
		video_html = json.loads(video_url_js)
		download_url = video_html['video']['play_addr']['url_list'][0]
		return download_url

	def video_downloader(self, video_url, video_name):
		"""
		Download one video to a file, printing progress to stdout.

		Nothing is written when the server does not answer with status 200.

		Parameters:
			video_url: download address of the video
			video_name: path of the file to write
		Returns:
			None
		"""
		size = 0
		chunk_size = 1024
		with closing(requests.get(video_url, stream=True, verify = False)) as response:
			# Check the status first: an error response need not carry a
			# Content-Length header.
			if response.status_code == 200:
				# A missing Content-Length is treated as "size unknown" (0); the
				# size and percentage lines are then skipped.
				content_size = int(response.headers.get('content-length', 0))
				if content_size:
					sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

				with open(video_name, "wb") as file:
					for data in response.iter_content(chunk_size = chunk_size):
						file.write(data)
						size += len(data)
						# Report progress per chunk, rewriting the same console line.
						if content_size:
							sys.stdout.write('    [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
							sys.stdout.flush()
		time.sleep(1)

	def run(self):
		"""
		Download every video of the built-in example user.

		Parameters:
			None
		Returns:
			None
		"""
		self.hello()
		# user_id = input('请输入ID(例如13978338):')
		user_id = 'sm666888'
		video_names, video_urls, nickname = self.get_video_urls(user_id)
		# Videos are stored in a directory named after the user's nickname.
		if not os.path.isdir(nickname):
			os.mkdir(nickname)
		sys.stdout.write('视频下载中:\n')
		for name, share_url in zip(video_names, video_urls):
			print('  %s\n' % share_url)
			video_url = self.get_download_url(share_url)
			# Remove both kinds of path separator so the name stays inside the directory.
			video_name = self._clean_name(name)
			self.video_downloader(video_url, os.path.join(nickname, video_name))
			print('')

	def hello(self):
		"""
		Print the welcome banner.

		Parameters:
			None
		Returns:
			None
		"""
		print('*' * 100)
		print('\t\t\t\t抖音App视频下载小助手')
		print('*' * 100)

		
if __name__ == '__main__':
	# Script entry point: create the downloader and start the download run.
	douyin = DouYin()
	douyin.run()

================================================
FILE: douyin_pro.py
================================================
# -*- coding:utf-8 -*-
from splinter.driver.webdriver.chrome import Options, Chrome
from splinter.browser import Browser
from contextlib import closing
import requests, json, time, re, os, sys, time
from bs4 import BeautifulSoup

class DouYin(object):
	"""Download a Douyin user's videos, optionally through a watermark-removal web page."""

	def __init__(self, width = 500, height = 300):
		"""
		Start the headless Chrome browser used for watermark removal.

		Parameters:
			width, height: kept for backward compatibility; not used
		"""
		# Headless browser with a desktop user agent.
		chrome_options = Options()
		chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
		self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True)

	@staticmethod
	def _clean_name(name):
		"""Return name with every backslash and forward slash removed."""
		return name.replace('\\', '').replace('/', '')

	def get_video_urls(self, user_id):
		"""
		Look a user up and list the share pages of the user's videos.

		Parameters:
			user_id: the user's unique id, used as the search keyword
		Returns:
			video_names: list of video file names
			video_urls: list of share-page URLs
			nickname: the user's nickname
		"""
		video_names = []
		video_urls = []
		unique_id = ''
		# The search is repeated until its first result is the requested user.
		while unique_id != user_id:
			search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id
			req = requests.get(url = search_url, verify = False)
			html = json.loads(req.text)
			user_info = html['user_list'][0]['user_info']
			aweme_count = user_info['aweme_count']
			uid = user_info['uid']
			nickname = user_info['nickname']
			unique_id = user_info['unique_id']
		# Request all of the user's posts in one call.
		user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)
		req = requests.get(url = user_url, verify = False)
		html = json.loads(req.text)
		i = 1
		for each in html['aweme_list']:
			share_desc = each['share_info']['share_desc']
			# Videos carrying only the default description are numbered instead.
			if '抖音-原创音乐短视频社区' == share_desc:
				video_names.append(str(i) + '.mp4')
				i += 1
			else:
				video_names.append(share_desc + '.mp4')
			video_urls.append(each['share_info']['share_url'])

		return video_names, video_urls, nickname

	def get_download_url(self, video_url):
		"""
		Read the (watermarked) download address out of a video's share page.

		Parameters:
			video_url: share-page URL of the video
		Returns:
			download_url: first play address found in the page's 'var data' script
		"""
		req = requests.get(url = video_url, verify = False)
		bf = BeautifulSoup(req.text, 'lxml')
		# The data is read from the last <script> element of the page.
		script = bf.find_all('script')[-1]
		video_url_js = re.findall(r'var data = \[(.+)\];', str(script))[0]
		video_html = json.loads(video_url_js)
		download_url = video_html['video']['play_addr']['url_list'][0]
		return download_url

	def video_downloader(self, video_url, video_name, watermark_flag=True):
		"""
		Download one video to a file, printing progress to stdout.

		Nothing is written when the server does not answer with status 200.

		Parameters:
			video_url: share-page URL of the video
			video_name: path of the file to write
			watermark_flag: True resolves the address through remove_watermark(),
				False through get_download_url()
		Returns:
			None
		"""
		size = 0
		chunk_size = 1024
		if watermark_flag == True:
			video_url = self.remove_watermark(video_url)
		else:
			video_url = self.get_download_url(video_url)
		with closing(requests.get(video_url, stream=True, verify = False)) as response:
			# Check the status first: an error response need not carry a
			# Content-Length header.
			if response.status_code != 200:
				return
			# A missing Content-Length is treated as "size unknown" (0); the
			# size and percentage lines are then skipped.
			content_size = int(response.headers.get('content-length', 0))
			if content_size:
				sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

			with open(video_name, "wb") as file:
				for data in response.iter_content(chunk_size = chunk_size):
					file.write(data)
					size += len(data)
					if content_size:
						sys.stdout.write('  [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
						sys.stdout.flush()

	def remove_watermark(self, video_url):
		"""
		Resolve a download address through the douyin.iiilab.com page.

		Parameters:
			video_url: share-page URL of the video
		Returns:
			href of the first link in the result block of the page
		"""
		# Fill the form with the URL, submit it and read the link from the result.
		self.driver.visit('http://douyin.iiilab.com/')
		self.driver.find_by_tag('input').fill(video_url)
		self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
		html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
		bf = BeautifulSoup(html, 'lxml')
		return bf.find('a').get('href')

	def run(self):
		"""
		Ask for a user id on the console and download all of that user's videos.

		Parameters:
			None
		Returns:
			None
		"""
		self.hello()
		user_id = input('请输入ID(例如40103580):')
		video_names, video_urls, nickname = self.get_video_urls(user_id)
		# Videos are stored in a directory named after the user's nickname.
		if not os.path.isdir(nickname):
			os.mkdir(nickname)
		print('视频下载中:共有%d个作品!\n' % len(video_urls))
		for num, (name, share_url) in enumerate(zip(video_names, video_urls), 1):
			print('  解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, share_url))
			# Remove both kinds of path separator so the name stays inside the directory.
			video_name = self._clean_name(name)
			self.video_downloader(share_url, os.path.join(nickname, video_name))
			print('\n')

		print('下载完成!')

	def hello(self):
		"""
		Print the welcome banner.

		Parameters:
			None
		Returns:
			None
		"""
		print('*' * 100)
		print('\t\t\t\t抖音App视频下载小助手')
		print('\t\t作者:Jack Cui')
		print('*' * 100)


if __name__ == '__main__':
	# Script entry point: create the downloader and start its console session.
	douyin = DouYin()
	douyin.run()


================================================
FILE: downloader.py
================================================
#-*- coding: UTF-8 -*-
import requests  
from contextlib import closing

class ProgressBar(object):
    """Single-line console progress indicator.

    Each call to :meth:`refresh` adds to the running count and prints one
    line of the form ``[title] status progress unit sep total unit``.
    The line ends with a carriage return while the count is below the total
    and with a newline once the count reaches or exceeds it.
    """

    def __init__(self, title, count=0.0, run_status=None, fin_status=None, total=100.0, unit='', sep='/', chunk_size=1.0):
        """
        :param title: label printed inside the square brackets
        :param count: initial progress count
        :param run_status: status text shown while running (empty when omitted)
        :param fin_status: status text shown once finished; when omitted it is
            a run of spaces as long as the running status
        :param total: count at which the bar is considered finished
        :param unit: unit label printed after the progress and total values
        :param sep: separator printed between progress and total
        :param chunk_size: divisor applied to count and total before printing
        """
        super(ProgressBar, self).__init__()
        # Layout: [title] status progress unit separator total unit
        self.info = "[%s] %s %.2f %s %s %.2f %s"
        self.title = title
        self.total = total
        self.count = count
        self.chunk_size = chunk_size
        self.status = run_status if run_status else ""
        self.fin_status = fin_status if fin_status else " " * len(self.status)
        self.unit = unit
        # The attribute is named "seq" but holds the separator string.
        self.seq = sep

    def __get_info(self):
        """Return the formatted progress line for the current state."""
        shown_count = self.count / self.chunk_size
        shown_total = self.total / self.chunk_size
        fields = (self.title, self.status, shown_count, self.unit, self.seq, shown_total, self.unit)
        return self.info % fields

    def refresh(self, count = 1, status = None):
        """Advance the bar by ``count`` and print the updated line.

        :param count: amount added to the running count
        :param status: optional status text replacing the current one; when it
            is omitted and the bar is finished, the finish status is used
        """
        self.count += count
        finished = self.count >= self.total
        if status:
            self.status = status
        elif finished:
            self.status = self.fin_status
        print(self.__get_info(), end='\n' if finished else '\r')


if __name__ == '__main__':
	# Interactive file downloader: ask for a link, stream it to a file named
	# after the last path segment of the link and show a progress line.
	# Example input:
	#   url = 'http://www.demongan.com/source/game/二十四点.zip'
	#   filename = '二十四点.zip'
	print('*' * 100)
	print('\t\t\t\t欢迎使用文件下载小助手')
	print('作者:Jack-Cui\n博客:http://blog.csdn.net/c406495762')
	print('*' * 100)
	url  = input('请输入需要下载的文件链接:\n')
	filename = url.split('/')[-1]
	with closing(requests.get(url, stream=True)) as response:
		chunk_size = 1024
		# Check the status first: the Content-Length header used to be read
		# before this test, so an error response without that header raised
		# KeyError instead of reporting the bad link.
		if response.status_code == 200:
			# NOTE(review): a 200 response without Content-Length still raises
			# KeyError here; the progress bar needs the total size.
			content_size = int(response.headers['content-length'])
			print('文件大小:%0.2f KB' % (content_size / chunk_size))
			progress = ProgressBar("%s下载进度" % filename
			            , total = content_size
			            , unit = "KB"
			            , chunk_size = chunk_size
			            , run_status = "正在下载"
			            , fin_status = "下载完成")

			with open(filename, "wb") as file:
				for data in response.iter_content(chunk_size=chunk_size):
					file.write(data)
					progress.refresh(count=len(data))
		else:
			print('链接异常')

================================================
FILE: financical.py
================================================
#-*- coding:UTF-8 -*-
import sys
import pymysql
import requests
import json
import re
from bs4 import BeautifulSoup

"""
类说明:获取财务数据

Author:
	Jack Cui
Blog:
	http://blog.csdn.net/c406495762
Zhihu:
	https://www.zhihu.com/people/Jack--Cui/
Modify:
	2017-08-31
"""
class FinancialData():
	"""Download financial statements from quotes.money.163.com ('hkstock' pages) into MySQL.

	``get_informations`` scrapes a stock's financial-data page for the report
	titles, the selectable report dates and the report type identifiers.
	``insert_tables`` then requests each report from the JSON service and
	writes its rows into the MySQL table named after the report type.
	"""

	def __init__(self):
		"""Set up the site addresses, the field-name mappings and the request headers."""
		# Site root and the prefix of the per-stock financial data page.
		self.server = 'http://quotes.money.163.com/'
		self.cwnb = 'http://quotes.money.163.com/hkstock/cwsj_'
		# Key financial indicators: service field name -> database column name.
		self.cwzb_dict = {'EPS':'基本每股收益','EPS_DILUTED':'摊薄每股收益','GROSS_MARGIN':'毛利率',
		'CAPITAL_ADEQUACY':'资本充足率','LOANS_DEPOSITS':'贷款回报率','ROTA':'总资产收益率',
		'ROEQUITY':'净资产收益率','CURRENT_RATIO':'流动比率','QUICK_RATIO':'速动比率',
		'ROLOANS':'存贷比','INVENTORY_TURNOVER':'存货周转率','GENERAL_ADMIN_RATIO':'管理费用比率',
		'TOTAL_ASSET2TURNOVER':'资产周转率','FINCOSTS_GROSSPROFIT':'财务费用比率','TURNOVER_CASH':'销售现金比率','YEAREND_DATE':'报表日期'}
		# Income statement.
		self.lrb_dict = {'TURNOVER':'总营收','OPER_PROFIT':'经营利润','PBT':'除税前利润',
		'NET_PROF':'净利润','EPS':'每股基本盈利','DPS':'每股派息',
		'INCOME_INTEREST':'利息收益','INCOME_NETTRADING':'交易收益','INCOME_NETFEE':'费用收益','YEAREND_DATE':'报表日期'}
		# Balance sheet.
		self.fzb_dict = {
			'FIX_ASS':'固定资产','CURR_ASS':'流动资产','CURR_LIAB':'流动负债',
			'INVENTORY':'存款','CASH':'现金及银行存结','OTHER_ASS':'其他资产',
			'TOTAL_ASS':'总资产','TOTAL_LIAB':'总负债','EQUITY':'股东权益',
			'CASH_SHORTTERMFUND':'库存现金及短期资金','DEPOSITS_FROM_CUSTOMER':'客户存款',
			'FINANCIALASSET_SALE':'可供出售之证券','LOAN_TO_BANK':'银行同业存款及贷款',
			'DERIVATIVES_LIABILITIES':'金融负债','DERIVATIVES_ASSET':'金融资产','YEAREND_DATE':'报表日期'}
		# Cash flow statement.
		# NOTE(review): 'CF_INT_REC' appears twice below, so only the second
		# mapping ('已收股息') takes effect. The second key was presumably meant
		# to be a different service field - confirm against the service output.
		self.llb_dict = {
			'CF_NCF_OPERACT':'经营活动产生的现金流','CF_INT_REC':'已收利息','CF_INT_PAID':'已付利息',
			'CF_INT_REC':'已收股息','CF_DIV_PAID':'已派股息','CF_INV':'投资活动产生现金流',
			'CF_FIN_ACT':'融资活动产生现金流','CF_BEG':'期初现金及现金等价物','CF_CHANGE_CSH':'现金及现金等价物净增加额',
			'CF_END':'期末现金及现金等价物','CF_EXCH':'汇率变动影响','YEAREND_DATE':'报表日期'}
		# Report type identifier -> field mapping of that report.
		self.table_dict = {'cwzb':self.cwzb_dict,'lrb':self.lrb_dict,'fzb':self.fzb_dict,'llb':self.llb_dict}
		# Request headers sent with every HTTP request.
		self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
			'Accept-Encoding': 'gzip, deflate',
			'Accept-Language': 'zh-CN,zh;q=0.8',
			'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36'}

	def get_informations(self, url):
		"""
		Scrape a stock's financial-data page.

		Parameters:
			url - address of the stock's financial-data page
		Returns:
			name - stock name (text of the first ``span.name`` element)
			table_name_list - report titles, one per ``div.titlebar3``
			table_date_list - for each matching ``select``, the list of its option texts
			url_list - report type identifiers: the ``select`` ids (those ending
			           in ``1``) with that trailing ``1`` removed
		"""
		req = requests.get(url = url, headers = self.headers)
		req.encoding = 'utf-8'
		page_bf = BeautifulSoup(req.text, 'lxml')
		name = page_bf.find_all('span', class_ = 'name')[0].string

		table_name_list = []
		table_date_list = []
		url_list = []
		# Only the selects whose id ends with "1" are read.
		select_id = re.compile(r'.+1$')
		for each_table_name in page_bf.find_all('div', class_ = 'titlebar3'):
			table_name_list.append(each_table_name.span.string)
			for each_table_date in each_table_name.div.find_all('select', id = select_id):
				url_list.append(re.findall(r'(\w+)1', each_table_date.get('id'))[0])
				table_date_list.append([each_date.string for each_date in each_table_date.find_all('option')])
		return name,table_name_list,table_date_list,url_list

	def insert_tables(self, name, table_name_list,table_date_list, url_list, code=None):
		"""
		Download every report of a stock and store its rows in MySQL.

		Parameters:
			name - stock name
			table_name_list - report titles (used for the progress output)
			table_date_list - per report, the list of selectable dates; the last
			                  entry is sent as ``start`` and the first as ``end``
			url_list - report type identifiers; each is both the ``type`` query
			           parameter and the name of the target table
			code - stock code; when omitted, the module-level ``code`` variable
			       set by the script entry point is used
		Returns:
			None
		Raises:
			ValueError - no stock code was passed and no module-level ``code`` exists
		"""
		if code is None:
			# Backward compatibility: this method used to read the module
			# global ``code`` implicitly.
			code = globals().get('code')
			if code is None:
				raise ValueError('a stock code is required')

		# host, port, user, passwd, db and charset of the target database.
		conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='yourpasswd',db='financialdata',charset='utf8')
		try:
			cursor = conn.cursor()
			try:
				for i, table_title in enumerate(table_name_list):
					table_type = url_list[i]
					dates = table_date_list[i]
					sys.stdout.write('    [正在下载       ]    %s' % table_title + '\r')
					url = self.server + 'hk/service/cwsj_service.php?symbol={}&start={}&end={}&type={}&unit=yuan'.format(code,dates[-1],dates[0],table_type)
					table = requests.get(url = url, headers = self.headers).json()
					nums = len(table)
					# Iterate the rows themselves: indexing with the outer
					# table index stored the wrong row for every entry.
					for num, record in enumerate(table):
						sys.stdout.write('    [正在下载 %.2f%%]   ' % (((num+1) / nums)*100) + '\r')
						sys.stdout.flush()
						row = self._build_row(name, code, record, table_type)
						self._store_row(conn, cursor, table_type, row)
					print('    [下载完成 ')
			finally:
				cursor.close()
		finally:
			# Always release the connection, even when a request or row fails.
			conn.close()

	def _build_row(self, name, code, record, table_type):
		"""Map one service record to {column name: value}, keeping only the known fields."""
		columns = self.table_dict[table_type]
		row = {'股票名': name, '股票代码': code}
		for key, value in record.items():
			if key in columns:
				row[columns[key]] = value
		return row

	def _store_row(self, conn, cursor, table_type, row):
		"""Insert the row's identifying columns, then fill every other column with an UPDATE."""
		stock_name = str(row['股票名'])
		stock_code = str(row['股票代码'])
		report_date = str(row['报表日期'])
		# Table and column names come from this class's own mappings; the
		# values come from the network and are passed as query parameters.
		insert_sql = 'INSERT INTO `{}` (`股票名`,`股票代码`,`报表日期`) VALUES (%s,%s,%s)'.format(table_type)
		self._execute(conn, cursor, insert_sql, (stock_name, stock_code, report_date))
		for column, value in row.items():
			if column in ('股票名', '股票代码', '报表日期'):
				continue
			update_sql = 'UPDATE `{}` SET `{}`=%s WHERE `股票名`=%s AND `报表日期`=%s'.format(table_type, column)
			self._execute(conn, cursor, update_sql, (value, stock_name, report_date))

	@staticmethod
	def _execute(conn, cursor, sql, params):
		"""Run one statement and commit it; roll back on failure and carry on (best effort)."""
		try:
			cursor.execute(sql, params)
			conn.commit()
		except Exception:
			# A failed statement must not abort the remaining rows.
			conn.rollback()

if __name__ == '__main__':
	# Script entry point: print the banner, ask for a stock code, scrape the
	# stock's report overview and store every report in the database.
	for banner_line in (
		'*' * 100,
		'\t\t\t\t\t财务数据下载助手\n',
		'作者:Jack-Cui\n',
		'About Me:\n',
		'  知乎:https://www.zhihu.com/people/Jack--Cui',
		'  Blog:http://blog.csdn.net/c406495762',
		'  Gihub:https://github.com/Jack-Cherish\n',
		'*' * 100,
	):
		print(banner_line)

	fd = FinancialData()
	# Stock code typed by the user; insert_tables reads this module-level name.
	code = input('请输入股票代码:')

	page_url = '{}{}.html'.format(fd.cwnb, code)
	name,table_name_list,table_date_list,url_list = fd.get_informations(page_url)
	print('\n  %s:(%s)财务数据下载中!\n' % (name,code))
	fd.insert_tables(name,table_name_list,table_date_list,url_list)
	print('\n  %s:(%s)财务数据下载完成!' % (name,code))

================================================
FILE: geetest.py
================================================
# -*-coding:utf-8 -*-
import random
import re
import time
# 图片转换
import base64
from urllib.request import urlretrieve

from bs4 import BeautifulSoup

import PIL.Image as image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def save_base64img(data_str, save_name):
    """
    Decode base64 data and write the resulting bytes to a file.

    :param data_str: base64 data without the leading type prefix
    :param save_name: full path of the file to write
    """
    img_data = base64.b64decode(data_str)
    # The context manager closes the file even when the write fails.
    with open(save_name, 'wb') as file:
        file.write(img_data)


def get_base64_by_canvas(driver, class_name, contain_type):
    """
    Read the content of a canvas element as a base64 PNG data URL.

    The canvas is polled until the data URL is at least 5000 characters
    long, so that a not-yet-rendered (blank) canvas is not returned.

    :param driver: webdriver object (only ``execute_script`` is used)
    :param class_name: class name of the canvas element; the first match is used
    :param contain_type: whether the result keeps the data-URL type prefix
    :return: the whole data URL when ``contain_type`` is true, otherwise only
        the part after the first comma
    """
    # The script does not change between polls, so build it once.
    get_img_js = 'return document.getElementsByClassName("' + class_name + '")[0].toDataURL("image/png");'
    while True:
        bg_img = driver.execute_script(get_img_js)
        if bg_img and len(bg_img) >= 5000:
            break
        # Wait only when another poll is needed, not after a successful read.
        time.sleep(0.5)
    if contain_type:
        return bg_img
    return bg_img[bg_img.find(',') + 1:]


def save_bg(driver, bg_path="bg.png", bg_class="geetest_canvas_bg geetest_absolute"):
    """
    Save the background image (the one containing the gap) to a file.

    :param driver: webdriver object
    :param bg_path: path the image is written to
    :param bg_class: class attribute of the background canvas
    :return: ``bg_path``
    """
    # Request the data without its type prefix and write it straight out.
    save_base64img(get_base64_by_canvas(driver, bg_class, False), bg_path)
    return bg_path


def save_full_bg(driver, full_bg_path="fbg.png", full_bg_class="geetest_canvas_fullbg geetest_fade geetest_absolute"):
    """
    Save the complete background image to a file.

    :param driver: webdriver object
    :param full_bg_path: path the image is written to
    :param full_bg_class: class attribute of the complete-background canvas
    :return: ``full_bg_path``
    """
    # Request the data without its type prefix and write it straight out.
    save_base64img(get_base64_by_canvas(driver, full_bg_class, False), full_bg_path)
    return full_bg_path

class Crack():
	def __init__(self,keyword):
		self.url = '*'
		self.browser = webdriver.Chrome('D:\\chromedriver.exe')
		self.wait = WebDriverWait(self.browser, 100)
		self.keyword =
Download .txt
gitextract_u9lbtfzc/

├── 12306.py
├── 2020/
│   ├── README.md
│   ├── api/
│   │   └── api.py
│   ├── bilibili/
│   │   ├── download.py
│   │   └── xml2ass.py
│   ├── dmzj/
│   │   └── cartoon.py
│   ├── taobao/
│   │   └── taobao_login.py
│   ├── xbqg/
│   │   └── xbqg_spider.py
│   └── zycjw/
│       └── video_download.py
├── Netease/
│   ├── Netease.py
│   └── music_list.txt
├── README.md
├── baiduwenku.py
├── baiduwenku_pro_1.py
├── baiwan/
│   ├── app.js
│   ├── baiwan.py
│   ├── file.txt
│   ├── index.html
│   └── question.txt
├── bilibili/
│   ├── README.md
│   ├── bilibili.py
│   └── xml2ass.py
├── bilibili_luckyman/
│   ├── README.md
│   └── bilibili_luckyman.py
├── biqukan.py
├── cartoon/
│   ├── cartoon/
│   │   ├── __init__.py
│   │   ├── items.py
│   │   ├── middlewares.py
│   │   ├── pipelines.py
│   │   ├── settings.py
│   │   └── spiders/
│   │       ├── __init__.py
│   │       └── comic_spider.py
│   └── scrapy.cfg
├── daili.py
├── dingdong/
│   ├── README.md
│   └── jd.py
├── douyin/
│   ├── README.md
│   ├── douyin.py
│   └── fuck-byted-acrawler.js
├── douyin.py
├── douyin_pro.py
├── downloader.py
├── financical.py
├── geetest.py
├── hero.py
├── one_hour_spider/
│   ├── biquge20180731.py
│   ├── biqukan.py
│   ├── unsplash.py
│   ├── unsplash20180731.py
│   └── vidoe_downloader.py
├── shuaia.py
├── video_downloader/
│   ├── MyQR/
│   │   ├── __init__.py
│   │   ├── mylibs/
│   │   │   ├── ECC.py
│   │   │   ├── __init__.py
│   │   │   ├── constant.py
│   │   │   ├── data.py
│   │   │   ├── draw.py
│   │   │   ├── matrix.py
│   │   │   ├── structure.py
│   │   │   └── theqrmodule.py
│   │   ├── myqr.py
│   │   └── terminal.py
│   ├── requirements.txt
│   └── video_downloader.py
└── zhengfang_system_spider/
    ├── README.md
    ├── requirements.txt
    ├── spider.py
    └── zhengfang.txt
Download .txt
SYMBOL INDEX (270 symbols across 41 files)

FILE: 12306.py
  class huoche (line 10) | class huoche(object):
    method __init__ (line 36) | def __init__(self):
    method login (line 40) | def login(self):
    method start (line 52) | def start(self):

FILE: 2020/bilibili/download.py
  function addTasktoXunlei (line 19) | def addTasktoXunlei(down_url):
  function get_download_url (line 31) | def get_download_url(arcurl):

FILE: 2020/bilibili/xml2ass.py
  function SeekZero (line 28) | def SeekZero(function):
  function EOFAsNone (line 38) | def EOFAsNone(function):
  function ProbeCommentFormat (line 49) | def ProbeCommentFormat(f):
  function ReadCommentsNiconico (line 111) | def ReadCommentsNiconico(f, fontsize):
  function ReadCommentsAcfun (line 140) | def ReadCommentsAcfun(f, fontsize):
  function ReadCommentsBilibili (line 159) | def ReadCommentsBilibili(f, fontsize):
  function ReadCommentsTudou (line 179) | def ReadCommentsTudou(f, fontsize):
  function ReadCommentsMioMio (line 193) | def ReadCommentsMioMio(f, fontsize):
  function ReadCommentsSH5V (line 209) | def ReadCommentsSH5V(f, fontsize):
  function WriteCommentBilibiliPositioned (line 239) | def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
  function WriteCommentAcfunPositioned (line 319) | def WriteCommentAcfunPositioned(f, c, width, height, styleid):
  function WriteCommentSH5VPositioned (line 430) | def WriteCommentSH5VPositioned(f, c, width, height, styleid):
  function GetZoomFactor (line 476) | def GetZoomFactor(SourceSize, TargetSize):
  function ConvertFlashRotation (line 506) | def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
  function ProcessComments (line 558) | def ProcessComments(comments, f, width, height, bottomReserved, fontface...
  function TestFreeRows (line 593) | def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
  function FindAlternativeRow (line 623) | def FindAlternativeRow(rows, c, height, bottomReserved):
  function MarkCommentRow (line 633) | def MarkCommentRow(rows, c, row):
  function WriteASSHead (line 641) | def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
  function WriteComment (line 663) | def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lif...
  function ASSEscape (line 683) | def ASSEscape(s):
  function CalculateLength (line 687) | def CalculateLength(s):
  function ConvertTimestamp (line 691) | def ConvertTimestamp(timestamp):
  function ConvertType2 (line 699) | def ConvertType2(row, height, bottomReserved):
  function ConvertToFile (line 703) | def ConvertToFile(filename_or_file, *args, **kwargs):
  function FilterBadChars (line 712) | def FilterBadChars(f):
  class safe_list (line 718) | class safe_list(list):
    method get (line 719) | def get(self, index, default=None):
  function export (line 726) | def export(func):
  function Danmaku2ASS (line 736) | def Danmaku2ASS(input_files, output_file, stage_width, stage_height, res...
  function ReadComments (line 751) | def ReadComments(input_files, font_size=25.0, progress_callback=None):
  function GetCommentProcessor (line 774) | def GetCommentProcessor(input_file):
  function main (line 778) | def main():

FILE: 2020/taobao/taobao_login.py
  class taobao (line 19) | class taobao():
    method __init__ (line 20) | def __init__(self):
    method login (line 28) | def login(self, username, password):
    method get_nickname (line 69) | def get_nickname(self):
    method clear_cart (line 77) | def clear_cart(self):

FILE: 2020/xbqg/xbqg_spider.py
  function get_content (line 13) | def get_content(target):

FILE: 2020/zycjw/video_download.py
  function downVideo (line 55) | def downVideo(url):

FILE: Netease/Netease.py
  class Encrypyed (line 12) | class Encrypyed():
    method __init__ (line 16) | def __init__(self):
    method encrypted_request (line 22) | def encrypted_request(self, text):
    method aes_encrypt (line 30) | def aes_encrypt(self, text, secKey):
    method rsa_encrpt (line 38) | def rsa_encrpt(self, text, pubKey, modulus):
    method create_secret_key (line 43) | def create_secret_key(self, size):
  class Song (line 47) | class Song():
    method __init__ (line 51) | def __init__(self, song_id, song_name, song_num, song_url=None):
  class Crawler (line 57) | class Crawler():
    method __init__ (line 61) | def __init__(self, timeout=60, cookie_path='.'):
    method post_request (line 79) | def post_request(self, url, params):
    method search (line 93) | def search(self, search_content, search_type, limit=9):
    method search_song (line 107) | def search_song(self, song_name, song_num, quiet=True, limit=9):
    method get_song_url (line 128) | def get_song_url(self, song_id, bit_rate=320000):
    method get_song_by_url (line 149) | def get_song_by_url(self, song_url, song_name, song_num, folder):
  class Netease (line 179) | class Netease():
    method __init__ (line 183) | def __init__(self, timeout, folder, quiet, cookie_path):
    method download_song_by_search (line 188) | def download_song_by_search(self, song_name, song_num):
    method download_song_by_id (line 203) | def download_song_by_id(self, song_id, song_name, song_num, folder='.'):

FILE: baiduwenku_pro_1.py
  function fetch_url (line 9) | def fetch_url(url):
  function get_doc_id (line 13) | def get_doc_id(url):
  function parse_type (line 17) | def parse_type(content):
  function parse_title (line 21) | def parse_title(content):
  function parse_doc (line 25) | def parse_doc(content):
  function parse_txt (line 44) | def parse_txt(doc_id):
  function parse_other (line 59) | def parse_other(doc_id):
  function save_file (line 74) | def save_file(filename, content):
  function main (line 84) | def main():

FILE: baiwan/app.js
  function messageGet (line 14) | function messageGet(){

FILE: baiwan/baiwan.py
  class BaiWan (line 17) | class BaiWan():
    method __init__ (line 18) | def __init__(self):
    method get_question (line 25) | def get_question(self):
    method search (line 91) | def search(self, question, alternative_answers):

FILE: bilibili/bilibili.py
  class BiliBili (line 12) | class BiliBili:
    method __init__ (line 13) | def __init__(self, dirname, keyword):
    method video_downloader (line 39) | def video_downloader(self, video_url, video_name):
    method search_video (line 68) | def search_video(self, search_url):
    method get_download_url (line 87) | def get_download_url(self, arcurl):
    method download_xml (line 118) | def download_xml(self, danmu_url, danmu_name):
    method get_danmu (line 136) | def get_danmu(self, oid, filename):
    method search_videos (line 153) | def search_videos(self, keyword, pages):

FILE: bilibili/xml2ass.py
  function SeekZero (line 28) | def SeekZero(function):
  function EOFAsNone (line 38) | def EOFAsNone(function):
  function ProbeCommentFormat (line 49) | def ProbeCommentFormat(f):
  function ReadCommentsNiconico (line 111) | def ReadCommentsNiconico(f, fontsize):
  function ReadCommentsAcfun (line 140) | def ReadCommentsAcfun(f, fontsize):
  function ReadCommentsBilibili (line 159) | def ReadCommentsBilibili(f, fontsize):
  function ReadCommentsTudou (line 179) | def ReadCommentsTudou(f, fontsize):
  function ReadCommentsMioMio (line 193) | def ReadCommentsMioMio(f, fontsize):
  function ReadCommentsSH5V (line 209) | def ReadCommentsSH5V(f, fontsize):
  function WriteCommentBilibiliPositioned (line 239) | def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
  function WriteCommentAcfunPositioned (line 319) | def WriteCommentAcfunPositioned(f, c, width, height, styleid):
  function WriteCommentSH5VPositioned (line 430) | def WriteCommentSH5VPositioned(f, c, width, height, styleid):
  function GetZoomFactor (line 476) | def GetZoomFactor(SourceSize, TargetSize):
  function ConvertFlashRotation (line 506) | def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
  function ProcessComments (line 558) | def ProcessComments(comments, f, width, height, bottomReserved, fontface...
  function TestFreeRows (line 593) | def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
  function FindAlternativeRow (line 623) | def FindAlternativeRow(rows, c, height, bottomReserved):
  function MarkCommentRow (line 633) | def MarkCommentRow(rows, c, row):
  function WriteASSHead (line 641) | def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
  function WriteComment (line 663) | def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lif...
  function ASSEscape (line 683) | def ASSEscape(s):
  function CalculateLength (line 687) | def CalculateLength(s):
  function ConvertTimestamp (line 691) | def ConvertTimestamp(timestamp):
  function ConvertType2 (line 699) | def ConvertType2(row, height, bottomReserved):
  function ConvertToFile (line 703) | def ConvertToFile(filename_or_file, *args, **kwargs):
  function FilterBadChars (line 712) | def FilterBadChars(f):
  class safe_list (line 718) | class safe_list(list):
    method get (line 719) | def get(self, index, default=None):
  function export (line 726) | def export(func):
  function Danmaku2ASS (line 736) | def Danmaku2ASS(input_files, output_file, stage_width, stage_height, res...
  function ReadComments (line 751) | def ReadComments(input_files, font_size=25.0, progress_callback=None):
  function GetCommentProcessor (line 774) | def GetCommentProcessor(input_file):
  function main (line 778) | def main():

FILE: bilibili_luckyman/bilibili_luckyman.py
  function get_dynamic_id (line 8) | def get_dynamic_id(url):
  function get_data (line 12) | def get_data(detail_url, params):
  function get_uses (line 20) | def get_uses(dynamic_id):
  function get_lucky_man (line 47) | def get_lucky_man(num, lucky_num):
  function get_local_time (line 54) | def get_local_time():

FILE: biqukan.py
  class download (line 23) | class download(object):
    method __init__ (line 24) | def __init__(self, target):
    method get_download_url (line 42) | def get_download_url(self):
    method Downloader (line 82) | def Downloader(self, url):
    method Writer (line 105) | def Writer(self, name, path, text):

FILE: cartoon/cartoon/items.py
  class ComicItem (line 10) | class ComicItem(scrapy.Item):

FILE: cartoon/cartoon/middlewares.py
  class CartoonSpiderMiddleware (line 11) | class CartoonSpiderMiddleware(object):
    method from_crawler (line 17) | def from_crawler(cls, crawler):
    method process_spider_input (line 23) | def process_spider_input(response, spider):
    method process_spider_output (line 30) | def process_spider_output(response, result, spider):
    method process_spider_exception (line 38) | def process_spider_exception(response, exception, spider):
    method process_start_requests (line 46) | def process_start_requests(start_requests, spider):
    method spider_opened (line 55) | def spider_opened(self, spider):

FILE: cartoon/cartoon/pipelines.py
  class ComicImgDownloadPipeline (line 13) | class ComicImgDownloadPipeline(object):
    method process_item (line 15) | def process_item(self, item, spider):

FILE: cartoon/cartoon/spiders/comic_spider.py
  class ComicSpider (line 8) | class ComicSpider(scrapy.Spider):
    method __init__ (line 11) | def __init__(self):
    method start_requests (line 22) | def start_requests(self):
    method parse1 (line 26) | def parse1(self, response):
    method parse2 (line 45) | def parse2(self, response):
    method parse3 (line 69) | def parse3(self, response):

FILE: daili.py
  function get_proxys (line 19) | def get_proxys(page = 1):
  function check_ip (line 66) | def check_ip(ip, lose_time, waste_time):
  function initpattern (line 107) | def initpattern():

FILE: dingdong/jd.py
  function search_goods (line 17) | def search_goods(keyword, pages):
  function goods_images (line 102) | def goods_images(goods_url):
  function download_image (line 158) | def download_image(path, image_url):
  function run (line 190) | def run(path, keyword, num):

FILE: douyin.py
  class DouYin (line 6) | class DouYin(object):
    method __init__ (line 7) | def __init__(self):
    method get_video_urls (line 14) | def get_video_urls(self, user_id):
    method get_download_url (line 50) | def get_download_url(self, video_url):
    method video_downloader (line 66) | def video_downloader(self, video_url, video_name):
    method run (line 92) | def run(self):
    method hello (line 119) | def hello(self):

FILE: douyin/douyin.py
  class DouYin (line 6) | class DouYin(object):
    method __init__ (line 7) | def __init__(self, width = 500, height = 300):
    method get_video_urls (line 23) | def get_video_urls(self, user_id, type_flag='f'):
    method get_download_url (line 90) | def get_download_url(self, video_url, watermark_flag):
    method video_downloader (line 107) | def video_downloader(self, video_url, video_name, watermark_flag=False):
    method run (line 134) | def run(self):
    method hello (line 179) | def hello(self):

FILE: douyin/fuck-byted-acrawler.js
  function generateSignature (line 2) | function generateSignature(userId) {

FILE: douyin_pro.py
  class DouYin (line 8) | class DouYin(object):
    method __init__ (line 9) | def __init__(self, width = 500, height = 300):
    method get_video_urls (line 18) | def get_video_urls(self, user_id):
    method get_download_url (line 54) | def get_download_url(self, video_url):
    method video_downloader (line 70) | def video_downloader(self, video_url, video_name, watermark_flag=True):
    method remove_watermark (line 101) | def remove_watermark(self, video_url):
    method run (line 116) | def run(self):
    method hello (line 143) | def hello(self):

FILE: downloader.py
  class ProgressBar (line 5) | class ProgressBar(object):
    method __init__ (line 6) | def __init__(self, title, count=0.0, run_status=None, fin_status=None,...
    method __get_info (line 18) | def __get_info(self):
    method refresh (line 23) | def refresh(self, count = 1, status = None):

FILE: financical.py
  class FinancialData (line 21) | class FinancialData():
    method __init__ (line 23) | def __init__(self):
    method get_informations (line 78) | def get_informations(self, url):
    method insert_tables (line 126) | def insert_tables(self, name, table_name_list,table_date_list, url_list):

FILE: geetest.py
  function save_base64img (line 18) | def save_base64img(data_str, save_name):
  function get_base64_by_canvas (line 30) | def get_base64_by_canvas(driver, class_name, contain_type):
  function save_bg (line 51) | def save_bg(driver, bg_path="bg.png", bg_class="geetest_canvas_bg geetes...
  function save_full_bg (line 64) | def save_full_bg(driver, full_bg_path="fbg.png", full_bg_class="geetest_...
  class Crack (line 76) | class Crack():
    method __init__ (line 77) | def __init__(self,keyword):
    method open (line 84) | def open(self):
    method get_images (line 94) | def get_images(self, bg_filename = 'bg.jpg', fullbg_filename = 'fullbg...
    method get_merge_image (line 126) | def get_merge_image(self, filename, location_list):
    method get_merge_image (line 159) | def get_merge_image(self, filename, location_list):
    method is_pixel_equal (line 192) | def is_pixel_equal(self, img1, img2, x, y):
    method get_gap (line 210) | def get_gap(self, img1, img2):
    method get_track (line 225) | def get_track(self, distance):
    method get_slider (line 261) | def get_slider(self):
    method move_to_gap (line 274) | def move_to_gap(self, slider, track):
    method crack (line 289) | def crack(self):

FILE: hero.py
  function hero_imgs_download (line 21) | def hero_imgs_download(url, header):
  function hero_list (line 49) | def hero_list(url, header):
  function seek_weapon (line 78) | def seek_weapon(equip_id, weapon_info):
  function hero_info (line 102) | def hero_info(url, header, weapon_info):
  function hero_weapon (line 135) | def hero_weapon(url, header):

FILE: one_hour_spider/biqukan.py
  class downloader (line 14) | class downloader(object):
    method __init__ (line 16) | def __init__(self):
    method get_download_url (line 32) | def get_download_url(self):
    method get_contents (line 53) | def get_contents(self, target):
    method writer (line 72) | def writer(self, name, path, text):

FILE: one_hour_spider/unsplash.py
  class get_photos (line 5) | class get_photos(object):
    method __init__ (line 7) | def __init__(self):
    method get_ids (line 22) | def get_ids(self):
    method download (line 47) | def download(self, photo_id, filename):

FILE: one_hour_spider/vidoe_downloader.py
  class video_downloader (line 6) | class video_downloader():
    method __init__ (line 7) | def __init__(self, url):
    method get_key (line 25) | def get_key(self):
    method get_url (line 39) | def get_url(self):
    method Schedule (line 60) | def Schedule(self, a, b, c):
    method video_download (line 77) | def video_download(self, url, filename):

FILE: video_downloader/MyQR/mylibs/ECC.py
  function encode (line 6) | def encode(ver, ecl, data_codewords):
  function get_ecc (line 13) | def get_ecc(dc, ecc_num):
  function divide (line 20) | def divide(MP, *GP):
  function XOR (line 33) | def XOR(GP, *MP):

FILE: video_downloader/MyQR/mylibs/data.py
  function encode (line 6) | def encode(ver, ecl, str):
  function analyse (line 47) | def analyse(ver, ecl, str):
  function numeric_encoding (line 64) | def numeric_encoding(str):
  function alphanumeric_encoding (line 77) | def alphanumeric_encoding(str):
  function byte_encoding (line 91) | def byte_encoding(str):
  function kanji_encoding (line 99) | def kanji_encoding(str):
  function get_cci (line 103) | def get_cci(ver, mode, str):

FILE: video_downloader/MyQR/mylibs/draw.py
  function draw_qrcode (line 6) | def draw_qrcode(abspath, qrmatrix):
  function draw_a_black_unit (line 22) | def draw_a_black_unit(p, x, y, ul):

FILE: video_downloader/MyQR/mylibs/matrix.py
  function get_qrmatrix (line 5) | def get_qrmatrix(ver, ecl, bits):
  function add_finder_and_separator (line 35) | def add_finder_and_separator(m):
  function add_alignment (line 47) | def add_alignment(ver, m):
  function add_an_alignment (line 55) | def add_an_alignment(row, column, m):
  function add_timing (line 61) | def add_timing(m):
  function add_dark_and_reserving (line 65) | def add_dark_and_reserving(ver, m):
  function place_bits (line 76) | def place_bits(bits, m):
  function mask (line 89) | def mask(mm, m):
  function get_mask_patterns (line 100) | def get_mask_patterns(mm):
  function compute_score (line 133) | def compute_score(m):
  function add_format_and_version_string (line 182) | def add_format_and_version_string(ver, ecl, mask_num, m):

FILE: video_downloader/MyQR/mylibs/structure.py
  function structure_final_bits (line 5) | def structure_final_bits(ver, ecl, data_codewords, ecc):
  function interleave_dc (line 13) | def interleave_dc(ver, ecl, data_codewords):
  function interleave_ecc (line 23) | def interleave_ecc(ecc):

FILE: video_downloader/MyQR/mylibs/theqrmodule.py
  function get_qrcode (line 8) | def get_qrcode(ver, ecl, str, save_place):

FILE: video_downloader/MyQR/myqr.py
  function run (line 22) | def run(words, version=1, level='H', picture=None, colorized=False, cont...

FILE: video_downloader/MyQR/terminal.py
  function main (line 7) | def main():

FILE: video_downloader/video_downloader.py
  class APP (line 31) | class APP:
    method __init__ (line 32) | def __init__(self, width = 500, height = 300):
    method loads_jsonp (line 116) | def loads_jsonp(self, _jsonp):
    method video_play (line 135) | def video_play(self):
    method download_wmxz (line 206) | def download_wmxz(self):
    method QR_Code (line 255) | def QR_Code(self):
    method center (line 294) | def center(self):
    method loop (line 313) | def loop(self):

FILE: zhengfang_system_spider/spider.py
  class Who (line 18) | class Who:
    method __init__ (line 19) | def __init__(self, user, pswd):
  class Tool (line 24) | class Tool:
    method replace (line 31) | def replace(self, x):
  function Getgrade (line 40) | def Getgrade(response):
  function Getgradetestresults (line 61) | def Getgradetestresults(trs):
  class University (line 77) | class University:
    method __init__ (line 78) | def __init__(self, student, baseurl):
    method Login (line 85) | def Login(self):
    method GetClass (line 128) | def GetClass(self):
    method GetGrade (line 157) | def GetGrade(self):
    method GradeTestResults (line 197) | def GradeTestResults(self):
Condensed preview — 68 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (302K chars).
[
  {
    "path": "12306.py",
    "chars": 3043,
    "preview": "# -*- coding: utf-8 -*-\n\"\"\"\n@author: liuyw\n\"\"\"\nfrom splinter.browser import Browser\nfrom time import sleep\nimport traceb"
  },
  {
    "path": "2020/README.md",
    "chars": 3245,
    "preview": "# Python Spider 2020\n\n由于这个项目时间太长了,陆陆续续,很多实战示例也早已失效。\n\n网络爬虫,是一门比较通用的基础技术,各个领域都会有所涉及,比如我做视觉算法的,也需要用到网络爬虫,例如调用 API 接口清洗数据等,这"
  },
  {
    "path": "2020/api/api.py",
    "chars": 1375,
    "preview": "import requests\nimport base64\nimport json\nimport cv2\nimport numpy as np\nimport matplotlib.pyplot as plt\n%matplotlib inli"
  },
  {
    "path": "2020/bilibili/download.py",
    "chars": 4273,
    "preview": "# -*-coding:utf-8 -*-\n# Website: https://cuijiahua.com\n# Author: Jack Cui\n# Date: 2020.07.22\nimport requests\nimport json"
  },
  {
    "path": "2020/bilibili/xml2ass.py",
    "chars": 37423,
    "preview": "# The original author of this program, Danmaku2ASS, is StarBrilliant.\n# This file is released under General Public Licen"
  },
  {
    "path": "2020/dmzj/cartoon.py",
    "chars": 2440,
    "preview": "import requests\nimport os\nimport re\nfrom bs4 import BeautifulSoup\nfrom contextlib import closing\nfrom tqdm import tqdm\ni"
  },
  {
    "path": "2020/taobao/taobao_login.py",
    "chars": 3680,
    "preview": "from selenium import webdriver\nimport logging\nimport time\nfrom selenium.common.exceptions import NoSuchElementException,"
  },
  {
    "path": "2020/xbqg/xbqg_spider.py",
    "chars": 1139,
    "preview": "import requests\nimport time\nfrom tqdm import tqdm\nfrom bs4 import BeautifulSoup\n\n\"\"\"\n    Author:\n        Jack Cui\n    We"
  },
  {
    "path": "2020/zycjw/video_download.py",
    "chars": 1817,
    "preview": "import os\nimport ffmpy3\nimport requests\nfrom bs4 import BeautifulSoup\nfrom multiprocessing.dummy import Pool as ThreadPo"
  },
  {
    "path": "Netease/Netease.py",
    "chars": 7385,
    "preview": "# -*- coding:utf-8 -*-\r\nimport requests, hashlib, sys, click, re, base64, binascii, json, os\r\nfrom Crypto.Cipher import "
  },
  {
    "path": "Netease/music_list.txt",
    "chars": 78,
    "preview": "風見鶏\n外婆的话【不才】\nWe Don't Talk Anymore\n【电吉他】《青鸟》\n小棋童\n千本桜(古筝版)\n妄为\n借我\n你到底有没有爱过我\n七月上\n"
  },
  {
    "path": "README.md",
    "chars": 7289,
    "preview": "# 注:2020年最新连载教程请移步:[Python Spider 2020](https://github.com/Jack-Cherish/python-spider/tree/master/2020 \"Python Spider 20"
  },
  {
    "path": "baiduwenku.py",
    "chars": 1707,
    "preview": "# -*- coding:UTF-8 -*-\nfrom selenium import webdriver\nfrom bs4 import BeautifulSoup\nimport re\nimport time\n\nif __name__ ="
  },
  {
    "path": "baiduwenku_pro_1.py",
    "chars": 3209,
    "preview": "import requests\nimport re\nimport json\nimport os\n\nsession = requests.session()\n\n\ndef fetch_url(url):\n    return session.g"
  },
  {
    "path": "baiwan/app.js",
    "chars": 1402,
    "preview": "var http = require('http');\nvar fs = require('fs');\nvar schedule = require(\"node-schedule\"); \nvar message = {};\nvar coun"
  },
  {
    "path": "baiwan/baiwan.py",
    "chars": 4547,
    "preview": "# -*-coding:utf-8 -*-\nimport requests\nfrom lxml import etree\nfrom bs4 import BeautifulSoup\nimport urllib\nimport time, re"
  },
  {
    "path": "baiwan/file.txt",
    "chars": 560,
    "preview": "⣺Ǽ¼\n**************************************************\nѡ723  81  101  \n************************************************"
  },
  {
    "path": "baiwan/index.html",
    "chars": 5648,
    "preview": "<!DOCTYPE html>\n<html>\n  <head>\n    <meta charset=\"utf-8\" />\n    <meta http-equiv=\"refresh\" content=\"2\">\n    <title>Jack"
  },
  {
    "path": "baiwan/question.txt",
    "chars": 2,
    "preview": "Ǽ¼"
  },
  {
    "path": "bilibili/README.md",
    "chars": 336,
    "preview": "## 功能\n\n下载B站视频和弹幕,将xml原生弹幕转换为ass弹幕文件,支持plotplayer等播放器的弹幕播放。\n\n## 作者\n\n* Website: [http://cuijiahua.com](http://cuijiahua.co"
  },
  {
    "path": "bilibili/bilibili.py",
    "chars": 6620,
    "preview": "# -*-coding:utf-8 -*-\n# Website: http://cuijiahua.com\n# Author: Jack Cui\n# Date: 2018.6.9\n\nimport requests, json, re, sy"
  },
  {
    "path": "bilibili/xml2ass.py",
    "chars": 37423,
    "preview": "# The original author of this program, Danmaku2ASS, is StarBrilliant.\n# This file is released under General Public Licen"
  },
  {
    "path": "bilibili_luckyman/README.md",
    "chars": 84,
    "preview": "## 说明\n\nB 站 30 万粉丝抽奖,自己写了一个转发抽奖助手。\n\n上次活动:\n\nhttps://t.bilibili.com/675922191916728342\n"
  },
  {
    "path": "bilibili_luckyman/bilibili_luckyman.py",
    "chars": 2546,
    "preview": "# -*- coding:utf-8 -*-\nimport requests\nimport json\nimport re\nimport random\nimport time\n\ndef get_dynamic_id(url):\n    dyn"
  },
  {
    "path": "biqukan.py",
    "chars": 3596,
    "preview": "# -*- coding:UTF-8 -*-\nfrom urllib import request\nfrom bs4 import BeautifulSoup\nimport collections\nimport re\nimport os\ni"
  },
  {
    "path": "cartoon/cartoon/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "cartoon/cartoon/items.py",
    "chars": 306,
    "preview": "# -*- coding: utf-8 -*-\n\n# Define here the models for your scraped items\n#\n# See documentation in:\n# http://doc.scrapy.o"
  },
  {
    "path": "cartoon/cartoon/middlewares.py",
    "chars": 1879,
    "preview": "# -*- coding: utf-8 -*-\n\n# Define here the models for your spider middleware\n#\n# See documentation in:\n# http://doc.scra"
  },
  {
    "path": "cartoon/cartoon/pipelines.py",
    "chars": 1224,
    "preview": "# -*- coding: utf-8 -*-\n\n# Define your item pipelines here\n#\n# Don't forget to add your pipeline to the ITEM_PIPELINES s"
  },
  {
    "path": "cartoon/cartoon/settings.py",
    "chars": 3306,
    "preview": "# -*- coding: utf-8 -*-\n\n# Scrapy settings for cartoon project\n#\n# For simplicity, this file contains only settings cons"
  },
  {
    "path": "cartoon/cartoon/spiders/__init__.py",
    "chars": 161,
    "preview": "# This package will contain the spiders of your Scrapy project\n#\n# Please refer to the documentation for information on "
  },
  {
    "path": "cartoon/cartoon/spiders/comic_spider.py",
    "chars": 2403,
    "preview": "# -*- coding: utf-8 -*-\n\nimport re\nimport scrapy\nfrom scrapy import Selector\nfrom cartoon.items import ComicItem\n\nclass "
  },
  {
    "path": "cartoon/scrapy.cfg",
    "chars": 258,
    "preview": "# Automatically created by: scrapy startproject\n#\n# For more information about the [deploy] section see:\n# https://scrap"
  },
  {
    "path": "daili.py",
    "chars": 3329,
    "preview": "# -*- coding:UTF-8 -*-\nfrom bs4 import BeautifulSoup\nfrom selenium import webdriver\nimport subprocess as sp\nfrom lxml im"
  },
  {
    "path": "dingdong/README.md",
    "chars": 309,
    "preview": "## 功能\n\n下载京东商品的晒单图。\n\n## 作者\n\n* Website: [http://cuijiahua.com](http://cuijiahua.com \"悬停显示\")\n* Author: Jack Cui\n* Date: 201"
  },
  {
    "path": "dingdong/jd.py",
    "chars": 6608,
    "preview": "# -*-coding:utf-8 -*-\n# Author:Jack Cui\n# Website:http://cuijiahua.com\n# Date:2018-7-7\nimport os\nimport re\nimport sys\nim"
  },
  {
    "path": "douyin/README.md",
    "chars": 357,
    "preview": "## 功能\n\n下载指定用户的抖音视频。\n\n## 作者\n\n* Author: [Jack Cui](http://cuijiahua.com \"悬停显示\")、[steven7851](https://github.com/steven7851"
  },
  {
    "path": "douyin/douyin.py",
    "chars": 6473,
    "preview": "# -*- coding:utf-8 -*-\nfrom contextlib import closing\nimport requests, json, re, os, sys\nimport urllib\n\nclass DouYin(obj"
  },
  {
    "path": "douyin/fuck-byted-acrawler.js",
    "chars": 7388,
    "preview": "// Referer:https://raw.githubusercontent.com/loadchange/amemv-crawler/master/fuck-byted-acrawler.js\nfunction generateSig"
  },
  {
    "path": "douyin.py",
    "chars": 3898,
    "preview": "# -*- coding:utf-8 -*-\nfrom bs4 import BeautifulSoup\nfrom contextlib import closing\nimport requests, json, time, re, os,"
  },
  {
    "path": "douyin_pro.py",
    "chars": 5006,
    "preview": "# -*- coding:utf-8 -*-\nfrom splinter.driver.webdriver.chrome import Options, Chrome\nfrom splinter.browser import Browser"
  },
  {
    "path": "downloader.py",
    "chars": 2156,
    "preview": "#-*- coding: UTF-8 -*-\nimport requests  \nfrom contextlib import closing\n\nclass ProgressBar(object):  \n    def __init__(s"
  },
  {
    "path": "financical.py",
    "chars": 6263,
    "preview": "#-*- coding:UTF-8 -*-\nimport sys\nimport pymysql\nimport requests\nimport json\nimport re\nfrom bs4 import BeautifulSoup\n\n\"\"\""
  },
  {
    "path": "geetest.py",
    "chars": 8200,
    "preview": "# -*-coding:utf-8 -*-\nimport random\nimport re\nimport time\n# 图片转换\nimport base64\nfrom urllib.request import urlretrieve\n\nf"
  },
  {
    "path": "hero.py",
    "chars": 5734,
    "preview": "#-*- coding: UTF-8 -*-\nfrom urllib.request import urlretrieve\nimport requests\nimport os\n\n\"\"\"\n函数说明:下载《英雄联盟盒子》中的英雄图片\n\nPara"
  },
  {
    "path": "one_hour_spider/biquge20180731.py",
    "chars": 1596,
    "preview": "# -*- coding:utf-8 -*-\nimport requests\nfrom bs4 import BeautifulSoup\nimport os\n\n\"\"\"\n从www.biqubao.com笔趣阁爬取小说,楼主教程中的网址我当时没"
  },
  {
    "path": "one_hour_spider/biqukan.py",
    "chars": 1819,
    "preview": "# -*- coding:UTF-8 -*-\nfrom bs4 import BeautifulSoup\nimport requests, sys\n\n\"\"\"\n类说明:下载《笔趣看》网小说《一念永恒》\nParameters:\n\t无\nRetur"
  },
  {
    "path": "one_hour_spider/unsplash.py",
    "chars": 1731,
    "preview": "# -*- coding:UTF-8 -*-\nimport requests, json, time, sys\nfrom contextlib import closing\n\nclass get_photos(object):\n\n\tdef "
  },
  {
    "path": "one_hour_spider/unsplash20180731.py",
    "chars": 1031,
    "preview": "# -*- coding:utf-8 -*-\nimport requests\nimport json\nimport os\nfrom contextlib import closing\n\n\"\"\"\n从https://unsplash.com/爬"
  },
  {
    "path": "one_hour_spider/vidoe_downloader.py",
    "chars": 2110,
    "preview": "#-*- coding:UTF-8 -*-\nimport requests,re, json, sys\nfrom bs4 import BeautifulSoup\nfrom urllib import request\n\nclass vide"
  },
  {
    "path": "shuaia.py",
    "chars": 1555,
    "preview": "# -*- coding:UTF-8 -*-\nfrom bs4 import BeautifulSoup\nfrom urllib.request import urlretrieve\nimport requests\nimport os\nim"
  },
  {
    "path": "video_downloader/MyQR/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "video_downloader/MyQR/mylibs/ECC.py",
    "chars": 1035,
    "preview": "# -*- coding: utf-8 -*-\n\nfrom MyQR.mylibs.constant import GP_list, ecc_num_per_block, lindex, po2, log\n \n#ecc: Error Cor"
  },
  {
    "path": "video_downloader/MyQR/mylibs/__init__.py",
    "chars": 24,
    "preview": "# -*- coding: utf-8 -*-\n"
  },
  {
    "path": "video_downloader/MyQR/mylibs/constant.py",
    "chars": 18997,
    "preview": "# -*- coding: utf-8 -*-\n\"\"\"\n***** for data.py *******\n\"\"\"\n# character capacities\n# {level1: [version1(mode1,mode2,mode3,"
  },
  {
    "path": "video_downloader/MyQR/mylibs/data.py",
    "chars": 3287,
    "preview": "# -*- coding: utf-8 -*-\n\nfrom MyQR.mylibs.constant import char_cap, required_bytes, mindex, lindex, num_list, alphanum_l"
  },
  {
    "path": "video_downloader/MyQR/mylibs/draw.py",
    "chars": 620,
    "preview": "# -*- coding: utf-8 -*-\n\nfrom PIL import Image\nimport os\n\ndef draw_qrcode(abspath, qrmatrix):\n    unit_len = 3\n    x = y"
  },
  {
    "path": "video_downloader/MyQR/mylibs/matrix.py",
    "chars": 6122,
    "preview": "# -*- coding: utf-8 -*-\n     \nfrom MyQR.mylibs.constant import alig_location, format_info_str, version_info_str, lindex\n"
  },
  {
    "path": "video_downloader/MyQR/mylibs/structure.py",
    "chars": 829,
    "preview": "# -*- coding: utf-8 -*-\n\nfrom MyQR.mylibs.constant import required_remainder_bits, lindex, grouping_list\n\ndef structure_"
  },
  {
    "path": "video_downloader/MyQR/mylibs/theqrmodule.py",
    "chars": 737,
    "preview": "# -*- coding: utf-8 -*-\n\nfrom MyQR.mylibs import data, ECC, structure, matrix, draw\n\n# ver: Version from 1 to 40\n# ecl: "
  },
  {
    "path": "video_downloader/MyQR/myqr.py",
    "chars": 6195,
    "preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport os\nfrom MyQR.mylibs import theqrmodule\nfrom PIL import Image\n   \n"
  },
  {
    "path": "video_downloader/MyQR/terminal.py",
    "chars": 2549,
    "preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nfrom MyQR.myqr import run\nimport os\n\ndef main():\n    import argparse\n   "
  },
  {
    "path": "video_downloader/requirements.txt",
    "chars": 36,
    "preview": "imageio\nnumpy\nPillow\nbeautifulsoup4\n"
  },
  {
    "path": "video_downloader/video_downloader.py",
    "chars": 9491,
    "preview": "# -*- coding:utf-8 -*-\nfrom tkinter.filedialog import askdirectory\nfrom MyQR.myqr import run\nfrom urllib import request,"
  },
  {
    "path": "zhengfang_system_spider/README.md",
    "chars": 594,
    "preview": "# ZhengFang_System_Spider\n对正方教务管理系统的个人课表,个人学生成绩,绩点等简单爬取\n\n## 依赖环境\npython 3.6\n### python库\nhttp请求:requests,urllib  \n数据提取:re"
  },
  {
    "path": "zhengfang_system_spider/requirements.txt",
    "chars": 65,
    "preview": "lxml==4.6.3\nrequests==2.20.0\nPillow>=6.2.2\nbeautifulsoup4==4.6.0\n"
  },
  {
    "path": "zhengfang_system_spider/spider.py",
    "chars": 7670,
    "preview": "#!/usr/bin/env python\n#-*- coding: utf-8 -*-\n\n__author__ = 'ZYSzys'\n\nimport requests\nimport re\nimport os\nimport sys\nimpo"
  },
  {
    "path": "zhengfang_system_spider/zhengfang.txt",
    "chars": 6885,
    "preview": "本学期课表:\n1:\t电工电子技术基础AⅡ---周一第1,2节{第2-16周|双周}---郜园园/章云(章云,郜园园)---学10609(实验室)\n2:\t计算机网络A---周一第3,4节{第2-16周|双周}---吴晓平(吴晓平)---学10"
  }
]

About this extraction

This page contains the full source code of the Jack-Cherish/python-spider GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 68 files (274.5 KB), approximately 95.1k tokens, and a symbol index with 270 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!