Repository: Jack-Cherish/python-spider Branch: master Commit: 60b7ed839713 Files: 68 Total size: 274.5 KB Directory structure: gitextract_u9lbtfzc/ ├── 12306.py ├── 2020/ │ ├── README.md │ ├── api/ │ │ └── api.py │ ├── bilibili/ │ │ ├── download.py │ │ └── xml2ass.py │ ├── dmzj/ │ │ └── cartoon.py │ ├── taobao/ │ │ └── taobao_login.py │ ├── xbqg/ │ │ └── xbqg_spider.py │ └── zycjw/ │ └── video_download.py ├── Netease/ │ ├── Netease.py │ └── music_list.txt ├── README.md ├── baiduwenku.py ├── baiduwenku_pro_1.py ├── baiwan/ │ ├── app.js │ ├── baiwan.py │ ├── file.txt │ ├── index.html │ └── question.txt ├── bilibili/ │ ├── README.md │ ├── bilibili.py │ └── xml2ass.py ├── bilibili_luckyman/ │ ├── README.md │ └── bilibili_luckyman.py ├── biqukan.py ├── cartoon/ │ ├── cartoon/ │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders/ │ │ ├── __init__.py │ │ └── comic_spider.py │ └── scrapy.cfg ├── daili.py ├── dingdong/ │ ├── README.md │ └── jd.py ├── douyin/ │ ├── README.md │ ├── douyin.py │ └── fuck-byted-acrawler.js ├── douyin.py ├── douyin_pro.py ├── downloader.py ├── financical.py ├── geetest.py ├── hero.py ├── one_hour_spider/ │ ├── biquge20180731.py │ ├── biqukan.py │ ├── unsplash.py │ ├── unsplash20180731.py │ └── vidoe_downloader.py ├── shuaia.py ├── video_downloader/ │ ├── MyQR/ │ │ ├── __init__.py │ │ ├── mylibs/ │ │ │ ├── ECC.py │ │ │ ├── __init__.py │ │ │ ├── constant.py │ │ │ ├── data.py │ │ │ ├── draw.py │ │ │ ├── matrix.py │ │ │ ├── structure.py │ │ │ └── theqrmodule.py │ │ ├── myqr.py │ │ └── terminal.py │ ├── requirements.txt │ └── video_downloader.py └── zhengfang_system_spider/ ├── README.md ├── requirements.txt ├── spider.py └── zhengfang.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: 12306.py ================================================ # -*- coding: utf-8 -*- """ 
@author: liuyw """ from splinter.browser import Browser from time import sleep import traceback import time, sys class huoche(object): driver_name = '' executable_path = '' #用户名,密码 username = u"xxx" passwd = u"xxx" # cookies值得自己去找, 下面两个分别是沈阳, 哈尔滨 starts = u"%u6C88%u9633%2CSYT" ends = u"%u54C8%u5C14%u6EE8%2CHBB" # 时间格式2018-01-19 dtime = u"2018-01-19" # 车次,选择第几趟,0则从上之下依次点击 order = 0 ###乘客名 users = [u"xxx",u"xxx"] ##席位 xb = u"二等座" pz = u"成人票" """网址""" ticket_url = "https://kyfw.12306.cn/otn/leftTicket/init" login_url = "https://kyfw.12306.cn/otn/login/init" initmy_url = "https://kyfw.12306.cn/otn/index/initMy12306" buy = "https://kyfw.12306.cn/otn/confirmPassenger/initDc" def __init__(self): self.driver_name = 'chrome' self.executable_path = 'D:/chromedriver' def login(self): self.driver.visit(self.login_url) self.driver.fill("loginUserDTO.user_name", self.username) # sleep(1) self.driver.fill("userDTO.password", self.passwd) print(u"等待验证码,自行输入...") while True: if self.driver.url != self.initmy_url: sleep(1) else: break def start(self): self.driver = Browser(driver_name=self.driver_name,executable_path=self.executable_path) self.driver.driver.set_window_size(1400, 1000) self.login() # sleep(1) self.driver.visit(self.ticket_url) try: print(u"购票页面开始...") # sleep(1) # 加载查询信息 self.driver.cookies.add({"_jc_save_fromStation": self.starts}) self.driver.cookies.add({"_jc_save_toStation": self.ends}) self.driver.cookies.add({"_jc_save_fromDate": self.dtime}) self.driver.reload() count = 0 if self.order != 0: while self.driver.url == self.ticket_url: self.driver.find_by_text(u"查询").click() count += 1 print(u"循环点击查询... 第 %s 次" % count) # sleep(1) try: self.driver.find_by_text(u"预订")[self.order - 1].click() except Exception as e: print(e) print(u"还没开始预订") continue else: while self.driver.url == self.ticket_url: self.driver.find_by_text(u"查询").click() count += 1 print(u"循环点击查询... 
第 %s 次" % count) # sleep(0.8) try: for i in self.driver.find_by_text(u"预订"): i.click() sleep(1) except Exception as e: print(e) print(u"还没开始预订 %s" % count) continue print(u"开始预订...") # sleep(3) # self.driver.reload() sleep(1) print(u'开始选择用户...') for user in self.users: self.driver.find_by_text(user).last.click() print(u"提交订单...") sleep(1) self.driver.find_by_text(self.pz).click() self.driver.find_by_id('').select(self.pz) # sleep(1) self.driver.find_by_text(self.xb).click() sleep(1) self.driver.find_by_id('submitOrder_id').click() print(u"开始选座...") self.driver.find_by_id('1D').last.click() self.driver.find_by_id('1F').last.click() sleep(1.5) print(u"确认选座...") self.driver.find_by_id('qr_submit_id').click() except Exception as e: print(e) if __name__ == '__main__': huoche = huoche() huoche.start() ================================================ FILE: 2020/README.md ================================================ # Python Spider 2020 由于这个项目时间太长了,陆陆续续,很多实战示例也早已失效。 网络爬虫,是一门比较通用的基础技术,各个领域都会有所涉及,比如我做视觉算法的,也需要用到网络爬虫,例如调用 API 接口清洗数据等,这本质也都是一个小的爬虫程序。 为了提供各位更好的学习示例,我决定重写这一系列教程,对一些失效的示例,重新找例子,并查缺补漏,完善这一些列教程。 2020年,最新版的 Python3 网络爬虫实战系列教程。 原创文章每周最少两篇,**后续最新文章**会在[【公众号】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)首发,视频[【B站】](https://space.bilibili.com/331507846)首发,大家可以加我[【微信】](https://cuijiahua.com/wp-content/uploads/2020/05/gzh-w.jpg)进**交流群**,技术交流或提意见都可以,欢迎**Star**!
## Python3 网络爬虫教程 2020

| 文章 | 公众号 | 代码 |
| :------ | :--------: | :--------: |
| Python3 网络爬虫(一):初识网络爬虫之夜探老王家 | [公众号](https://mp.weixin.qq.com/s/1rcq9RQYuAuHFg1w1j8HXg "Python3 网络爬虫(一)") | no |
| Python3 网络爬虫(二):下载小说的正确姿势 | [公众号](https://mp.weixin.qq.com/s/5e2_r0QXUISVp9GdDsqbzg "Python3 网络爬虫(二)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/xbqg "Python3 网络爬虫(二)") |
| Python3 网络爬虫(三):漫画下载,动态加载、反爬虫这都不叫事!| [公众号](https://mp.weixin.qq.com/s/wyS-OP04K3Vs9arSelRlyA "Python3 网络爬虫(三)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/dmzj "Python3 网络爬虫(三)") |
| Python3 网络爬虫(四):视频下载,那些事儿!| [公众号](https://mp.weixin.qq.com/s/_geNA6Dwo4kx25X7trJzlg "Python3 网络爬虫(四)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/zycjw "Python3 网络爬虫(四)") |
| Python3 网络爬虫(五):老板,需要特殊服务吗?| [公众号](https://mp.weixin.qq.com/s/PPTSnIHV71b-wB3oRiYnIA "Python3 网络爬虫(五)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/api "Python3 网络爬虫(五)") |
| Python3 网络爬虫(六):618,爱他/她,就清空他/她的购物车!| [公众号](https://mp.weixin.qq.com/s/lXXDfzyLVrf3f-aqJN1C3A "Python3 网络爬虫(六)") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/taobao "Python3 网络爬虫(六)") |
| 宝藏B站UP主,视频弹幕尽收囊中!| [公众号](https://mp.weixin.qq.com/s/aWratg1j9RBAjIghoY66yQ "宝藏B站UP主,视频弹幕尽收囊中!") | [Code](https://github.com/Jack-Cherish/python-spider/tree/master/2020/bilibili "宝藏B站UP主,视频弹幕尽收囊中!") |

更多精彩,敬请期待!
================================================
FILE: 2020/api/api.py
================================================
import requests
import base64
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Face++ "beautify" endpoint: send an image, get a retouched copy back.
beautify_url = "https://api-cn.faceplusplus.com/facepp/v2/beautify"
# API Key and API Secret (also called Secret Key) of the application you created
AK = ''
SK = ''
# Optional parameters; each one defaults to 50 when it is not sent
# Whitening strength, 0 - 100
whitening = 80
# Skin-smoothing strength, 0 - 100
smoothing = 80
# Face-thinning strength, 0 - 100
thinface = 20
# Face-shrinking strength, 0 - 100
shrink_face = 50
# Eye-enlarging strength, 0 - 100
enlarge_eye = 50
# Eyebrow-removal strength, 0 - 100
remove_eyebrow = 50
# Filter name; leave empty for no filter
filter_type = ''
# Open the image in binary mode; the context manager guarantees the handle
# is closed (the original left it open for the life of the process).
img_name = 'test_1.png'
with open(img_name, 'rb') as f:
    # Convert to base64
    img_base64 = base64.b64encode(f.read())
# Only whitening, smoothing and thinface are sent; the other optional
# parameters above are left at the service defaults.
data = {
    'api_key': AK,
    'api_secret': SK,
    'image_base64': img_base64,
    'whitening': whitening,
    'smoothing': smoothing,
    'thinface': thinface,
}
r = requests.post(url=beautify_url, data=data)
html = json.loads(r.text)
# Fail with the service's own message instead of a bare KeyError when the
# response carries no image (bad credentials, no face found, ...).
if 'result' not in html:
    raise RuntimeError('beautify request failed: %s' % html.get('error_message', html))
# Decode the base64 image returned by the API
base64_data = html['result']
imgData = base64.b64decode(base64_data)
nparr = np.frombuffer(imgData, np.uint8)
img_res = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
# OpenCV decodes to BGR while matplotlib displays RGB, so swap the channels.
# NOTE: the *_BGR variable names are historical; after the swap the arrays
# hold RGB data.
img_res_BGR = cv2.cvtColor(img_res, cv2.COLOR_BGR2RGB)
# Original image
img = cv2.imread(img_name)
img_BGR = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Show original (left) and beautified (right) side by side
fig, axs = plt.subplots(nrows=1, ncols=2, sharex=False, sharey=False, figsize=(10,10))
axs[0].imshow(img_BGR)
axs[1].imshow(img_res_BGR)
plt.show()
================================================
FILE: 2020/bilibili/download.py
================================================
# -*-coding:utf-8 -*-
# Website: https://cuijiahua.com
# Author: Jack Cui
# Date: 2020.07.22
import requests
import json
import re
import json
import math
import xml2ass
import time
from contextlib import closing
from bs4 import BeautifulSoup
import os
from win32com.client import Dispatch
def addTasktoXunlei(down_url):
    """Queue *down_url* as a download task in the Xunlei (Thunder) client.

    The client is driven through the ``ThunderAgent.Agent64.1`` COM object.

    Returns:
        True when the task was added and committed, False when either COM
        call raised.
    """
    flag = False
    o = Dispatch('ThunderAgent.Agent64.1')
    try:
        o.AddTask(down_url, "", "", "", "", -1, 0, 5)
        o.CommitTasks()
        flag = True
    except Exception as e:
        # Print the caught instance. The original printed
        # ``Exception.message``, an attribute the class does not have on
        # Python 3, so the handler itself raised AttributeError.
        print(e)
        print(" AddTask is fail!")
    return flag
def get_download_url(arcurl):
    """Resolve the direct download URL of the video page *arcurl*.

    The page URL is sent to a third-party parsing service, the iframe the
    service returns is fetched, and the first quoted http(s) URL found in
    that page's inline ``<script>`` is returned with backslashes removed.

    Raises:
        IndexError: when the inline script contains no quoted URL.
    """
    # Search WeChat for "JackCui-AI", follow the official account and reply
    # "B 站" in the chat to obtain the address of the video parsing service.
    jiexi_url = 'xxx'
    payload = {'url': arcurl}
    jiexi_req = requests.get(jiexi_url, params=payload)
    jiexi_bf = BeautifulSoup(jiexi_req.text)
    jiexi_dn_url = jiexi_bf.iframe.get('src')
    dn_req = requests.get(jiexi_dn_url)
    dn_bf = BeautifulSoup(dn_req.text)
    # The player configuration lives in the inline script (no ``src``).
    video_script = dn_bf.find('script',src = None)
    DPlayer = str(video_script.string)
    # Raw string: the pattern contains ``\(`` and ``\)``, which are invalid
    # escape sequences in a normal string literal (SyntaxWarning on 3.12+).
    # The compiled pattern is unchanged.
    download_url = re.findall(r"'(http[s]?:(?:[a-zA-Z]|[0-9]|[$-_@.&~+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)'", DPlayer)[0]
    # The URL is embedded in JavaScript with escaped slashes; drop them.
    download_url = download_url.replace('\\', '')
    return download_url
# Space page of the uploader whose videos are listed, and the search API.
space_url = 'https://space.bilibili.com/280793434'
search_url = 'https://api.bilibili.com/x/space/arc/search'
# The uploader id is the last path component of the space URL.
mid = space_url.rsplit('/', 1)[-1]
sess = requests.Session()
search_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'application/json, text/plain, */*',
}

# Get the number of videos: a one-item page is enough to read the total.
ps = 1
pn = 1
search_params = dict(mid=mid, ps=ps, tid=0, pn=pn)
req = sess.get(url=search_url, headers=search_headers, params=search_params, verify=False)
info = json.loads(req.text)
video_count = info['data']['page']['count']

# Walk every result page, ten videos per page, collecting [title, url] pairs.
ps = 10
page = math.ceil(video_count/ps)
videos_list = []
for pn in range(1, page+1):
    search_params = dict(mid=mid, ps=ps, tid=0, pn=pn)
    req = sess.get(url=search_url, headers=search_headers, params=search_params, verify=False)
    info = json.loads(req.text)
    vlist = info['data']['list']['vlist']
    for video in vlist:
        title = video['title']
        bvid = video['bvid']
        vurl = 'https://www.bilibili.com/video/' + bvid
        videos_list.append([title, vurl])

print('共 %d 个视频' % len(videos_list))
all_video = {}
# 下载前 10 个视频
for video in videos_list[:10]:
download_url = get_download_url(video[1])
print(video[0] + ':' + download_url)
# 记录视频名字
xunlei_video_name = download_url.split('?')[0].split('/')[-1]
filename = video[0]
for c in u'´☆\n<':
return 'Bilibili' # Komica, with the same file format as Bilibili
elif tmp == 'xml version="1.0" encoding="UTF-8"?>\n<':
return 'MioMio'
elif tmp == 'p':
return 'Niconico' # Himawari Douga, with the same file format as Niconico Douga
#
# ReadComments**** protocol
#
# Input:
# f: Input file
# fontsize: Default font size
#
# Output:
# yield a tuple:
# (timeline, timestamp, no, comment, pos, color, size, height, width)
# timeline: The position when the comment is replayed
# timestamp: The UNIX timestamp when the comment is submitted
# no: A sequence of 1, 2, 3, ..., used for sorting
# comment: The content of the comment
# pos: 0 for regular moving comment,
# 1 for top centered comment,
# 2 for bottom centered comment,
# 3 for reversed moving comment
# color: Font color represented in 0xRRGGBB,
# e.g. 0xffffff for white
# size: Font size
# height: The estimated height in pixels
# i.e. (comment.count('\n')+1)*size
# width: The estimated width in pixels
# i.e. CalculateLength(comment)*size
#
# After implementing ReadComments****, make sure to update ProbeCommentFormat
# and CommentFormatMap.
#
def ReadCommentsNiconico(f, fontsize):
    """Yield comment tuples parsed from a Niconico XML comment file.

    Every ``<chat>`` element becomes one tuple
    ``(timeline, timestamp, no, comment, pos, color, size, height, width)``.
    Comments whose text starts with ``/`` are skipped, and elements that
    cannot be parsed are logged and skipped.

    f: Input file (name or file object accepted by xml.dom.minidom.parse)
    fontsize: Default font size
    """
    NiconicoColorMap = {'red': 0xff0000, 'pink': 0xff8080, 'orange': 0xffcc00, 'yellow': 0xffff00, 'green': 0x00ff00, 'cyan': 0x00ffff, 'blue': 0x0000ff, 'purple': 0xc000ff, 'black': 0x000000, 'niconicowhite': 0xcccc99, 'white2': 0xcccc99, 'truered': 0xcc0033, 'red2': 0xcc0033, 'passionorange': 0xff6600, 'orange2': 0xff6600, 'madyellow': 0x999900, 'yellow2': 0x999900, 'elementalgreen': 0x00cc66, 'green2': 0x00cc66, 'marineblue': 0x33ffcc, 'blue2': 0x33ffcc, 'nobleviolet': 0x6633cc, 'purple2': 0x6633cc}
    # Style words of the "mail" attribute that select a position or a size.
    PositionWords = {'ue': 1, 'shita': 2}
    SizeWords = {'big': 1.44, 'small': 0.64}
    for comment in xml.dom.minidom.parse(f).getElementsByTagName('chat'):
        try:
            c = str(comment.childNodes[0].wholeText)
            if c.startswith('/'):
                continue  # ignore advanced comments
            pos, color, size = 0, 0xffffff, fontsize
            # "mail" is a space-separated list of style words; a later word
            # of the same kind overrides an earlier one.
            for mailstyle in str(comment.getAttribute('mail')).split():
                if mailstyle in PositionWords:
                    pos = PositionWords[mailstyle]
                elif mailstyle in SizeWords:
                    size = fontsize*SizeWords[mailstyle]
                elif mailstyle in NiconicoColorMap:
                    color = NiconicoColorMap[mailstyle]
            # vpos is scaled by 0.01 and clamped at zero
            timeline = max(int(comment.getAttribute('vpos')), 0)*0.01
            timestamp = int(comment.getAttribute('date'))
            no = int(comment.getAttribute('no'))
            yield (timeline, timestamp, no, c, pos, color, size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        except (AssertionError, AttributeError, IndexError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue
def ReadCommentsAcfun(f, fontsize):
    """Yield comment tuples parsed from an Acfun JSON comment file.

    The file is a JSON list. Each item carries a comma-separated field
    string under ``'c'`` (timeline, colour, type, size, ?, timestamp) and
    the message under ``'m'``. Type ``'7'`` items yield the message parsed
    as a dict with pos ``'acfunpos'``; all other supported types yield the
    regular 9-tuple. Malformed items are logged and skipped.

    f: Input file object
    fontsize: Default font size
    """
    AcfunPosMap = {'1': 0, '2': 0, '4': 2, '5': 1}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment['c']).split(',')
            # Explicit checks instead of ``assert``: assertions are removed
            # under ``python -O`` and must not be used to validate input.
            if len(p) < 6:
                raise ValueError('too few fields in comment header')
            if p[2] != '7' and p[2] not in AcfunPosMap:
                raise ValueError('unsupported comment type')
            size = int(p[3])*fontsize/25.0
            if p[2] != '7':
                # Both a literal backslash-r and a real CR mean a line break
                c = str(comment['m']).replace('\\r', '\n').replace('\r', '\n')
                yield (float(p[0]), int(p[5]), i, c, AcfunPosMap[p[2]], int(p[1]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                c = dict(json.loads(comment['m']))
                yield (float(p[0]), int(p[5]), i, c, 'acfunpos', int(p[1]), size, 0, 0)
        # KeyError is included so that an item missing 'c' or 'm' is skipped
        # like any other malformed item instead of aborting the conversion.
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue
def ReadCommentsBilibili(f, fontsize):
    """Yield comment tuples parsed from a Bilibili XML comment file.

    Every ``<d>`` element carries a comma-separated ``p`` attribute
    (timeline, type, size, colour, timestamp, ...) and the comment text as
    its content. Type ``'7'`` comments are yielded with pos ``'bilipos'``
    and their raw text; the other supported types yield the regular
    9-tuple. Malformed elements are logged and skipped.

    f: Input file (name or file object accepted by xml.dom.minidom.parse)
    fontsize: Default font size
    """
    BilibiliPosMap = {'1': 0, '4': 2, '5': 1, '6': 3}
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('d')
    for i, comment in enumerate(comment_element):
        try:
            p = str(comment.getAttribute('p')).split(',')
            # Explicit checks instead of ``assert``: assertions are removed
            # under ``python -O``, which would let an unknown type reach the
            # position lookup below and raise an uncaught KeyError.
            if len(p) < 5:
                raise ValueError('too few fields in "p" attribute')
            if p[1] != '7' and p[1] not in BilibiliPosMap:
                raise ValueError('unsupported comment type')
            if p[1] != '7':
                # The site encodes line breaks as the two characters "/n"
                c = str(comment.childNodes[0].wholeText).replace('/n', '\n')
                size = int(p[2])*fontsize/25.0
                yield (float(p[0]), int(p[4]), i, c, BilibiliPosMap[p[1]], int(p[3]), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:  # positioned comment
                c = str(comment.childNodes[0].wholeText)
                yield (float(p[0]), int(p[4]), i, c, 'bilipos', int(p[3]), int(p[2]), 0, 0)
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue
def ReadCommentsTudou(f, fontsize):
    """Yield comment tuples parsed from a Tudou JSON comment file.

    The comments are read from the ``'comment_list'`` list of the JSON
    document. Items with an unsupported ``pos`` or ``size``, or with
    missing or malformed fields, are logged and skipped.

    f: Input file object
    fontsize: Default font size
    """
    TudouPosMap = {3: 0, 4: 2, 6: 1}
    TudouSizeMap = {0: 0.64, 1: 1, 2: 1.44}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element['comment_list']):
        try:
            # Explicit checks instead of ``assert``: assertions are removed
            # under ``python -O`` and must not be used to validate input.
            if comment['pos'] not in (3, 4, 6):
                raise ValueError('unsupported comment position')
            c = str(comment['data'])
            if comment['size'] not in (0, 1, 2):
                raise ValueError('unsupported comment size')
            size = TudouSizeMap[comment['size']]*fontsize
            # replay_time is scaled by 0.001 and truncated to an integer
            yield (int(comment['replay_time']*0.001), int(comment['commit_time']), i, c, TudouPosMap[comment['pos']], int(comment['color']), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        # KeyError is included so that an item with a missing field is
        # skipped instead of aborting the whole conversion.
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue
def ReadCommentsMioMio(f, fontsize):
    """Yield comment tuples parsed from a MioMio XML comment file.

    Every ``<data>`` element holds ``<playTime>``, ``<times>`` and a
    ``<message>`` whose attributes give font size, colour and mode.
    Elements with missing parts, unparsable values or an unknown mode are
    logged and skipped.

    f: Input file (name or file object accepted by xml.dom.minidom.parse)
    fontsize: Default font size
    """
    MioMioPosMap = {'1': 0, '4': 2, '5': 1}
    dom = xml.dom.minidom.parse(f)
    comment_element = dom.getElementsByTagName('data')
    for i, comment in enumerate(comment_element):
        try:
            message = comment.getElementsByTagName('message')[0]
            c = str(message.childNodes[0].wholeText)
            size = int(message.getAttribute('fontsize'))*fontsize/25.0
            # The wall-clock string is read as UTC and then shifted by
            # 28800 s (8 h) -- presumably the site reports UTC+8; confirm.
            yield (float(comment.getElementsByTagName('playTime')[0].childNodes[0].wholeText), int(calendar.timegm(time.strptime(comment.getElementsByTagName('times')[0].childNodes[0].wholeText, '%Y-%m-%d %H:%M:%S')))-28800, i, c, MioMioPosMap[message.getAttribute('mode')], int(message.getAttribute('color')), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
        # KeyError is included so that an unknown "mode" skips the comment
        # instead of aborting the whole conversion.
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %s') % comment.toxml())
            continue
def ReadCommentsSH5V(f, fontsize):
    """Yield comment tuples parsed from an sH5V JSON comment file.

    The comments are read from ``document['root']['bgs']``. Type ``'7'``
    items yield an extended 16-tuple with pos ``'sH5Vpos'`` that also
    carries x, y, dur and data1..data4; the other supported types yield the
    regular 9-tuple. Items with missing or malformed fields are logged and
    skipped.

    f: Input file object
    fontsize: Default font size
    """
    SH5VPosMap = {'0': 0, '1': 0, '4': 2, '5': 1}
    comment_element = json.load(f)
    for i, comment in enumerate(comment_element["root"]["bgs"]):
        try:
            c_at = str(comment['at'])
            c_type = str(comment['type'])
            c_date = str(comment['timestamp'])
            c_color = str(comment['color'])
            c = str(comment['text'])
            size = fontsize
            if c_type != '7':
                # The colour is a hex string with a one-character prefix
                yield (float(c_at), int(c_date), i, c, SH5VPosMap[c_type], int(c_color[1:], 16), size, (c.count('\n')+1)*size, CalculateLength(c)*size)
            else:
                c_x = float(comment['x'])
                c_y = float(comment['y'])
                size = int(comment['size'])
                dur = int(comment['dur'])
                data1 = float(comment['data1'])
                data2 = float(comment['data2'])
                data3 = int(comment['data3'])
                data4 = int(comment['data4'])
                yield (float(c_at), int(c_date), i, c, 'sH5Vpos', int(c_color[1:], 16), size, 0, 0, c_x, c_y, dur, data1, data2, data3, data4)
        # KeyError is included so that a missing field or an unknown type
        # skips the item instead of aborting the whole conversion.
        except (AssertionError, AttributeError, IndexError, KeyError, TypeError, ValueError):
            logging.warning(_('Invalid comment: %r') % comment)
            continue
# Maps a detected comment format name to the generator that reads it.
# The None key maps to None (no reader).
CommentFormatMap = {
    None: None,
    'Niconico': ReadCommentsNiconico,
    'Acfun': ReadCommentsAcfun,
    'Bilibili': ReadCommentsBilibili,
    'Tudou': ReadCommentsTudou,
    'MioMio': ReadCommentsMioMio,
    'sH5V': ReadCommentsSH5V,
}
def WriteCommentBilibiliPositioned(f, c, width, height, styleid):
    """Write one Bilibili positioned comment to *f* as an ASS Dialogue line.

    ``c[3]`` is a JSON list read by index: 0/1 start x/y, 2 alpha as
    "from-to", 3 lifetime, 4 text, 5/6 z/y rotation, 7/8 end x/y,
    9 duration, 10 delay, 11 border flag, 12 font face. ``c[0]`` is the
    start time, ``c[5]`` the colour and ``c[6]`` the font size.
    Coordinates are rescaled from the Bilibili player size to
    (width, height). IndexError and ValueError while decoding are logged
    as an invalid comment and nothing is written.
    """
    #BiliPlayerSize = (512, 384)  # Bilibili player version 2010
    #BiliPlayerSize = (540, 384)  # Bilibili player version 2012
    BiliPlayerSize = (672, 438)  # Bilibili player version 2014
    ZoomFactor = GetZoomFactor(BiliPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # Convert a player coordinate to an output coordinate on one axis.
        axis = int(isHeight)  # False -> 0 (x axis), True -> 1 (y axis)
        if not isinstance(InputPos, (int, float)):
            # Anything else (e.g. a string): parse as int, falling back to
            # float, then convert the parsed number.
            try:
                InputPos = int(InputPos)
            except ValueError:
                InputPos = float(InputPos)
            return GetPosition(InputPos, axis)
        if isinstance(InputPos, float) and not InputPos > 1:
            # A float not above 1 is a fraction of the player size
            return BiliPlayerSize[axis]*ZoomFactor[0]*InputPos+ZoomFactor[axis+1]
        # Integers and floats above 1 are player pixels
        return ZoomFactor[0]*InputPos+ZoomFactor[axis+1]

    def RotationTags(x, y):
        # Rotation/shear tags for a point given in output pixels; reads
        # rotate_y and rotate_z from the enclosing scope at call time.
        return '\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2))

    try:
        comment_args = safe_list(json.loads(c[3]))
        text = ASSEscape(str(comment_args[4]).replace('/n', '\n'))
        from_x = comment_args.get(0, 0)
        from_y = comment_args.get(1, 0)
        # The end point defaults to the start point (a static comment)
        to_x = comment_args.get(7, from_x)
        to_y = comment_args.get(8, from_y)
        from_x = round(GetPosition(from_x, False))
        from_y = round(GetPosition(from_y, True))
        to_x = round(GetPosition(to_x, False))
        to_y = round(GetPosition(to_y, True))
        alpha = safe_list(str(comment_args.get(2, '1')).split('-'))
        from_alpha = float(alpha.get(0, 1))
        to_alpha = float(alpha.get(1, from_alpha))
        # The input uses 1 = opaque; the ASS alpha byte is inverted
        from_alpha = 255-round(from_alpha*255)
        to_alpha = 255-round(to_alpha*255)
        rotate_z = int(comment_args.get(5, 0))
        rotate_y = int(comment_args.get(6, 0))
        lifetime = float(comment_args.get(3, 4500))
        duration = int(comment_args.get(9, lifetime*1000))
        delay = int(comment_args.get(10, 0))
        fontface = comment_args.get(12)
        isborder = comment_args.get(11, 'true')

        is_moving = (from_x, from_y) != (to_x, to_y)
        color = c[5]
        styles = []
        if is_moving:
            styles.append('\\move(%s, %s, %s, %s, %s, %s)' % (from_x, from_y, to_x, to_y, delay, delay+duration))
        else:
            styles.append('\\pos(%s, %s)' % (from_x, from_y))
        styles.append(RotationTags(from_x, from_y))
        if is_moving:
            # Animate the rotation towards its value at the end point
            styles.append('\\t(%s, %s, ' % (delay, delay+duration))
            styles.append(RotationTags(to_x, to_y))
            styles.append(')')
        if fontface:
            styles.append('\\fn%s' % ASSEscape(fontface))
        styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        if color != 0xffffff:
            # ASS colours are written in BGR order
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                styles.append('\\3c&HFFFFFF&')
        if from_alpha == to_alpha:
            styles.append('\\alpha&H%02X' % from_alpha)
        elif (from_alpha, to_alpha) == (255, 0):
            styles.append('\\fad(%s,0)' % (lifetime*1000))
        elif (from_alpha, to_alpha) == (0, 255):
            styles.append('\\fad(0, %s)' % (lifetime*1000))
        else:
            styles.append('\\fade(%(from_alpha)s, %(to_alpha)s, %(to_alpha)s, 0, %(end_time)s, %(end_time)s, %(end_time)s)' % {'from_alpha': from_alpha, 'to_alpha': to_alpha, 'end_time': lifetime*1000})
        if isborder == 'false':
            styles.append('\\bord0')
        f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(c[0]), 'end': ConvertTimestamp(c[0]+lifetime), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})
    except (IndexError, ValueError) as e:
        try:
            logging.warning(_('Invalid comment: %r') % c[3])
        except IndexError:
            # c itself is too short to have a text field; log all of it
            logging.warning(_('Invalid comment: %r') % c)
def WriteCommentAcfunPositioned(f, c, width, height, styleid):
    """Write one Acfun positioned comment to *f* as ASS Dialogue lines.

    ``c[3]`` is a dict: 'n' text, 'c' anchor index, 'w' font ('f' face,
    'b' bold), 'b' border flag, 'p' position ('x', 'y'), 'e'/'f' x/y scale,
    'r'/'k' z/y rotation, 'a' alpha, 't' start offset, 'l' duration and
    'z' a list of follow-up actions. One Dialogue line is written for the
    initial state and one for every action; an action may carry 'l'
    duration, 'x'/'y' position, 'f'/'g' scale, 'c' colour, 't' alpha and
    'd'/'e' rotation. ``c[0]`` is the start time, ``c[5]`` the colour and
    ``c[6]`` the font size. IndexError, KeyError and ValueError while
    decoding are logged as an invalid comment.
    """
    AcfunPlayerSize = (560, 400)
    ZoomFactor = GetZoomFactor(AcfunPlayerSize, (width, height))

    def GetPosition(InputPos, isHeight):
        # Positions are given in thousandths of the player size
        isHeight = int(isHeight)  # True -> 1
        return AcfunPlayerSize[isHeight]*ZoomFactor[0]*InputPos*0.001+ZoomFactor[isHeight+1]

    def GetRotationStyle(x, y, rotate_y, rotate_z):
        # The only place that turns output pixels into relative coordinates:
        # x is normalised by the width, y by the height. The original code
        # repeated this expression and divided y by the width in two places.
        return '\\frx%s\\fry%s\\frz%s\\fax%s\\fay%s' % ConvertFlashRotation(rotate_y, rotate_z, (x-ZoomFactor[1])/(width-ZoomFactor[1]*2), (y-ZoomFactor[2])/(height-ZoomFactor[2]*2))

    def GetTransformStyles(x=None, y=None, scale_x=None, scale_y=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
        # Build the override tags for every property that is not None
        styles = []
        if x is not None and y is not None:
            styles.append('\\pos(%s, %s)' % (x, y))
        if scale_x is not None:
            styles.append('\\fscx%s' % scale_x)
        if scale_y is not None:
            styles.append('\\fscy%s' % scale_y)
        if rotate_z is not None and rotate_y is not None:
            # Internal invariant: a rotation needs a position
            assert x is not None
            assert y is not None
            styles.append(GetRotationStyle(x, y, rotate_y, rotate_z))
        if color is not None:
            # ASS colours are written in BGR order
            styles.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                styles.append('\\3c&HFFFFFF&')
        if alpha is not None:
            alpha = 255-round(alpha*255)
            styles.append('\\alpha&H%02X' % alpha)
        return styles

    def FlushCommentLine(f, text, styles, start_time, end_time, styleid):
        # Lines that would last no time at all are not written
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(styles), 'text': text, 'styleid': styleid})

    try:
        comment_args = c[3]
        # The original applied the same CR replacement twice; once is enough
        text = ASSEscape(str(comment_args['n']).replace('\r', '\n'))
        common_styles = []
        anchor = {0: 7, 1: 8, 2: 9, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2, 8: 3}.get(comment_args.get('c', 0), 7)
        if anchor != 7:
            common_styles.append('\\an%s' % anchor)
        font = comment_args.get('w')
        if font:
            font = dict(font)
            fontface = font.get('f')
            if fontface:
                common_styles.append('\\fn%s' % ASSEscape(str(fontface)))
            fontbold = bool(font.get('b'))
            if fontbold:
                common_styles.append('\\b1')
        common_styles.append('\\fs%s' % round(c[6]*ZoomFactor[0]))
        isborder = bool(comment_args.get('b', True))
        if not isborder:
            common_styles.append('\\bord0')
        to_pos = dict(comment_args.get('p', {'x': 0, 'y': 0}))
        to_x = round(GetPosition(int(to_pos.get('x', 0)), False))
        to_y = round(GetPosition(int(to_pos.get('y', 0)), True))
        to_scale_x = round(float(comment_args.get('e', 1.0))*100)
        to_scale_y = round(float(comment_args.get('f', 1.0))*100)
        to_rotate_z = float(comment_args.get('r', 0.0))
        to_rotate_y = float(comment_args.get('k', 0.0))
        to_color = c[5]
        to_alpha = float(comment_args.get('a', 1.0))
        from_time = float(comment_args.get('t', 0.0))
        action_time = float(comment_args.get('l', 3.0))
        actions = list(comment_args.get('z', []))
        # Initial state of the comment
        transform_styles = GetTransformStyles(to_x, to_y, to_scale_x, to_scale_y, to_rotate_z, to_rotate_y, to_color, to_alpha)
        FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
        for action in actions:
            action = dict(action)
            # The previous target state becomes the new starting state
            from_x, from_y = to_x, to_y
            from_scale_x, from_scale_y = to_scale_x, to_scale_y
            from_rotate_z, from_rotate_y = to_rotate_z, to_rotate_y
            from_color, from_alpha = to_color, to_alpha
            from_time += action_time
            action_time = float(action.get('l', 0.0))
            action_styles = []
            if 'x' in action:
                to_x = round(GetPosition(int(action['x']), False))
            if 'y' in action:
                to_y = round(GetPosition(int(action['y']), True))
            if 'f' in action:
                to_scale_x = round(float(action['f'])*100)
                action_styles.append('\\fscx%s' % to_scale_x)
            if 'g' in action:
                to_scale_y = round(float(action['g'])*100)
                action_styles.append('\\fscy%s' % to_scale_y)
            if 'c' in action:
                to_color = int(action['c'])
                action_styles.append('\\c&H%02X%02X%02X&' % (to_color & 0xff, (to_color >> 8) & 0xff, (to_color >> 16) & 0xff))
            if 't' in action:
                to_alpha = float(action['t'])
                action_styles.append('\\alpha&H%02X' % (255-round(to_alpha*255)))
            if 'd' in action:
                to_rotate_z = float(action['d'])
            if 'e' in action:
                to_rotate_y = float(action['e'])
            if ('x' in action) or ('y' in action):
                transform_styles = GetTransformStyles(None, None, from_scale_x, from_scale_y, None, None, from_color, from_alpha)
                transform_styles.append('\\move(%s, %s, %s, %s)' % (from_x, from_y, to_x, to_y))
                action_styles.append(GetRotationStyle(to_x, to_y, to_rotate_y, to_rotate_z))
            elif ('d' in action) or ('e' in action):
                # NOTE(review): transform_styles is carried over from the
                # previous line in this branch, as in the original -- confirm
                # that this is intended.
                action_styles.append(GetRotationStyle(to_x, to_y, to_rotate_y, to_rotate_z))
            else:
                transform_styles = GetTransformStyles(from_x, from_y, from_scale_x, from_scale_y, from_rotate_z, from_rotate_y, from_color, from_alpha)
            if action_styles:
                transform_styles.append('\\t(%s)' % (''.join(action_styles)))
            FlushCommentLine(f, text, common_styles+transform_styles, c[0]+from_time, c[0]+from_time+action_time, styleid)
    # KeyError is included because the text is a required dict key ('n');
    # a comment without it is invalid, not a reason to abort the conversion.
    except (IndexError, KeyError, ValueError) as e:
        logging.warning(_('Invalid comment: %r') % c[3])
def WriteCommentSH5VPositioned(f, c, width, height, styleid):
    """Write one sH5V positioned comment to *f* as an ASS Dialogue line.

    Reads from the comment tuple: c[0] start time, c[3] text, c[5] colour,
    c[6] font size, c[9]/c[10] x/y as fractions of width/height, c[11]
    duration in milliseconds, c[12] alpha and c[14]/c[15] z/y rotation.
    Nothing is written when the duration is not positive. IndexError and
    ValueError while decoding are logged as an invalid comment.
    """
    def BuildStyleTags(x=None, y=None, fsize=None, rotate_z=None, rotate_y=None, color=None, alpha=None):
        # Build the override tags for every property that is not None
        tags = []
        if not (x is None or y is None):
            tags.append('\\pos(%s, %s)' % (x, y))
        if fsize is not None:
            tags.append('\\fs%s' % fsize)
        if not (rotate_y is None or rotate_z is None):
            tags += ['\\frz%s' % round(rotate_z), '\\fry%s' % round(rotate_y)]
        if color is not None:
            # ASS colours are written in BGR order
            tags.append('\\c&H%02X%02X%02X&' % (color & 0xff, (color >> 8) & 0xff, (color >> 16) & 0xff))
            if color == 0x000000:
                tags.append('\\3c&HFFFFFF&')
        if alpha is not None:
            tags.append('\\alpha&H%02X' % (255-round(alpha*255)))
        return tags

    try:
        text = ASSEscape(str(c[3]))
        pos_x = round(float(c[9])*width)
        pos_y = round(float(c[10])*height)
        angle_z = -int(c[14])
        angle_y = -int(c[15])
        color = c[5]
        opacity = float(c[12])
        #Note: Alpha transition hasn't been worked out yet.
        font_size = round(int(c[6])*math.sqrt(width*height/307200))
        #Note: sH5V stores an absolute font size, so for now it is scaled
        #by math.sqrt(width/640*height/480). It seems to be working fine...
        start_time = float(c[0])
        end_time = start_time+float(c[11])/1000
        style_tags = BuildStyleTags(pos_x, pos_y, font_size, angle_z, angle_y, color, opacity)
        if end_time > start_time:
            f.write('Dialogue: -1,%(start)s,%(end)s,%(styleid)s,,0,0,0,,{%(styles)s}%(text)s\n' % {'start': ConvertTimestamp(start_time), 'end': ConvertTimestamp(end_time), 'styles': ''.join(style_tags), 'text': text, 'styleid': styleid})
    except (IndexError, ValueError) as e:
        logging.warning(_('Invalid comment: %r') % c[3])
# Result: (f, dx, dy)
# To convert: NewX = f*x+dx, NewY = f*y+dy
def GetZoomFactor(SourceSize, TargetSize):
    """Return ``(f, dx, dy)`` that fits SourceSize into TargetSize.

    The aspect ratio is kept and the result is centred, so a point converts
    as ``NewX = f*x+dx`` and ``NewY = f*y+dy``. The most recent
    (SourceSize, TargetSize) pair and its result are remembered on the
    function object. A zero dimension yields ``(1, 0, 0)``.
    """
    request = (SourceSize, TargetSize)
    try:
        if request == GetZoomFactor.Cached_Size:
            return GetZoomFactor.Cached_Result
    except AttributeError:
        # Nothing has been cached yet
        pass
    GetZoomFactor.Cached_Size = request
    try:
        SourceAspect = SourceSize[0]/SourceSize[1]
        TargetAspect = TargetSize[0]/TargetSize[1]
        if TargetAspect < SourceAspect:  # narrower: fit the width, pad vertically
            ScaleFactor = TargetSize[0]/SourceSize[0]
            result = (ScaleFactor, 0, (TargetSize[1]-TargetSize[0]/SourceAspect)/2)
        elif TargetAspect > SourceAspect:  # wider: fit the height, pad horizontally
            ScaleFactor = TargetSize[1]/SourceSize[1]
            result = (ScaleFactor, (TargetSize[0]-TargetSize[1]*SourceAspect)/2, 0)
        else:
            result = (TargetSize[0]/SourceSize[0], 0, 0)
    except ZeroDivisionError:
        result = (1, 0, 0)
    GetZoomFactor.Cached_Result = result
    return result
# Calculation is based on https://github.com/jabbany/CommentCoreLibrary/issues/5#issuecomment-40087282
# and https://github.com/m13253/danmaku2ass/issues/7#issuecomment-41489422
# Input: X relative horizonal coordinate: 0 for left edge, 1 for right edge.
# Y relative vertical coordinate: 0 for top edge, 1 for bottom edge.
# FOV = 1.0/math.tan(100*math.pi/360.0)
# Result: (rotX, rotY, rotZ, shearX, shearY)
def ConvertFlashRotation(rotY, rotZ, X, Y, FOV=math.tan(2*math.pi/9.0)):
    """Convert a Flash Y/Z rotation into ASS rotation and shear values.

    rotY, rotZ: Rotation angles in degrees
    X, Y: Relative position of the text, 0..1 across the frame
    FOV: Field-of-view factor; None turns the perspective correction off

    Returns ``(rotX, rotY, rotZ, shearX, shearY)``. The three angles are
    rounded and wrapped by WrapAngle; shearX is always 0.
    """
    def WrapAngle(deg):
        return 180-((180-deg)%360)

    def CalcPerspectiveCorrection(alpha, X, FOV=FOV):
        alpha = WrapAngle(alpha)
        if FOV is None:
            return alpha
        radians = alpha*math.pi/180.0
        numerator = FOV*math.cos(radians)-X*math.sin(radians)
        reach = max(2, abs(X)+1)*math.sin(radians)
        front = 0 <= alpha <= 180
        if front:
            costheta = numerator/(FOV+reach)
        else:
            costheta = numerator/(FOV-reach)
        # Clamp into the domain of acos and report that it was necessary
        if costheta > 1 or costheta < -1:
            costheta = 1 if costheta > 1 else -1
            logging.error('Clipped rotation angle: (alpha=%s, X=%s), it is a bug!' % (alpha, X))
        if front:
            theta = math.acos(costheta)*180/math.pi
        else:
            theta = -math.acos(costheta)*180/math.pi
        return WrapAngle(theta)

    # Map the relative position from 0..1 to -1..1
    X = 2*X-1
    Y = 2*Y-1
    rotY = WrapAngle(rotY)
    rotZ = WrapAngle(rotZ)
    if rotY == 0 or rotZ == 0:
        # A single-axis rotation needs no decomposition
        outX = 0
        outY = -rotY  # Positive value means clockwise in Flash
        outZ = -rotZ
    else:
        rotY = rotY*math.pi/180.0
        rotZ = rotZ*math.pi/180.0
        outY = math.atan2(-math.sin(rotY)*math.cos(rotZ), math.cos(rotY))*180/math.pi
        outZ = math.atan2(-math.cos(rotY)*math.sin(rotZ), math.cos(rotZ))*180/math.pi
        outX = math.asin(math.sin(rotY)*math.sin(rotZ))*180/math.pi
    if FOV is not None:
        #outX = CalcPerspectiveCorrection(outX, -Y, FOV*0.75)
        outY = CalcPerspectiveCorrection(outY, X, FOV)
    shearY = round(-0.75*Y*math.sin(outY*math.pi/180.0), 3)
    return (WrapAngle(round(outX)), WrapAngle(round(outY)), WrapAngle(round(outZ)), 0, shearY)
def ProcessComments(comments, f, width, height, bottomReserved, fontface, fontsize, alpha, lifetime, reduced, progress_callback):
    """Write the ASS header and one event per comment to ``f``.

    Comments whose type field (index 4) is an int are laid out on free rows;
    string types 'bilipos', 'acfunpos' and 'sH5Vpos' are handed to their
    dedicated positioned writers; anything else is logged as invalid.

    Parameters:
        comments: sequence of comment records (indexable; index 3 is the
            text, 4 the type, 7 the height compared against free rows).
        f: writable text file object.
        width, height: stage size.
        bottomReserved: number of rows at the bottom that must stay free.
        fontface, fontsize, alpha: default style values for the header.
        lifetime: display duration of a comment.
        reduced: when true, comments that find no free row are dropped
            instead of being forced onto an alternative row.
        progress_callback: optional callable(done, total).
    """
    styleid = 'Danmaku2ASS_%04x' % random.randint(0, 0xffff)
    WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid)
    # One occupancy table per row-allocated comment type (0..3).
    rows = [[None]*(height-bottomReserved+1) for i in range(4)]
    for idx, comment in enumerate(comments):
        if progress_callback and idx % 1000 == 0:
            progress_callback(idx, len(comments))
        if not isinstance(comment[4], int):
            if comment[4] == 'bilipos':
                WriteCommentBilibiliPositioned(f, comment, width, height, styleid)
            elif comment[4] == 'acfunpos':
                WriteCommentAcfunPositioned(f, comment, width, height, styleid)
            elif comment[4] == 'sH5Vpos':
                WriteCommentSH5VPositioned(f, comment, width, height, styleid)
            else:
                logging.warning(_('Invalid comment: %r') % comment[3])
            continue
        # Scan downwards for a run of free rows tall enough for this comment.
        row = 0
        rowmax = height-bottomReserved-comment[7]
        placed = False
        while row <= rowmax:
            freerows = TestFreeRows(rows, comment, row, width, height, bottomReserved, lifetime)
            if freerows >= comment[7]:
                placed = True
                break
            row += freerows or 1
        if not placed:
            if reduced:
                continue
            row = FindAlternativeRow(rows, comment, height, bottomReserved)
        MarkCommentRow(rows, comment, row)
        WriteComment(f, comment, row, width, height, bottomReserved, fontsize, lifetime, styleid)
    if progress_callback:
        progress_callback(len(comments), len(comments))
def TestFreeRows(rows, c, row, width, height, bottomReserved, lifetime):
    """Count the consecutive usable rows for comment ``c`` starting at ``row``.

    Scanning stops at the first row whose occupant would still collide with
    ``c``, at the bottom limit (height - bottomReserved), or once c[7] rows
    have been counted.

    Returns:
        The number of free rows found (0 .. c[7]).
    """
    rowmax = height-bottomReserved
    if c[4] in (1, 2):
        # Types 1 and 2: a row is busy until its occupant's lifetime is over.
        def blocks(occupant):
            return occupant and occupant[0]+lifetime > c[0]
    else:
        # Other types move across the stage: a row is busy while the
        # occupant has not cleared enough of the width.
        try:
            thresholdTime = c[0]-lifetime*(1-width/(c[8]+width))
        except ZeroDivisionError:
            thresholdTime = c[0]-lifetime

        def blocks(occupant):
            try:
                return occupant and (occupant[0] > thresholdTime or occupant[0]+occupant[8]*lifetime/(occupant[8]+width) > c[0])
            except ZeroDivisionError:
                return False

    res = 0
    targetRow = None
    while row < rowmax and res < c[7]:
        occupant = rows[c[4]][row]
        # Only re-test when the occupant changes between adjacent rows.
        if targetRow != occupant:
            targetRow = occupant
            if blocks(occupant):
                break
        row += 1
        res += 1
    return res
def FindAlternativeRow(rows, c, height, bottomReserved):
    """Pick a fallback row for comment ``c`` when no free run exists.

    Returns the first empty row within the usable range; if every row is
    occupied, returns the row whose occupant has the smallest start time
    (index 0 of the record). Returns 0 when the usable range is empty.
    """
    oldest = 0
    limit = height-bottomReserved-math.ceil(c[7])
    for candidate in range(limit):
        occupant = rows[c[4]][candidate]
        if not occupant:
            return candidate
        if occupant[0] < rows[c[4]][oldest][0]:
            oldest = candidate
    return oldest
def MarkCommentRow(rows, c, row):
    """Record comment ``c`` as the occupant of ceil(c[7]) rows from ``row`` on.

    Rows past the end of the occupancy table are silently ignored.
    """
    try:
        index = row
        stop = row+math.ceil(c[7])
        while index < stop:
            rows[c[4]][index] = c
            index += 1
    except IndexError:
        # Ran off the table (or the record is too short): keep what was marked.
        pass
def WriteASSHead(f, width, height, fontface, fontsize, alpha, styleid):
    """Write the ASS script header, style definition and event format to ``f``.

    Parameters:
        f: writable text file object.
        width, height: values for PlayResX / PlayResY.
        fontface, fontsize: default font; the size is rounded.
        alpha: opacity in 0..1, stored as 255-round(alpha*255) in the colours.
        styleid: name of the single style that is defined.
    """
    values = {
        'width': width,
        'height': height,
        'fontface': fontface,
        'fontsize': round(fontsize),
        'alpha': 255-round(alpha*255),
        # Outline thickness is derived from the font size.
        'outline': round(fontsize/25),
        'styleid': styleid,
    }
    template = '''
[Script Info]
; Script generated by Danmaku2ASS
; https://github.com/m13253/danmaku2ass
Script Updated By: Danmaku2ASS (https://github.com/m13253/danmaku2ass)
ScriptType: v4.00+
WrapStyle: 2
Collisions: Normal
PlayResX: %(width)s
PlayResY: %(height)s
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: %(styleid)s, %(fontface)s, %(fontsize)s, &H%(alpha)02XFFFFFF, &H%(alpha)02XFFFFFF, &H%(alpha)02X000000, &H%(alpha)02X000000, 0, 0, 0, 0, 100, 100, 0.00, 0.00, 1, %(outline)s, 0, 7, 0, 0, 0, 0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'''
    f.write(template % values)
def WriteComment(f, c, row, width, height, bottomReserved, fontsize, lifetime, styleid):
    """Write one row-allocated comment as an ASS ``Dialogue`` line.

    The type field c[4] selects the override tag: 1 -> ``\\an8\\pos`` at the
    given row, 2 -> ``\\an2\\pos`` at the row mapped by ConvertType2,
    3 -> ``\\move`` from -ceil(c[8]) to ``width``, anything else -> ``\\move``
    from ``width`` to -ceil(c[8]). A font-size tag is added when c[6] differs
    from ``fontsize`` by 1 or more, and a colour tag when c[5] is not white.
    """
    kind = c[4]
    text = ASSEscape(c[3])
    if kind == 1:
        placement = '\\an8\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': row}
    elif kind == 2:
        placement = '\\an2\\pos(%(halfwidth)s, %(row)s)' % {'halfwidth': round(width/2), 'row': ConvertType2(row, height, bottomReserved)}
    elif kind == 3:
        placement = '\\move(%(neglen)s, %(row)s, %(width)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    else:
        placement = '\\move(%(width)s, %(row)s, %(neglen)s, %(row)s)' % {'width': width, 'row': row, 'neglen': -math.ceil(c[8])}
    styles = [placement]
    if not (-1 < c[6]-fontsize < 1):
        styles.append('\\fs%s' % round(c[6]))
    colour = c[5]
    if colour != 0xffffff:
        # Bytes are emitted lowest-first (the reverse of the 0xRRGGBB order).
        styles.append('\\c&H%02X%02X%02X&' % (colour & 0xff, (colour >> 8) & 0xff, (colour >> 16) & 0xff))
        if colour == 0x000000:
            # Black text gets a white outline.
            styles.append('\\3c&HFFFFFF&')
    fields = {
        'start': ConvertTimestamp(c[0]),
        'end': ConvertTimestamp(c[0]+lifetime),
        'styles': ''.join(styles),
        'text': text,
        'styleid': styleid,
    }
    f.write('Dialogue: 2,%(start)s,%(end)s,%(styleid)s,,0000,0000,0000,,{%(styles)s}%(text)s\n' % fields)
def ASSEscape(s):
    """Escape ``s`` for use as ASS dialogue text.

    Backslashes and braces are backslash-escaped, line breaks become ``\\N``
    and empty lines are replaced by a single space.
    """
    escaped = str(s)
    # The backslash must be handled first so later escapes are not doubled.
    for raw, quoted in (('\\', '\\\\'), ('{', '\\{'), ('}', '\\}')):
        escaped = escaped.replace(raw, quoted)
    return '\\N'.join([line or ' ' for line in escaped.split('\n')])
def CalculateLength(s):
    """Return the character count of the longest line in ``s``.

    This is a character count, so it may not match the rendered width.
    """
    return max(len(line) for line in s.split('\n'))
def ConvertTimestamp(timestamp):
    """Format a time in seconds as ``H:MM:SS.cc`` (centisecond precision)."""
    centiseconds = round(timestamp*100.0)
    # Peel the fields off from the smallest unit upwards.
    whole_seconds, centsecond = divmod(centiseconds, 100)
    whole_minutes, second = divmod(whole_seconds, 60)
    hour, minute = divmod(whole_minutes, 60)
    return '%d:%02d:%02d.%02d' % (int(hour), int(minute), int(second), int(centsecond))
def ConvertType2(row, height, bottomReserved):
    """Mirror ``row`` so it is measured upwards from the reserved bottom area."""
    usable_height = height-bottomReserved
    return usable_height-row
def ConvertToFile(filename_or_file, *args, **kwargs):
    """Return an open file for a path, or pass a non-path object through.

    ``bytes`` paths are decoded as UTF-8 (with replacement) first. ``str``
    paths are opened with ``open(path, *args, **kwargs)``. Any other object
    is returned unchanged, on the assumption that it already is a file.
    """
    target = filename_or_file
    if isinstance(target, bytes):
        target = str(bytes(target).decode('utf-8', 'replace'))
    if not isinstance(target, str):
        return target
    return open(target, *args, **kwargs)
def FilterBadChars(f):
    """Read all of ``f`` and return a StringIO with control characters replaced.

    Characters U+0000-U+0008, U+000B, U+000C and U+000E-U+001F are replaced
    by U+FFFD; tab, line feed and carriage return are kept.
    """
    cleaned = re.sub('[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]', '\ufffd', f.read())
    return io.StringIO(cleaned)
class safe_list(list):
    """A list with a dict-like ``get`` that tolerates out-of-range indexes."""

    def get(self, index, default=None):
        """Return ``self[index]``, or ``default`` if the index is out of range."""
        try:
            value = self[index]
        except IndexError:
            value = default
        return value
def export(func):
    """Decorator: add ``func``'s name to the module's ``__all__`` list.

    ``__all__`` is created on first use. The function is returned unchanged.
    """
    global __all__
    name = func.__name__
    try:
        registry = __all__
    except NameError:
        # First exported name: the list does not exist yet.
        __all__ = [name]
    else:
        registry.append(name)
    return func
@export
def Danmaku2ASS(input_files, output_file, stage_width, stage_height, reserve_blank=0, font_face=_('(FONT) sans-serif')[7:], font_size=25.0, text_opacity=1.0, comment_duration=5.0, is_reduce_comments=False, progress_callback=None):
    """Read comment files and write them out as a single ASS subtitle script.

    Parameters:
        input_files: a path or an iterable of inputs, as accepted by
            ReadComments.
        output_file: a path or an already-open file object; a falsy value
            sends the output to sys.stdout.
        stage_width, stage_height: stage size.
        reserve_blank: rows to keep free at the bottom of the stage.
        font_face, font_size, text_opacity: default style values.
        comment_duration: display duration of each comment.
        is_reduce_comments: drop comments that do not fit instead of
            overlapping them.
        progress_callback: optional callable(done, total).

    Raises:
        Whatever ReadComments, opening the output, or ProcessComments raise.
    """
    fo = None
    comments = ReadComments(input_files, font_size)
    try:
        if output_file:
            fo = ConvertToFile(output_file, 'w', encoding='utf-8-sig', errors='replace', newline='\r\n')
        else:
            fo = sys.stdout
        ProcessComments(comments, fo, stage_width, stage_height, reserve_blank, font_face, font_size, text_opacity, comment_duration, is_reduce_comments, progress_callback)
    finally:
        # Close only a handle opened here: never sys.stdout and never a file
        # object supplied by the caller. ``fo`` is still None when opening
        # the output failed; without that check the resulting AttributeError
        # would hide the real error.
        if output_file and fo is not None and fo != output_file:
            fo.close()
@export
def ReadComments(input_files, font_size=25.0, progress_callback=None):
    """Parse every input and return all comments as one sorted list.

    Parameters:
        input_files: a single path (``str`` or UTF-8 ``bytes``) or an
            iterable of inputs, each passed to ConvertToFile.
        font_size: forwarded to the format-specific comment processor.
        progress_callback: optional callable(done, total), called before each
            file and once after the last one.

    Raises:
        ValueError: if no comment processor is found for a file.
    """
    if isinstance(input_files, bytes):
        input_files = str(bytes(input_files).decode('utf-8', 'replace'))
    sources = [input_files] if isinstance(input_files, str) else list(input_files)
    comments = []
    for position, source in enumerate(sources):
        if progress_callback:
            progress_callback(position, len(sources))
        with ConvertToFile(source, 'r', encoding='utf-8', errors='replace') as f:
            CommentProcessor = GetCommentProcessor(f)
            if not CommentProcessor:
                raise ValueError(_('Unknown comment file format: %s') % source)
            comments.extend(CommentProcessor(FilterBadChars(f), font_size))
    if progress_callback:
        progress_callback(len(sources), len(sources))
    comments.sort()
    return comments
@export
def GetCommentProcessor(input_file):
    """Look up the comment processor for the format probed from ``input_file``."""
    detected_format = ProbeCommentFormat(input_file)
    return CommentFormatMap[detected_format]
def main():
    """Command-line entry point: parse the options and run Danmaku2ASS.

    With no arguments at all the help text is shown.

    Raises:
        ValueError: if --size is not of the form WIDTHxHEIGHT with integers.
    """
    if len(sys.argv) == 1:
        sys.argv.append('--help')
    parser = argparse.ArgumentParser()
    option_specs = [
        (('-o', '--output'), dict(metavar=_('OUTPUT'), help=_('Output file'))),
        (('-s', '--size'), dict(metavar=_('WIDTHxHEIGHT'), required=True, help=_('Stage size in pixels'))),
        (('-fn', '--font'), dict(metavar=_('FONT'), help=_('Specify font face [default: %s]') % _('(FONT) sans-serif')[7:], default=_('(FONT) sans-serif')[7:])),
        (('-fs', '--fontsize'), dict(metavar=_('SIZE'), help=(_('Default font size [default: %s]') % 25), type=float, default=25.0)),
        (('-a', '--alpha'), dict(metavar=_('ALPHA'), help=_('Text opacity'), type=float, default=1.0)),
        (('-l', '--lifetime'), dict(metavar=_('SECONDS'), help=_('Duration of comment display [default: %s]') % 5, type=float, default=5.0)),
        (('-p', '--protect'), dict(metavar=_('HEIGHT'), help=_('Reserve blank on the bottom of the stage'), type=int, default=0)),
        (('-r', '--reduce'), dict(action='store_true', help=_('Reduce the amount of comments if stage is full'))),
        (('file',), dict(metavar=_('FILE'), nargs='+', help=_('Comment file to be processed'))),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()
    try:
        # Both a missing 'x' and a non-integer part end up as ValueError.
        width, height = (int(part) for part in str(args.size).split('x', 1))
    except ValueError:
        raise ValueError(_('Invalid stage size: %r') % args.size)
    Danmaku2ASS(args.file, args.output, width, height, args.protect, args.font, args.fontsize, args.alpha, args.lifetime, args.reduce)


if __name__ == '__main__':
    main()
================================================
FILE: 2020/dmzj/cartoon.py
================================================
import requests
import os
import re
from bs4 import BeautifulSoup
from contextlib import closing
from tqdm import tqdm
import time
"""
Author:
Jack Cui
Wechat:
https://mp.weixin.qq.com/s/OCWwRVDFNslIuKyiCVUoTA
"""
# Create the top-level save directory (named after the comic) if it is missing
save_dir = '妖神记'
if save_dir not in os.listdir('./'):
os.mkdir(save_dir)
target_url = "https://www.dmzj.com/info/yaoshenji.html"
# Fetch the comic's index page and collect chapter links and chapter names
r = requests.get(url = target_url)
bs = BeautifulSoup(r.text, 'lxml')
list_con_li = bs.find('ul', class_="list_con_li")
cartoon_list = list_con_li.find_all('a')
chapter_names = []
chapter_urls = []
for cartoon in cartoon_list:
href = cartoon.get('href')
name = cartoon.text
# insert(0, ...) prepends, so both lists end up in the reverse of page order
chapter_names.insert(0, name)
chapter_urls.insert(0, href)
# Download the comic, one chapter at a time
for i, url in enumerate(tqdm(chapter_urls)):
# The chapter page URL is sent as Referer with every image request
download_header = {
'Referer': url
}
name = chapter_names[i]
# Strip every '.' from the chapter name before using it as a directory name
while '.' in name:
name = name.replace('.', '')
chapter_save_dir = os.path.join(save_dir, name)
# The chapter directory is only created when it does not exist yet
# NOTE(review): indentation is lost in this view; presumably the whole download
# below is skipped for chapters whose directory already exists - confirm
if name not in os.listdir(save_dir):
os.mkdir(chapter_save_dir)
r = requests.get(url = url)
html = BeautifulSoup(r.text, 'lxml')
# Image ids are scraped from the first <script> element of the chapter page
script_info = html.script
pics = re.findall('\d{13,14}', str(script_info))
# Pad 13-digit ids with a trailing '0' so all ids have 14 digits for sorting
for j, pic in enumerate(pics):
if len(pic) == 13:
pics[j] = pic + '0'
pics = sorted(pics, key=lambda x:int(x))
# The first 5-digit and first 4-digit |...|-delimited numbers form the image path
chapterpic_hou = re.findall('\|(\d{5})\|', str(script_info))[0]
chapterpic_qian = re.findall('\|(\d{4})\|', str(script_info))[0]
for idx, pic in enumerate(pics):
# A trailing '0' is treated as the sort padding and removed again
# NOTE(review): a genuine 14-digit id ending in '0' would also lose its last digit
if pic[-1] == '0':
url = 'https://images.dmzj.com/img/chapterpic/' + chapterpic_qian + '/' + chapterpic_hou + '/' + pic[:-1] + '.jpg'
else:
url = 'https://images.dmzj.com/img/chapterpic/' + chapterpic_qian + '/' + chapterpic_hou + '/' + pic + '.jpg'
# Pages are saved as 001.jpg, 002.jpg, ... in sorted-id order
pic_name = '%03d.jpg' % (idx + 1)
pic_save_path = os.path.join(chapter_save_dir, pic_name)
with closing(requests.get(url, headers = download_header, stream = True)) as response:
chunk_size = 1024
# NOTE(review): content_size is never used, and this lookup raises KeyError
# when the response carries no content-length header
content_size = int(response.headers['content-length'])
if response.status_code == 200:
with open(pic_save_path, "wb") as file:
for data in response.iter_content(chunk_size=chunk_size):
file.write(data)
else:
# The printed message means "abnormal link"
print('链接异常')
# Pause 10 seconds between requests to throttle the crawl
time.sleep(10)
================================================
FILE: 2020/taobao/taobao_login.py
================================================
from selenium import webdriver
import logging
import time
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from retrying import retry
from selenium.webdriver import ActionChains
import pyautogui
# Global pyautogui pause setting (presumably seconds to wait after each
# pyautogui call - confirm against the pyautogui documentation)
pyautogui.PAUSE = 0.5
# Log INFO and above with timestamp, logger name and level
logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger used by the taobao class below
logger = logging.getLogger(__name__)
"""
微信公众号 JackCui-AI
更多精彩教程、源码尽在微信公众号
"""
class taobao():
    """Selenium-driven Taobao session: log in, read the nickname, check out the cart."""

    def __init__(self):
        """Start Chrome, maximise the window and prepare an ActionChains helper."""
        # Raw string on purpose: in a normal literal "\t" is a TAB character
        # and "\y" / "\c" are invalid escapes, so the path was silently
        # corrupted. Replace the placeholder with your chromedriver path.
        self.browser = webdriver.Chrome(r"path\to\your\chromedriver.exe")
        # Maximise the window
        self.browser.maximize_window()
        self.browser.implicitly_wait(5)
        self.domain = 'http://www.taobao.com'
        self.action_chains = ActionChains(self.browser)

    def login(self, username, password):
        """Log in with ``username`` / ``password``, retrying until it succeeds.

        Each attempt opens the home page, follows the login link, fills in
        the form, drags the slider captcha if one is shown, and clicks the
        login button through pyautogui image matching. The loop ends once a
        nickname can be read from the home page.
        """
        while True:
            self.browser.get(self.domain)
            time.sleep(1)
            # The form is reached via XPath; equivalent class-name / id
            # lookups also work for these three steps.
            self.browser.find_element_by_xpath('//*[@id="J_SiteNavLogin"]/div[1]/div[1]/a[1]').click()
            self.browser.find_element_by_xpath('//*[@id="fm-login-id"]').send_keys(username)
            self.browser.find_element_by_xpath('//*[@id="fm-login-password"]').send_keys(password)
            time.sleep(1)
            try:
                # A slider captcha may appear
                slider = self.browser.find_element_by_xpath("//span[contains(@class, 'btn_slide')]")
                if slider.is_displayed():
                    # Drag the slider 258 px to the right
                    self.action_chains.drag_and_drop_by_offset(slider, 258, 0).perform()
                    time.sleep(0.5)
                    # Release the mouse button after the drag
                    self.action_chains.release().perform()
            except (NoSuchElementException, WebDriverException):
                logger.info('未出现登录验证码')
            # Per the original author, clicking the login button through
            # Selenium does not log in, so the click is done with pyautogui
            # by locating a screenshot of the button ('1.png') on screen.
            coords = pyautogui.locateOnScreen('1.png')
            x, y = pyautogui.center(coords)
            pyautogui.leftClick(x, y)
            nickname = self.get_nickname()
            if nickname:
                logger.info('登录成功,呢称为:' + nickname)
                break
            logger.debug('登录出错,5s后继续登录')
            time.sleep(5)

    def get_nickname(self):
        """Return the text of the 'site-nav-user' element, or '' if it is absent."""
        self.browser.get(self.domain)
        time.sleep(0.5)
        try:
            return self.browser.find_element_by_class_name('site-nav-user').text
        except NoSuchElementException:
            return ''

    def clear_cart(self):
        """Click through the cart: mini cart, select-all, go, submit order.

        Each element is clicked only if it is displayed.
        NOTE(review): the XPaths suggest this submits an order for the
        whole cart rather than emptying it - confirm before use.
        """
        cart = self.browser.find_element_by_xpath('//*[@id="J_MiniCart"]')
        if cart.is_displayed():
            cart.click()
        select = self.browser.find_element_by_xpath('//*[@id="J_SelectAll1"]/div/label')
        if select.is_displayed():
            select.click()
        time.sleep(0.5)
        go = self.browser.find_element_by_xpath('//*[@id="J_Go"]')
        if go.is_displayed():
            go.click()
        submit = self.browser.find_element_by_xpath('//*[@id="submitOrderPC_1"]/div/a[2]')
        if submit.is_displayed():
            submit.click()
# Script entry point: log in once with the credentials below
if __name__ == '__main__':
# Fill in your own username and password
username = 'username'
password = 'password'
tb = taobao()
tb.login(username, password)
# Checking out the cart is left disabled
#tb.clear_cart()
================================================
FILE: 2020/xbqg/xbqg_spider.py
================================================
import requests
import time
from tqdm import tqdm
from bs4 import BeautifulSoup
"""
Author:
Jack Cui
Wechat:
https://mp.weixin.qq.com/s/OCWwRVDFNslIuKyiCVUoTA
"""
def get_content(target):
    """Download one chapter page and return its text split into paragraphs.

    The text of ``<div id="content">`` is stripped and split on runs of four
    non-breaking spaces.
    """
    response = requests.get(url = target)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')
    body = soup.find('div', id='content')
    return body.text.strip().split('\xa0'*4)
if __name__ == '__main__':
    # Crawl every chapter linked from the book's index page and append
    # "<chapter name>\n<paragraphs>\n" to a single text file.
    server = 'https://www.xsbiquge.com'
    book_name = '诡秘之主.txt'
    target = 'https://www.xsbiquge.com/15_15338/'
    req = requests.get(url = target)
    req.encoding = 'utf-8'
    chapter_bs = BeautifulSoup(req.text, 'lxml')
    # Chapter links are the <a> elements inside <div id="list">
    chapters = chapter_bs.find('div', id='list').find_all('a')
    for chapter in tqdm(chapters):
        chapter_name = chapter.string
        url = server + chapter.get('href')
        content = get_content(url)
        # The file is reopened in append mode for each chapter
        with open(book_name, 'a', encoding='utf-8') as f:
            for piece in (chapter_name, '\n', '\n'.join(content), '\n'):
                f.write(piece)
================================================
FILE: 2020/zycjw/video_download.py
================================================
import os
import ffmpy3
import requests
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool as ThreadPool
# Title to search for on the site
search_keyword = '越狱第一季'
search_url = 'http://www.jisudhw.com/index.php'
# Query-string parameters of the search request
serach_params = {
'm': 'vod-search'
}
# Request headers sent with the search
serach_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
'Referer': 'http://www.jisudhw.com/',
'Origin': 'http://www.jisudhw.com',
'Host': 'www.jisudhw.com'
}
# Form body of the search request
serach_datas = {
'wd': search_keyword,
'submit': 'search'
}
# Output directory; set to the name of a search hit below
video_dir = ''
r = requests.post(url=search_url, params=serach_params, headers=serach_headers, data=serach_datas)
r.encoding = 'utf-8'
server = 'http://www.jisudhw.com'
search_html = BeautifulSoup(r.text, 'lxml')
# Each search hit is a <span class="xing_vb4"> wrapping a link
search_spans = search_html.find_all('span', class_='xing_vb4')
for span in search_spans:
url = server + span.a.get('href')
name = span.a.string
print(name)
print(url)
video_dir = name
# Create a directory named after the hit in the current working directory
if name not in os.listdir('./'):
os.mkdir(name)
# NOTE(review): indentation is lost in this view; it is unclear whether the
# detail-page fetch below runs per search hit or once after the loop - confirm
detail_url = url
r = requests.get(url = detail_url)
r.encoding = 'utf-8'
detail_bf = BeautifulSoup(r.text, 'lxml')
# Episode counter, starting at 1
num = 1
# Maps each m3u8 URL to its episode number
serach_res = {}
for each_url in detail_bf.find_all('input'):
# NOTE(review): an <input> without a value attribute makes get() return None,
# and 'm3u8' in None raises TypeError
if 'm3u8' in each_url.get('value'):
url = each_url.get('value')
if url not in serach_res.keys():
serach_res[url] = num
# The printed label means "episode NNN:"
print('第%03d集:' % num)
print(url)
num += 1
# Download one episode: ffmpeg reads the m3u8 URL and writes an .mp4 file
# named after the episode number into video_dir
def downVideo(url):
num = serach_res[url]
name = os.path.join(video_dir, '第%03d集.mp4' % num)
ffmpy3.FFmpeg(inputs={url: None}, outputs={name:None}).run()
# Use a pool of 8 worker threads (multiprocessing.dummy is thread-based)
pool = ThreadPool(8)
results = pool.map(downVideo, serach_res.keys())
pool.close()
pool.join()
================================================
FILE: Netease/Netease.py
================================================
# -*- coding:utf-8 -*-
import requests, hashlib, sys, click, re, base64, binascii, json, os
from Crypto.Cipher import AES
from http import cookiejar
"""
Website:http://cuijiahua.com
Author:Jack Cui
Refer:https://github.com/darknessomi/musicbox
"""
class Encrypyed():
    """
    Encryption helpers for building the ``params`` / ``encSecKey`` form
    fields sent with requests (the methods encrypt; nothing is decrypted).
    """
    def __init__(self):
        # Hex-encoded modulus and public exponent used by rsa_encrpt()
        self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
        # Fixed key for the first AES pass
        self.nonce = '0CoJUm6Qyw8W8jud'
        self.pub_key = '010001'

    # Login encryption, based on https://github.com/stkevintan/nw_musicbox
    def encrypted_request(self, text):
        """Encrypt a JSON-serialisable object into the request form fields.

        The JSON text is AES-encrypted twice (first with the fixed nonce,
        then with a fresh random key); the random key itself is encrypted
        with rsa_encrpt().

        :return: dict with keys 'params' and 'encSecKey'
        """
        text = json.dumps(text)
        sec_key = self.create_secret_key(16)
        enc_text = self.aes_encrypt(self.aes_encrypt(text, self.nonce), sec_key.decode('utf-8'))
        enc_sec_key = self.rsa_encrpt(sec_key, self.pub_key, self.modulus)
        data = {'params': enc_text, 'encSecKey': enc_sec_key}
        return data

    def aes_encrypt(self, text, secKey):
        """AES-CBC encrypt ``text`` (str) with ``secKey`` (str); return base64 text.

        Padding is computed on the UTF-8 *bytes*, so non-ASCII text still
        yields a whole number of 16-byte blocks. For ASCII text the result
        is identical to padding the string before encoding.
        """
        data = text.encode('utf-8')
        pad = 16 - len(data) % 16
        data += bytes([pad]) * pad
        encryptor = AES.new(secKey.encode('utf-8'), AES.MODE_CBC, b'0102030405060708')
        ciphertext = encryptor.encrypt(data)
        return base64.b64encode(ciphertext).decode('utf-8')

    def rsa_encrpt(self, text, pubKey, modulus):
        """Return ``text`` (bytes) raised to ``pubKey`` mod ``modulus``, as hex.

        ``text`` is reversed first; ``pubKey`` and ``modulus`` are hex
        strings. The result is lower-case hex, zero-padded to 256 characters.
        """
        text = text[::-1]
        rs = pow(int(binascii.hexlify(text), 16), int(pubKey, 16), int(modulus, 16))
        return format(rs, 'x').zfill(256)

    def create_secret_key(self, size):
        """Return ``size`` random hexadecimal characters as bytes.

        The original always cut the key to 16 characters regardless of
        ``size``; the result for size=16 is unchanged.
        """
        return binascii.hexlify(os.urandom(size))[:size]
class Song():
    """
    Plain record holding the information about one song.
    """
    def __init__(self, song_id, song_name, song_num, song_url=None):
        # A missing URL is stored as an empty string
        if song_url is None:
            song_url = ''
        self.song_id, self.song_name = song_id, song_name
        self.song_num, self.song_url = song_num, song_url
class Crawler():
"""
网易云爬取API
"""
def __init__(self, timeout=60, cookie_path='.'):
    """
    Set up the HTTP sessions and the request encryptor.
    :params timeout: timeout passed to requests made through post_request
    :params cookie_path: file name handed to the LWPCookieJar
    """
    self.timeout = timeout
    self.ep = Encrypyed()
    self.headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/search/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    # API session: carries the headers above and an LWP cookie jar
    api_session = requests.Session()
    api_session.headers.update(self.headers)
    api_session.cookies = cookiejar.LWPCookieJar(cookie_path)
    self.session = api_session
    # Separate plain session kept for downloads
    self.download_session = requests.Session()
def post_request(self, url, params):
    """
    POST ``params`` (encrypted) to ``url``.
    :return: the decoded JSON dict when its 'code' is 200; otherwise an
             error is echoed and None is returned
    """
    payload = self.ep.encrypted_request(params)
    response = self.session.post(url, data=payload, timeout=self.timeout)
    result = response.json()
    if result['code'] == 200:
        return result
    click.echo('post_request error')
def search(self, search_content, search_type, limit=9):
    """
    Search API
    :params search_content: text to search for
    :params search_type: type of search
    :params limit: number of results to return
    :return: whatever post_request returns for the search endpoint
    """
    query = {
        's': search_content,
        'type': search_type,
        'offset': 0,
        'sub': 'false',
        'limit': limit,
    }
    return self.post_request('http://music.163.com/weapi/cloudsearch/get/web?csrf_token=', query)
def search_song(self, song_name, song_num, quiet=True, limit=9):
    """
    Search for a song by name
    :params song_name: name of the song
    :params song_num: number stored on the returned Song
    :params quiet: automatically pick the first (best) match
    :params limit: number of results to request
    :return: a Song for the first match when quiet is true; None when
             nothing was found, the search request failed, or quiet is false
    """
    result = self.search(song_name, search_type=1, limit=limit)
    # search() passes through post_request(), which yields None when the API
    # answers with a non-200 code; indexing that would raise TypeError.
    if not result or result['result']['songCount'] <= 0:
        click.echo('Song {} not existed.'.format(song_name))
        return None
    songs = result['result']['songs']
    if quiet:
        song_id, song_name = songs[0]['id'], songs[0]['name']
        return Song(song_id=song_id, song_name=song_name, song_num=song_num)
    return None
def get_song_url(self, song_id, bit_rate=320000):
"""
获得歌曲的下载地址
:params song_id: 音乐ID (.+?)<\/p>')
nickname = _nickname_re.search(share_user.text).group(1)
data = {
'tac': tac.split('|')[0],
'user_id': user_id,
}
req = requests.post(sign_api, data=data)
while req.status_code != 200:
req = requests.post(sign_api, data=data)
sign = req.json().get('signature')
user_url_prefix = 'https://www.iesdouyin.com/web/api/v2/aweme/like' if type_flag == 'f' else 'https://www.iesdouyin.com/web/api/v2/aweme/post'
print('解析视频链接中')
while has_more != 0:
user_url = user_url_prefix + '/?user_id=%s&sec_uid=&count=21&max_cursor=%s&aid=1128&_signature=%s&dytk=%s' % (user_id, max_cursor, sign, dytk)
req = requests.get(user_url, headers=self.headers)
while req.status_code != 200:
req = requests.get(user_url, headers=self.headers)
html = json.loads(req.text)
for each in html['aweme_list']:
try:
url = 'https://aweme.snssdk.com/aweme/v1/play/?video_id=%s&line=0&ratio=720p&media_type=4&vr_type=0&improve_bitrate=0&is_play_url=1&is_support_h265=0&source=PackSourceEnum_PUBLISH'
vid = each['video']['vid']
video_url = url % vid
except:
continue
share_desc = each['desc']
if os.name == 'nt':
for c in r'\/:*?"<>|':
nickname = nickname.replace(c, '').strip().strip('\.')
share_desc = share_desc.replace(c, '').strip()
share_id = each['aweme_id']
if share_desc in ['抖音-原创音乐短视频社区', 'TikTok', '']:
video_names.append(share_id + '.mp4')
else:
video_names.append(share_id + '-' + share_desc + '.mp4')
share_url = 'https://www.iesdouyin.com/share/video/%s' % share_id
share_urls.append(share_url)
video_urls.append(video_url)
max_cursor = html['max_cursor']
has_more = html['has_more']
return video_names, video_urls, share_urls, nickname
def get_download_url(self, video_url, watermark_flag):
    """
    Build the download address for the requested watermark variant
    Parameters:
        video_url: video playback address
        watermark_flag: True for the watermarked variant
    Returns:
        download_url: address with '/play/' and '/playwm/' swapped as needed
    """
    # '/playwm/' serves the watermarked video, '/play/' the clean one
    if watermark_flag == True:
        unwanted, wanted = '/play/', '/playwm/'
    else:
        unwanted, wanted = '/playwm/', '/play/'
    return video_url.replace(unwanted, wanted)
def video_downloader(self, video_url, video_name, watermark_flag=False):
    """
    Download one video to disk
    Parameters:
        video_url: video playback address
        video_name: path of the file to write
        watermark_flag: whether to download the watermarked variant
    Returns:
        None. Nothing is written unless the server answers with status 200.
    """
    size = 0
    video_url = self.get_download_url(video_url, watermark_flag=watermark_flag)
    with closing(requests.get(video_url, headers=self.headers1, stream=True)) as response:
        chunk_size = 1024
        if response.status_code != 200:
            return
        # Chunked responses carry no content-length; the original indexed
        # the header unconditionally and died with KeyError. 0 means
        # "size unknown": the file is still saved, without progress output.
        content_size = int(response.headers.get('content-length', 0))
        if content_size:
            sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
        with open(video_name, 'wb') as file:
            for data in response.iter_content(chunk_size = chunk_size):
                file.write(data)
                size += len(data)
                file.flush()
                if content_size:
                    sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                    sys.stdout.flush()
def run(self):
    """
    Interactive entry point: ask for the settings, then download every video
    Parameters:
        None
    Returns:
        None
    """
    self.hello()
    print('UID取得方式:\n分享用户页面,用浏览器打开短链接,原始链接中/share/user/后的数字即是UID')
    # Every prompt falls back to its default on empty input
    user_id = input('请输入UID (例如60388937600):') or '60388937600'
    watermark_flag = input('是否下载带水印的视频 (0-否(默认), 1-是):')
    watermark_flag = bool(int(watermark_flag if watermark_flag != '' else '0'))
    type_flag = input('f-收藏的(默认), p-上传的:')
    type_flag = type_flag if type_flag != '' else 'f'
    save_dir = input('保存路径 (例如"E:/Download/", 默认"./Download/"):') or "./Download/"
    video_names, video_urls, share_urls, nickname = self.get_video_urls(user_id, type_flag)
    nickname_dir = os.path.join(save_dir, nickname)
    # Type 'f' videos go into a 'favorite' sub-directory
    target_dir = os.path.join(nickname_dir, 'favorite') if type_flag == 'f' else nickname_dir
    # One call creates every missing level and tolerates existing ones
    os.makedirs(target_dir, exist_ok=True)
    print('视频下载中:共有%d个作品!\n' % len(video_urls))
    for num, (name, video_url, share_url) in enumerate(zip(video_names, video_urls, share_urls), start=1):
        print(' 解析第%d个视频链接 [%s] 中,请稍后!\n' % (num, share_url))
        # Strip BOTH path separators: the original if/elif removed only one
        # kind, so a name containing '\\' and '/' kept its '/'.
        video_name = name.replace('\\', '').replace('/', '')
        video_path = os.path.join(target_dir, video_name)
        if os.path.isfile(video_path):
            print('视频已存在')
        else:
            self.video_downloader(video_url, video_path, watermark_flag)
        print('\n')
    print('下载完成!')
def hello(self):
    """
    Print the welcome banner
    Parameters:
        None
    Returns:
        None
    """
    rule = '*' * 100
    for line in (rule, '\t\t\t\t抖音App视频下载小助手', '\t\t作者:Jack Cui、steven7851', rule):
        print(line)
# Script entry point: create the downloader and start the interactive run
if __name__ == '__main__':
douyin = DouYin()
douyin.run()
================================================
FILE: douyin/fuck-byted-acrawler.js
================================================
// Referer:https://raw.githubusercontent.com/loadchange/amemv-crawler/master/fuck-byted-acrawler.js
function generateSignature(userId) {
this.navigator = {
userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
}
var e = {}
var r = (function () {
function e(e, a, r) {
return (b[e] || (b[e] = t("x,y", "return x " + e + " y")))(r, a)
}
function a(e, a, r) {
return (k[r] || (k[r] = t("x,y", "return new x[y](" + Array(r + 1).join(",x[++y]").substr(1) + ")")))(e, a)
}
function r(e, a, r) {
var n, t, s = {}, b = s.d = r ? r.d + 1 : 0;
for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n];
for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t];
return c(e, 0, s)
}
function c(t, b, k) {
function u(e) {
v[x++] = e
}
function f() {
return g = t.charCodeAt(b++) - 32, t.substring(b, b += g)
}
function l() {
try {
y = c(t, b, k)
} catch (e) {
h = e, y = l
}
}
for (var h, y, d, g, v = [], x = 0; ;) switch (g = t.charCodeAt(b++) - 32) {
case 1:
u(!v[--x]);
break;
case 4:
v[x++] = f();
break;
case 5:
u(function (e) {
var a = 0, r = e.length;
return function () {
var c = a < r;
return c && u(e[a++]), c
}
}(v[--x]));
break;
case 6:
y = v[--x], u(v[--x](y));
break;
case 8:
if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y;
break;
case 9:
v[x++] = c;
break;
case 10:
u(s(v[--x]));
break;
case 11:
y = v[--x], u(v[--x] + y);
break;
case 12:
for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length;
u(String.fromCharCode.apply(null, d));
break;
case 13:
y = v[--x], h = delete v[--x][y];
break;
case 14:
v[x++] = t.charCodeAt(b++) - 32;
break;
case 59:
u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []);
break;
case 61:
u(v[--x][t.charCodeAt(b++) - 32]);
break;
case 62:
g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0;
break;
case 65:
h = v[--x], y = v[--x], v[--x][y] = h;
break;
case 66:
u(e(t[b++], v[--x], v[--x]));
break;
case 67:
y = v[--x], d = v[--x], u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y));
break;
case 68:
u(e((g = t[b++]) < "<" ? (b--, f()) : g + g, v[--x], v[--x]));
break;
case 70:
u(!1);
break;
case 71:
v[x++] = n;
break;
case 72:
v[x++] = +f();
break;
case 73:
u(parseInt(f(), 36));
break;
case 75:
if (v[--x]) {
b++;
break
}
case 74:
g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g;
break;
case 76:
u(k[t.charCodeAt(b++) - 32]);
break;
case 77:
y = v[--x], u(v[--x][y]);
break;
case 78:
g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g));
break;
case 79:
g = t.charCodeAt(b++) - 32, u(k["$" + g]);
break;
case 81:
h = v[--x], v[--x][f()] = h;
break;
case 82:
u(v[--x][f()]);
break;
case 83:
h = v[--x], k[t.charCodeAt(b++) - 32] = h;
break;
case 84:
v[x++] = !0;
break;
case 85:
v[x++] = void 0;
break;
case 86:
u(v[x - 1]);
break;
case 88:
h = v[--x], y = v[--x], v[x++] = h, v[x++] = y;
break;
case 89:
u(function () {
function e() {
return r(e.y, arguments, k)
}
return e.y = f(), e.x = c, e
}());
break;
case 90:
v[x++] = null;
break;
case 91:
v[x++] = h;
break;
case 93:
h = v[--x];
break;
case 0:
return v[--x];
default:
u((g << 16 >> 16) - 16)
}
}
var n = this, t = n.Function, s = Object.keys || function (e) {
var a = {}, r = 0;
for (var c in e) a[r++] = c;
return a.length = r, a
}, b = {}, k = {};
return r
})()
('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb
================================================
FILE: baiduwenku.py
================================================
# -*- coding:UTF-8 -*-
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import time
# Script entry point: open a Baidu Wenku document in Chrome and print its text
if __name__ == '__main__':
options = webdriver.ChromeOptions()
# Present the browser with a mobile (Android) user agent
options.add_argument('user-agent="Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19"')
# NOTE(review): the driver path is a non-raw literal containing backslashes;
# '\c' is an invalid escape sequence - consider a raw string
driver = webdriver.Chrome('J:\迅雷下载\chromedriver.exe', chrome_options=options)
driver.get('https://wenku.baidu.com/view/aa31a84bcf84b9d528ea7a2c.html')
html = driver.page_source
bf1 = BeautifulSoup(html, 'lxml')
# Elements with class 'rtcspage' hold the document; the first one is used
# for the title and the page count
result = bf1.find_all(class_='rtcspage')
bf2 = BeautifulSoup(str(result[0]), 'lxml')
title = bf2.div.div.h1.string
pagenum = bf2.find_all(class_='size')
pagenum = BeautifulSoup(str(pagenum), 'lxml').span.string
# The page count is the number captured by this pattern in the 'size' element
pagepattern = re.compile('页数:(\d+)页')
num = int(pagepattern.findall(pagenum)[0])
# Print the document title and page count
print('文章标题:%s' % title)
print('文章页数:%d' % num)
while True:
# num is divided by 5.0 on every pass and compared with 1 further down
# NOTE(review): presumably five pages are shown per load - confirm
num = num / 5.0
html = driver.page_source
bf1 = BeautifulSoup(html, 'lxml')
result = bf1.find_all(class_='rtcspage')
for each_result in result:
bf2 = BeautifulSoup(str(each_result), 'lxml')
texts = bf2.find_all('p')
for each_text in texts:
main_body = BeautifulSoup(str(each_text), 'lxml')
for each in main_body.find_all(True):
# <span> text is printed without non-breaking spaces and without a newline;
# <br> starts a new output line
if each.name == 'span':
print(each.string.replace('\xa0',''),end='')
elif each.name == 'br':
print('')
print('\n')
if num > 1:
page = driver.find_elements_by_xpath("//div[@class='page']")
driver.execute_script('arguments[0].scrollIntoView();', page[-1]) #scroll the last page element into view
# Click the "next" link and give the new content 3 seconds to load
nextpage = driver.find_element_by_xpath("//a[@data-fun='next']")
nextpage.click()
time.sleep(3)
else:
break
================================================
FILE: baiduwenku_pro_1.py
================================================
import requests
import re
import json
import os
# One shared HTTP session for every request made by this script
session = requests.session()


def fetch_url(url):
    """GET ``url`` through the shared session and return the body decoded as GBK."""
    response = session.get(url)
    return response.content.decode('gbk')
def get_doc_id(url):
    """Extract the document id from a ``.../view/<id>.html`` URL.

    The dot before ``html`` is escaped so that only a literal ``.html``
    ends the id; previously any character followed by ``html`` (for
    example in the query string) could extend the match.

    Raises:
        IndexError: if the URL contains no ``view/<id>.html`` part.
    """
    return re.findall(r'view/(.*)\.html', url)[0]
def parse_type(content):
    """Return the first quoted value that follows ``docType`` in the page source.

    Raises:
        IndexError: if no such value is found.
    """
    matches = re.findall(r"docType.*?\:.*?\'(.*?)\'\,", content)
    return matches[0]
def parse_title(content):
    """Return the first quoted value that follows ``title`` in the page source.

    Raises:
        IndexError: if no such value is found.
    """
    matches = re.findall(r"title.*?\:.*?\'(.*?)\'\,", content)
    return matches[0]
def parse_doc(content):
    """Collect the text of a 'doc' document from its per-page JSON files.

    The JSON URLs are scraped from the page source; the last five matches
    are skipped. Within each page a line break is inserted whenever the
    "y" value of a text fragment changes.
    """
    url_list = re.findall('(https.*?0.json.*?)\\\\x22}', content)
    url_list = [addr.replace("\\\\\\/", "/") for addr in url_list]
    pieces = []
    for url in url_list[:-5]:
        page = fetch_url(url)
        # Reset per page; 0 never equals a captured string, so the first
        # fragment of every page starts on a new line.
        y = 0
        for text, position in re.findall('"c":"(.*?)".*?"y":(.*?),', page):
            if not y == position:
                y = position
                pieces.append('\n')
            pieces.append(text.encode('utf-8').decode('unicode_escape', 'ignore'))
    return ''.join(pieces)
def parse_txt(doc_id):
    """Download the plain text of a ``txt`` document identified by *doc_id*.

    The document-info endpoint is queried first; its ``md5sum``,
    ``totalPageNum`` and ``rsign`` values are used to build the retype text
    URL. The JSON returned by that URL is flattened into one string, with
    literal ``\\r`` / ``\\n`` sequences turned into real control characters.

    Raises:
        IndexError: if any of the three fields is missing from the info response.
    """
    info = fetch_url('https://wenku.baidu.com/api/doc/getdocinfo?callback=cb&doc_id=' + doc_id)

    def first_match(pattern):
        return re.findall(pattern, info)[0]

    md5 = first_match('"md5sum":"(.*?)"')
    pn = first_match('"totalPageNum":"(.*?)"')
    rsign = first_match('"rsign":"(.*?)"')
    text_url = ''.join([
        'https://wkretype.bdimg.com/retype/text/', doc_id,
        '?rn=', pn,
        '&type=txt', md5,
        '&rsign=', rsign,
    ])
    pages = json.loads(fetch_url(text_url))
    return ''.join(
        paragraph['c'].replace('\\r', '\r').replace('\\n', '\n')
        for page in pages
        for paragraph in page['parags']
    )
def parse_other(doc_id):
    """Download every page image of a document into a folder named *doc_id*.

    The image URLs come from the ``getbcsurl`` endpoint (requested with
    ``type=ppt``). Each image is saved as ``<index>.jpg``, with the index
    starting at 0. The folder is created when it does not exist yet.
    """
    listing_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + doc_id + "&pn=1&rn=99999&type=ppt"
    listing = fetch_url(listing_url)
    image_urls = [
        raw.replace("\\", '')
        for raw in re.findall('{"zoom":"(.*?)","page"', listing)
    ]
    if not os.path.exists(doc_id):
        os.mkdir(doc_id)
    for index, image_url in enumerate(image_urls):
        image_bytes = session.get(image_url).content
        target = os.path.join(doc_id, str(index) + '.jpg')
        with open(target, 'wb') as image_file:
            image_file.write(image_bytes)
    print("图片保存在" + doc_id + "文件夹")
def save_file(filename, content):
    """Write *content* to *filename* as UTF-8 text and print the saved path."""
    with open(filename, mode='w', encoding='utf8') as out_file:
        out_file.write(content)
        print('已保存为:' + filename)
# test_txt_url = 'https://wenku.baidu.com/view/cbb4af8b783e0912a3162a89.html?from=search'
# test_ppt_url = 'https://wenku.baidu.com/view/2b7046e3f78a6529657d5376.html?from=search'
# test_pdf_url = 'https://wenku.baidu.com/view/dd6e15c1227916888586d795.html?from=search'
# test_xls_url = 'https://wenku.baidu.com/view/eb4a5bb7312b3169a551a481.html?from=search'
def main():
    """Prompt for a Wenku URL and download the document it points to.

    ``doc`` and ``txt`` documents are saved as ``<title>.txt``; every other
    document type is downloaded as page images.
    """
    url = input('请输入要下载的文库URL地址')
    content = fetch_url(url)
    doc_id = get_doc_id(url)
    doc_type = parse_type(content)
    title = parse_title(content)
    if doc_type == 'doc':
        save_file(title + '.txt', parse_doc(content))
    elif doc_type == 'txt':
        save_file(title + '.txt', parse_txt(doc_id))
    else:
        parse_other(doc_id)


if __name__ == "__main__":
    main()
================================================
FILE: baiwan/app.js
================================================
var http = require('http');
var fs = require('fs');
var schedule = require("node-schedule");
var message = {};
var count = 0;
// HTTP server on port 80: every request is answered with ./index.html.
var server = http.createServer(function (req, res) {
    fs.readFile('./index.html', function (error, data) {
        if (error) {
            // The read error used to be ignored, so a missing or unreadable
            // index.html was still answered with a 200 status and no page.
            console.log(error);
            res.writeHead(500, {'Content-Type': 'text/plain'});
            res.end('Internal Server Error');
            return;
        }
        res.writeHead(200, {'Content-Type': 'text/html'});
        res.end(data, 'utf-8');
    });
}).listen(80);
console.log('Server running!');
var lineReader = require('line-reader');
// Reload file.txt into the shared `message` object.
//
// Line N of the file is stored under the key 'lineN'. Keys up to 'line25'
// that the file did not supply are set to the placeholder 'f', so no value
// from an earlier, longer file is left behind.
//
// The line counter is local to each call. Previously a shared global counter
// was used and the padding ran synchronously, before the asynchronous read
// had delivered any line, so the padding range depended on the previous
// read and overlapping reads could interleave their counts.
//
// NOTE(review): relies on line-reader invoking the optional third argument of
// eachLine once reading has finished (or failed) - confirm against the
// installed line-reader version.
function messageGet() {
    var lineNo = 0;
    lineReader.eachLine('file.txt', function (line, last) {
        lineNo++;
        var name = 'line' + lineNo;
        console.log(name);
        console.log(line);
        message[name] = line;
    }, function (err) {
        if (err) {
            console.log(err);
        }
        // Pad the remaining slots only after every available line is stored.
        for (var i = lineNo + 1; i <= 25; i++) {
            message['line' + i] = 'f';
        }
    });
}
var io = require('socket.io').listen(server);
// Recurrence rule that drives the periodic refresh of `message`.
var rule = new schedule.RecurrenceRule();
// The rule's `second` field is given every integer from 1 to 1799.
// NOTE(review): seconds within a minute only range over 0-59, so the larger
// values presumably never match and second 0 is excluded - confirm intent.
var times = [];
var tick = 1;
while (tick < 1800) {
    times.push(tick);
    tick += 1;
}
rule.second = times;
schedule.scheduleJob(rule, function () {
    messageGet();
});
// Log every client disconnect.
function onDisconnect() {
    console.log('User disconnected');
}

// On connect, send the current `message` snapshot to the new client and
// broadcast the same snapshot to every other connected client.
function onConnection(socket) {
    socket.emit('users', message);
    socket.broadcast.emit('users', message);
    socket.on('disconnect', onDisconnect);
}

io.sockets.on('connection', onConnection);
================================================
FILE: baiwan/baiwan.py
================================================
# -*-coding:utf-8 -*-
import requests
from lxml import etree
from bs4 import BeautifulSoup
import urllib
import time, re, types, os
"""
代码写的匆忙,本来想再重构下,完善好注释再发,但是比较忙,想想算了,所以自行完善吧!写法很不规范,勿见怪。
作者: Jack Cui
Website:http://cuijiahua.com
注: 本软件仅用于学习交流,请勿用于任何商业用途!
"""
class BaiWan():
    """Quiz assistant: polls the quiz API for the current question, searches
    Baidu Zhidao for it and writes the findings to ``file.txt``."""

    def __init__(self):
        # Baidu Zhidao search endpoint.
        self.baidu = 'http://zhidao.baidu.com/search?'
        # Quiz API endpoint. The real URL differs per user and embeds phone
        # information, so only a placeholder is kept here; capture your own
        # with a packet sniffer.
        self.api = 'https://api-spe-ttl.ixigua.com/xxxxxxx={}'.format(int(time.time()*1000))

    def get_question(self):
        """Poll the quiz API and run a search for every new question.

        The last question seen is kept in ``question.txt``. A search is only
        triggered when the polled question differs from the stored one.
        """
        to = True
        while to:
            if 'question.txt' not in os.listdir('./'):
                with open('question.txt', 'w') as fw:
                    fw.write('百万英雄尚未出题请稍后!')
            go = True
            while go:
                req = requests.get(self.api, verify=False)
                req.encoding = 'utf-8'
                html = req.text
                print(html)
                if '*' in html:
                    question_start = html.index('*')
                    try:
                        question_end = html.index('?')
                    except ValueError:
                        # The old fallback repeated the identical lookup and
                        # could never succeed; fall back to the full-width
                        # question mark instead.
                        question_end = html.index('\uff1f')
                    # Drop the two leading characters of the slice, i.e. the
                    # '*' marker and the character after it.
                    question = html[question_start:question_end][2:]
                    with open('question.txt', 'r') as fr:
                        text = fr.readline()
                    if text != question:
                        print(question)
                        go = False
                        with open('question.txt', 'w') as f:
                            f.write(question)
                    else:
                        time.sleep(1)
                else:
                    # NOTE(review): this only stops the outer loop after the
                    # next question; the inner loop keeps polling without a
                    # pause until a question shows up - confirm intent.
                    to = False
            temp = re.findall(r'[\u4e00-\u9fa5a-zA-Z0-9\+\-\*/]', html[question_end+1:])
            b_index = []
            print(temp)
            # Collect the positions of the 'B' characters, plus a 'P' that
            # sits within the last three characters. Presumably these delimit
            # the three options in the API payload - verify against a capture.
            for index, each in enumerate(temp):
                if each == 'B':
                    b_index.append(index)
                elif each == 'P' and (len(temp) - index) <= 3:
                    b_index.append(index)
                    break
            if len(b_index) == 4:
                a = ''.join(temp[b_index[0] + 1:b_index[1]])
                b = ''.join(temp[b_index[1] + 1:b_index[2]])
                c = ''.join(temp[b_index[2] + 1:b_index[3]])
                alternative_answers = [a, b, c]
                # For "which of the following" questions, put the options in
                # front of the query text.
                if '下列' in question:
                    question = a + ' ' + b + ' ' + c + ' ' + question.replace('下列', '')
                elif '以下' in question:
                    question = a + ' ' + b + ' ' + c + ' ' + question.replace('以下', '')
            else:
                alternative_answers = []
            # Search for the question together with its options.
            self.search(question, alternative_answers)
            time.sleep(1)

    @staticmethod
    def _recommend(question, alternative_answers, answer_texts):
        """Pick the option to recommend, or '' when none can be picked.

        Each answer text votes for the first option it contains. For a
        negated question (one containing a word such as "不是") the first
        option with no vote is chosen; otherwise the option with the most
        votes wins, ties going to the option that was voted for first.
        """
        best = []
        print('\n')
        for text in answer_texts:
            for each_answer in alternative_answers:
                if each_answer in text:
                    best.append(each_answer)
                    print(each_answer, end=' ')
                    print('\n')
                    break
        statistics = {}
        for each in best:
            statistics[each] = statistics.get(each, 0) + 1
        errors = ['没有', '不是', '不对', '不正确', '错误', '不包括', '不包含', '不在', '错']
        error_list = [word in question for word in errors]
        print(error_list)
        recommend = ''
        if any(error_list):
            for each_answer in alternative_answers:
                # Test against the dict keys. The old test against
                # statistics.items() compared a string with (key, count)
                # tuples, so it always picked the first option.
                if each_answer not in statistics:
                    recommend = each_answer
                    print('推荐答案:', recommend)
                    break
        elif statistics:
            recommend = max(statistics.items(), key=lambda e: e[1])[0]
            print('推荐答案:', recommend)
        return recommend

    def search(self, question, alternative_answers):
        """Search Baidu Zhidao for *question* and write a report to ``file.txt``.

        Parameters:
            question: query text.
            alternative_answers: list of option strings; may be empty.
        """
        print(question)
        print(alternative_answers)
        infos = {"word": question}
        # Build the Baidu search URL.
        url = self.baidu + 'lm=0&rn=10&pn=0&fr=search&ie=gbk&' + urllib.parse.urlencode(infos, encoding='GB2312')
        print(url)
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
                   }
        sess = requests.Session()
        req = sess.get(url=url, headers=headers, verify=False)
        req.encoding = 'gbk'
        bf = BeautifulSoup(req.text, 'lxml')
        answers = bf.find_all('dd', class_='dd answer')
        for answer in answers:
            print(answer.text)
        # Recommended answer ('' when there is nothing to recommend).
        recommend = ''
        if alternative_answers != []:
            recommend = self._recommend(
                question, alternative_answers, [answer.text for answer in answers])
        # Write the report file.
        with open('file.txt', 'w') as f:
            f.write('问题:' + question)
            f.write('\n')
            f.write('*' * 50)
            f.write('\n')
            if alternative_answers != []:
                f.write('选项:')
                for each_answer in alternative_answers:
                    f.write(each_answer)
                    f.write(' ')
            f.write('\n')
            f.write('*' * 50)
            f.write('\n')
            f.write('参考答案:\n')
            for answer in answers:
                f.write(answer.text)
                f.write('\n')
            f.write('*' * 50)
            f.write('\n')
            if recommend != '':
                f.write('最终答案请自行斟酌!\t')
                # Write the recommendation that was actually chosen. It used
                # to be recomputed from the vote counts, which ignored the
                # negated-question choice and failed on an empty count table.
                f.write('推荐答案:' + recommend)
if __name__ == '__main__':
    # Script entry point: build the helper and start polling for questions.
    BaiWan().get_question()
================================================
FILE: baiwan/file.txt
================================================
⣺Ǽ¼
**************************************************
ѡ723 81 101
**************************************************
ο𰸣
Ƽ
81 ÿİһйžգҲСһڡ August 1, anniversary of the founding of the Chinese People's Liberation Army֪Ⱦ뵽http://baike.baidu.com/view/23211.htm
[ϸ]
ãйžĽÿİһգưһڣİһպ
𣺽81գ71ա ÿ81йžգ׳ơһڡ192781գй챱ˣܶ Ҷͦ е쵼£ڽϲװ壬췴Թ...
730
𣺰һǽڣǰһ첻731ô
192781һϲ,йװɵĵһǹ,־йй쵼װʱ,־й͵ӵĵÿİһйž
Դйʱй쵼ϲ塣192781յϲ壬йװɵĵһǹ־йй쵼װʱڣ־й͵ӵĵ 19337£...
Ԫ1181101
𣺰һŽ
201581 ũ ʮ 201681 ũ إ ÿİһйžգҲСһڡ1933711գлάʱίԱ630յĽ飬81...
**************************************************
մã Ƽ𰸣81
================================================
FILE: baiwan/index.html
================================================
百万英雄答题辅助系统
================================================
FILE: baiwan/question.txt
================================================
Ǽ¼
================================================
FILE: bilibili/README.md
================================================
## 功能
下载B站视频和弹幕,将xml原生弹幕转换为ass弹幕文件,支持PotPlayer等播放器的弹幕播放。
## 作者
* Website: [http://cuijiahua.com](http://cuijiahua.com "悬停显示")
* Author: Jack Cui
* Date: 2018.6.12
## 更新
* 2018.09.12:添加FFmpeg分段视频合并
## 使用说明
FFmpeg下载,并配置环境变量。http://ffmpeg.org/
python bilibili.py -d 猫 -k 猫 -p 10
三个参数:
-d 保存视频的文件夹名
-k B站搜索的关键字
-p 下载搜索结果前多少页
================================================
FILE: bilibili/bilibili.py
================================================
# -*-coding:utf-8 -*-
# Website: http://cuijiahua.com
# Author: Jack Cui
# Date: 2018.6.9
import requests, json, re, sys, os, urllib, argparse, time
from urllib.request import urlretrieve
from contextlib import closing
from urllib import parse
import xml2ass
class BiliBili:
def __init__(self, dirname, keyword):
    """Prepare the request headers, the HTTP session and the output folder.

    Parameters:
        dirname: name of the folder that downloaded videos are saved to
        keyword: search keyword; it is URL-quoted into the download Referer
    """
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'
    accept_language = 'zh-CN,zh;q=0.9'
    accept_encoding = 'gzip, deflate, br'
    # Headers for downloading the video stream.
    self.dn_headers = {
        'User-Agent': user_agent,
        'Accept': '*/*',
        'Accept-Encoding': accept_encoding,
        'Accept-Language': accept_language,
        'Referer': 'https://search.bilibili.com/all?keyword=%s' % parse.quote(keyword),
    }
    # Headers for the search request.
    self.search_headers = {
        'User-Agent': user_agent,
        'Accept-Language': accept_language,
        'Accept-Encoding': accept_encoding,
        'Accept': 'application/json, text/plain, */*',
    }
    # Headers for fetching the video page.
    self.video_headers = {
        'User-Agent': user_agent,
        'Accept-Language': accept_language,
        'Accept-Encoding': accept_encoding,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    }
    # Headers for fetching the danmaku (comment) file.
    self.danmu_header = {
        'User-Agent': user_agent,
        'Accept': '*/*',
        'Accept-Encoding': accept_encoding,
        'Accept-Language': accept_language,
    }
    self.sess = requests.Session()
    self.dir = dirname
def video_downloader(self, video_url, video_name):
    """
    Download one video into ``self.dir`` while printing the progress.

    Parameters:
        video_url: URL of the video stream
        video_name: file name to save the video under
    Returns:
        None
    Raises:
        KeyError: if a 200 response carries no content-length header
    """
    size = 0
    with closing(self.sess.get(video_url, headers=self.dn_headers, stream=True, verify=False)) as response:
        # Check the status first. The content-length header used to be read
        # before this check, so a failed request without that header raised
        # KeyError instead of reporting the broken link.
        if response.status_code != 200:
            print('链接异常')
            return
        chunk_size = 1024
        content_size = int(response.headers['content-length'])
        sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))
        video_name = os.path.join(self.dir, video_name)
        with open(video_name, 'wb') as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                size += len(data)
                file.flush()
                # '\r' keeps the progress on a single console line.
                sys.stdout.write(' [下载进度]:%.2f%%' % float(size / content_size * 100) + '\r')
                if size == content_size:
                    print('\n')
def search_video(self, search_url):
    """
    Query the search API and collect the title and play URL of every hit.

    Parameters:
        search_url: URL of the search API request
    Returns:
        titles: list of video titles
        arcurls: list of video play-page URLs
    """
    response = self.sess.get(url=search_url, headers=self.search_headers, verify=False)
    videos = json.loads(response.text)["data"]['result']
    # NOTE(review): both replace() calls have an empty search string here, so
    # the title passes through unchanged - possibly markup tags were lost from
    # these literals; confirm against the upstream file.
    titles = [video['title'].replace('','').replace('','') for video in videos]
    arcurls = [video['arcurl'] for video in videos]
    return titles, arcurls
def get_download_url(self, arcurl):
"""
获取视频下载地址
Parameters:
arcurl: 视频播放地址
oid:弹幕地址参数
Returns:
download_url:视频下载地址
"""
req = self.sess.get(url=arcurl, headers=self.video_headers, verify=False)
pattern = '.__playinfo__=(.*)")
tac = _tac_re.search(share_user.text).group(1)
_dytk_re = re.compile(r"dytk\s*:\s*'(.+)'")
dytk = _dytk_re.search(share_user.text).group(1)
_nickname_re = re.compile(r'
||
')
rmtr = re.compile('| || || ')
rmtime1 = re.compile(' .*? ')
rmtime2 = re.compile('')
def replace(self, x):
x = re.sub(self.rma, ' ', x)
x = re.sub(self.rmtb, '---', x)
x = re.sub(self.rmtr, ' ', x)
x = re.sub(self.rmtime1, '\n', x)
x = re.sub(self.rmtime2, '', x)
return x.strip()
def Getgrade(response):
    """Parse the table with id ``Datagrid1`` in *response* into a list of dicts.

    The first table row supplies the keys and every later row supplies one
    record. Only the cells at positions 0, 1, 3, 4, 6, 7 and 8 of each row
    are used.
    """
    def pick_cells(cells):
        # Keep columns 0-1, 3-4 and 6-8; columns 2 and 5 are dropped.
        return cells[:2] + cells[3:5] + cells[6:9]

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.find(id="Datagrid1").findAll("tr")
    keys = [td.string for td in pick_cells(rows[0].findAll("td"))]
    Grades = []
    for row in rows[1:]:
        values = [td.string for td in pick_cells(row.findAll("td"))]
        Grades.append(dict(zip(keys, values)))
    return Grades
def Getgradetestresults(trs):
    """Turn table rows into a list of dicts keyed by the header row's cell texts.

    *trs* is a sequence of row elements exposing ``xpath``. The first row
    supplies the keys and each following row becomes one dict. Keys and
    values are paired positionally, so a shorter row yields a shorter dict.

    Raises:
        IndexError: if *trs* is empty.
    """
    cell_text = './/td/text()'
    headers = list(trs[0].xpath(cell_text))
    return [dict(zip(headers, row.xpath(cell_text))) for row in trs[1:]]
class University:
def __init__(self, student, baseurl):
    """Remember the student and base URL and open an HTTP session.

    The session is given a desktop Chrome User-Agent header.
    """
    # NOTE(review): reload() is not a builtin on Python 3; presumably it is
    # imported elsewhere in this file or left over from Python 2 - confirm.
    reload(sys)
    self.student = student
    self.baseurl = baseurl
    session = requests.session()
    session.headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
    self.session = session
def Login(self):
url = self.baseurl+'/default2.aspx'
res = self.session.get(url)
cont = res.content
selector = etree.HTML(cont)
__VIEWSTATE = selector.xpath('//*[@id="form1"]/input/@value')[0]
imgurl = self.baseurl + '/CheckCode.aspx'
imgres = self.session.get(imgurl, stream=True)
img = imgres.content
with open('code.jpg', 'wb') as f:
f.write(img)
jpg = Image.open('{}/code.jpg'.format(os.getcwd()))
jpg.show()
jpg.close
code = input('输入验证码:')
RadioButtonList1 = u"学生"
data = {
"__VIEWSTATE": __VIEWSTATE,
"txtUserName": self.student.user,
"TextBox1": self.student.pswd,
"TextBox2": self.student.pswd,
"txtSecretCode": code,
"RadioButtonList1": RadioButtonList1,
"Button1": "",
"lbLanguage": ""
}
loginres = self.session.post(url, data=data)
logcont = loginres.text
pattern = re.compile(
'