([^(([^<]*?)|
([^<]*?)
' matchs = re.findall(pattern, data.text) ret = '' for match in matchs: ret += match[0].strip() + match[1].strip() + match[2].strip() return ret.replace('半佛仙人', '').replace('看一看入口已关闭', '') if __name__ == '__main__': for index, msgUrl in enumerate(getAllMsgUrl(start=0)): if index < 557: if index < 217 or index > 226: continue ret = getText(msgUrl) if ret == '': continue print(index, msgUrl) print(ret) with open(os.path.join('banfoText', '{}.txt'.format(index)), 'w+', encoding='utf-8') as f: f.write(ret) print('done!') ================================================ FILE: conf.py ================================================ BackgroundMusic = 'backgroundMusic.mp3' DoutulaButton = 0 BaiduButton = 1 MODELNAME = 'gpt-cpm-small-cn-distill' ================================================ FILE: edit.py ================================================ # -*- coding: utf-8 -*- # Form implementation generated from reading ui file 'edit.ui' # # Created by: PyQt5 UI code generator 5.15.4 # # WARNING: Any manual changes made to this file will be lost when pyuic5 is # run again. Do not edit this file unless you know what you are doing. import os import sys import time from PyQt5 import QtCore, QtWidgets from PyQt5.QtCore import QThread, pyqtSignal, QObject from system_hotkey import SystemHotkey from generation import getPredictText from utils import getTips class getTipsThread(QThread): """ 获取提示线程类 """ signal = pyqtSignal(str) def __init__(self, mText: str) -> None: """ 初始化获取提示线程类 :param mText: 关键词句 """ super().__init__() self.text = mText def __del__(self): self.wait() def run(self): """ 文本生成 :return: None """ ''' # 使用deepai的文本生成服务 text = getTips(self.text) self.signal.emit(text) ''' text = getPredictText(self.text.replace('\n', ''), length=500) self.signal.emit(text) class Ui_MainWindow(QObject): hotkeySign = pyqtSignal() def __init__(self): super().__init__() self.subThread = None self.fileName = None def setupUi(self, MainWindow): MainWindow.resize(862, 579) self.mainWindow = MainWindow self.centralwidget = QtWidgets.QWidget(MainWindow) # 文案输入&编辑框 self.writingEdit = QtWidgets.QPlainTextEdit(self.centralwidget) self.writingEdit.setGeometry(QtCore.QRect(10, 10, 431, 501)) # 给文案提示加个框框 self.groupBox = QtWidgets.QGroupBox(self.centralwidget) self.groupBox.setGeometry(QtCore.QRect(450, 10, 401, 501)) self.groupBox.setCheckable(False) self.groupBox.setTitle('提示') # 文案提示框框 self.tipsEdit = QtWidgets.QPlainTextEdit(self.groupBox) self.tipsEdit.setGeometry(QtCore.QRect(10, 20, 381, 471)) # 文件名 self.FileNameLabel = QtWidgets.QLabel(self.centralwidget) self.FileNameLabel.setGeometry(QtCore.QRect(10, 520, 101, 31)) self.FileNameLabel.setText('文件路径:') self.fileNameEdit = QtWidgets.QLineEdit(self.centralwidget) self.fileNameEdit.setGeometry(QtCore.QRect(90, 520, 351, 31)) # 三个按钮水平分布 self.horizontalLayoutWidget = QtWidgets.QWidget(self.centralwidget) self.horizontalLayoutWidget.setGeometry(QtCore.QRect(450, 514, 401, 41)) self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget) self.horizontalLayout.setContentsMargins(0, 0, 0, 0) self.horizontalLayout.setObjectName('horizontalLayout') # 打开 self.openButton = QtWidgets.QPushButton(self.horizontalLayoutWidget) self.openButton.setText('打开') self.openButton.clicked.connect(self.openFile) self.horizontalLayout.addWidget(self.openButton) # 保存 self.saveButton = QtWidgets.QPushButton(self.horizontalLayoutWidget) self.saveButton.setText('保存') self.saveButton.clicked.connect(self.saveFile) self.horizontalLayout.addWidget(self.saveButton) # 提示 self.tipsButton = QtWidgets.QPushButton(self.horizontalLayoutWidget) self.tipsButton.setText('提示') self.tipsButton.clicked.connect(self.tips) self.horizontalLayout.addWidget(self.tipsButton) # 热键 self.hotkeySign.connect(self.tips) self.F1Hotkey = SystemHotkey() self.F1Hotkey.register(['f1'], callback=lambda x: self.hotkeyEvent()) MainWindow.setCentralWidget(self.centralwidget) QtCore.QMetaObject.connectSlotsByName(MainWindow) def hotkeyEvent(self): self.hotkeySign.emit() def msgBox(self, msg: str, hasQuery: bool = False) -> bool: """ :param msg: 消息 :param hasQuery: 是否带询问 :return: 如果hasQuery为True,返回值为用户是否点击了确定 """ if not hasQuery: QtWidgets.QMessageBox.information(self.mainWindow, '提示', msg, QtWidgets.QMessageBox.Ok, QtWidgets.QMessageBox.Ok) return True reply = QtWidgets.QMessageBox.question(self.mainWindow, '提示', msg, QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.Cancel, QtWidgets.QMessageBox.Cancel) return reply == QtWidgets.QMessageBox.Yes def openFile(self) -> None: """ 打开一个文件 :return: None """ fileName, _ = QtWidgets.QFileDialog.getOpenFileName(self.mainWindow, '选择文案', os.path.join(os.getcwd(), 'writing'), 'Text Files (*.txt)') if not os.path.exists(fileName): self.msgBox('未选择文件!') return with open(fileName, 'r+', encoding='UTF-8') as f: data = f.read() self.writingEdit.appendPlainText(data) self.fileName = fileName self.fileNameEdit.setText(fileName) def saveFile(self) -> None: """ 保存文件 :return: None """ fileName = self.fileNameEdit.text() if os.path.sep not in fileName: if fileName == '': self.msgBox('请先设置文件名!') return if len(fileName) <= 4: self.msgBox('文件名非法!') return fileName = os.path.join('writing', fileName) if not os.path.exists('writing'): os.mkdir('writing') self.fileName = fileName with open(fileName, 'w+', encoding='UTF-8') as f: f.write(self.writingEdit.toPlainText()) self.msgBox('文件已保存在{}'.format(fileName)) def tips(self) -> None: text = self.writingEdit.toPlainText() if text == '': return self.tipsEdit.setPlainText('') if self.subThread is not None: self.subThread.terminate() while self.subThread.isRunning() and not self.subThread.isFinished(): time.sleep(0.1) self.subThread = getTipsThread(self.writingEdit.toPlainText()) self.subThread.signal.connect(self.setTips) self.subThread.start() def setTips(self, text: str) -> None: """ 设置提示 :param text: 提示文本 :return: None """ self.tipsEdit.setPlainText(text) if __name__ == "__main__": app = QtWidgets.QApplication(sys.argv) MainWindow = QtWidgets.QMainWindow() ui = Ui_MainWindow() ui.setupUi(MainWindow) MainWindow.show() sys.exit(app.exec_()) ================================================ FILE: generation.py ================================================ import paddle import paddlenlp from conf import MODELNAME paddle.set_device('gpu') gptModel = paddlenlp.transformers.GPTModel.from_pretrained('models') gptModel = paddlenlp.transformers.GPTForPretraining(gptModel) gptModel.eval() tokenizer = paddlenlp.transformers.GPTChineseTokenizer.from_pretrained(MODELNAME) def getPredictText(text: str, length: int = 200) -> str: """ 生成半佛风格文本 :param text: 前面部分的文本 :param length: 生成文本长度 :return: 生成的文本 """ encodedText = tokenizer(text=text, return_token_type_ids=False) inputIds = paddle.to_tensor(encodedText['input_ids'], dtype='int64').unsqueeze(0) ids, _ = gptModel.generate(input_ids=inputIds, max_length=length, min_length=32, decode_strategy='sampling') ids = ids[0].numpy().tolist() # 使用tokenizer将生成的id转为文本 generatedText = tokenizer.convert_ids_to_string(ids) return generatedText getPredictText('开始预测模型会先初始化一下, 抵消掉这个时间') ================================================ FILE: main.py ================================================ import logging import math import os.path import pickle import random import shutil import threading import time from PyQt5 import QtWidgets from PyQt5.QtCore import QThread, pyqtSignal from PyQt5.QtGui import QDropEvent, QDragEnterEvent, QStandardItem import mainWindow from PyQt5.QtWidgets import QApplication, QDialog from moviepy.editor import * from conf import BackgroundMusic, BaiduButton, DoutulaButton from utils import getUuid, getBaiDuAudio, getBaiduImgPath, getDoutulaImgPath, resizeImg, convertToRGB class genVideoThread(QThread): """ 生成视频线程类 """ signal = pyqtSignal(str) def __init__(self, sections: list, materialName: str, fileName: str) -> None: """ 初始化几个参数 :param sections: 字幕以及图片信息 :param materialName: 素材路径 :param fileName: 保存视频名称 """ super().__init__() self.sections = sections self.materialName = materialName self.fileName = fileName def __del__(self): self.wait() def run(self): screensize = (800, 600) videoClips = [] for i, section in enumerate(self.sections): imgPath, text = section[0], section[1] if len(text) >= 1: mark = text[-1] if mark == '$': text = text[:-1] text = text.split(r'\\') text = '\\'.join([x.replace(r'\n', '\n') for x in text]) text = text.strip().split('\n') if imgPath is None: imgPath = 'background.png' # gif用到,用于标记当前字幕对应的gif从哪儿开始 index = 0 print(text, imgPath) if imgPath.endswith('.gif'): # 首先计算一下当前所有语音时间长度 clip = VideoFileClip(imgPath) clip = clip.loop() else: convertToRGB(imgPath) clip = ImageClip(imgPath) # 设置一下图片/gif大小 if mark != '$': width, height = clip.size width, height = resizeImg(width, height) clip = clip.resize((width, height)) # 考虑到每张表情包可能对应多句字幕 for txt in text: # 合成语音 txtAudio = getBaiDuAudio(txt, os.path.join(self.materialName, 'audio')) if len(txt) < 12: fontsize = 50 else: fontsize = 40 txtClip = TextClip(txt, color='white', font='STKaiti', kerning=5, fontsize=fontsize, align='South') if txtAudio is None: logging.error('get the audio of {} failed!'.format(txt)) continue txtAudio = AudioFileClip(txtAudio) # 表情包视频与字幕融合 cvc = CompositeVideoClip([clip.set_position(('center', 'center')).subclip(index, txtAudio.duration), txtClip.set_position(('center', 0.85), relative=True)], size=screensize) cvc = cvc.subclip(0, txtAudio.duration) index += txtAudio.duration # 添加配音 cvc = cvc.set_audio(txtAudio) videoClips.append(cvc) self.signal.emit('进度: {}%'.format(math.ceil((i * 80) / len(self.sections)))) finalClip = concatenate_videoclips(videoClips) # 获取原视频声音 audio = finalClip.audio # 整体背景音乐 audioClip = AudioFileClip(BackgroundMusic) if audioClip.duration > finalClip.duration: audioClip = audioClip.subclip(0, audio.duration) elif audioClip.duration < finalClip.duration: audioClip = afx.audio_loop(audioClip, duration=audio.duration) audioClip = afx.volumex(audioClip, factor=0.35) self.signal.emit('进度: {}%'.format(math.ceil(i * 80 / len(self.sections)) + random.randint(5, 15))) # 声音结合起来 audio = CompositeAudioClip([audio, audioClip]) finalClip = finalClip.set_audio(audio) fileName = os.path.join('out', self.fileName) self.signal.emit('进度: {}'.format('处理完成! 正在写出文件...')) finalClip.write_videofile(fileName, fps=25, codec='mpeg4') self.signal.emit(fileName) class addImgThread(QThread): """ 获取网络表情包线程类 """ signal = pyqtSignal(str) def __init__(self, mText: str, mButton: int) -> None: """ 初始化获取网络表情包线程类 :param mText: 搜索关键词 :param mButton: 采用哪个搜索引擎 """ super().__init__() self.text = mText self.button = mButton def __del__(self): self.wait() def run(self): if self.button == BaiduButton: for path in getBaiduImgPath(self.text): self.signal.emit(path) if self.button == DoutulaButton: for path in getDoutulaImgPath(self.text): self.signal.emit(path) class MainDialog(QDialog): def __init__(self, parent=None): super(QDialog, self).__init__(parent) # 当前指向的句子 self.nowPos = None # 获取表情包线程句柄 self.subThread = None # 已经设定好的句子以及表情包,三元组(图片路径, 文案, 时间戳),时间戳一旦生成不再修改,主要用于区分文案并生成对应的图片路径 self.sections = [] # 接收拖放对象 self.setAcceptDrops(True) self.fileName = None self.materialName = None # lock of save and read the bfs file self.lock = threading.Lock() self.ui = mainWindow.Ui_MainWindow() self.ui.setupUi(self) # start save thread self.saveThread = threading.Thread(target=self.save) self.saveThread.start() def save(self) -> None: """ 每隔5秒保存一次工程信息 :return: None """ time.sleep(5) # 窗口存在则一直保存 while self.ui.windowIsVisible(): self.lock.acquire() if len(self.sections) <= 0: self.lock.release() time.sleep(5) continue data = dict() data['nowPos'] = self.nowPos data['sections'] = self.sections data['fileName'] = self.fileName data['materialName'] = self.materialName fileName = os.path.join(self.materialName, self.fileName[:self.fileName.rfind('.')] + '.bfs') with open(fileName, 'wb') as f: pickle.dump(data, f) self.lock.release() time.sleep(5) def dragEnterEvent(self, event: QDragEnterEvent) -> None: """ 拖放事件 :param event: QDragEnterEvent :return: None """ if event.mimeData().hasText(): event.accept() else: event.ignore() def dropEvent(self, event: QDropEvent) -> None: """ 拖放事件,主要处理.bfs工程文件, .txt文案文件, .gif/.png等图片文件 :param event: 拖放对象事件 :return: None """ filePathList = event.mimeData().text() filePath = filePathList.split('\n')[0].replace('file:///', '', 1) # 说明是加载的工程文件 if filePath.endswith('.bfs'): self.loadBfs(filePath) return # 说明加载的是文案文件 if filePath.endswith('.txt'): if self.fileName is None: self.ui.msgBox('请先设置工程目录!') return self.loadText(True, filePath) return # 其他情况应该是加载的图片文件,先判断是不是gif/图片 if not filePath.endswith('.gif'): # 通过加载文件来判断是否为图片,不是则返回 try: ImageClip(filePath) except: return self.loadPic(filePath) def loadBfs(self, filePath: str) -> None: """ 加载工程文件 :param filePath: 工程文件路径 :return: None """ if len(self.sections) > 0: if not self.ui.msgBox('导入工程文件将清空当前工作内容,可能导致部分内容丢失,是否继续?', True): return # 需要在material目录下有对应的文件夹,没有的禁止载入 fileName = os.path.basename(filePath).replace('.bfs', '') if not os.path.exists(os.path.join('material', fileName)): self.ui.msgBox('未找到对应的素材文件夹!') return with open(filePath, 'rb') as f: data = pickle.load(f) self.lock.acquire() self.nowPos = data['nowPos'] self.sections = data['sections'] # 检查资源文件是否都存在 for section in self.sections: if section[0] is not None and not os.path.exists(section[0]): self.nowPos = None self.sections = [] self.ui.msgBox('对应的素材缺失!') self.lock.release() return self.fileName = data['fileName'] self.ui.setFileName(self.fileName) self.materialName = data['materialName'] # 将sections的内容填充到表格 self.ui.delAllRow() for section in self.sections: self.ui.addRow(section[1]) if self.nowPos is not None: self.ui.setSubTileText(self.sections[self.nowPos][1]) self.ui.setSearchText(self.sections[self.nowPos][1]) imgPath = self.sections[self.nowPos][0] if imgPath is None: self.ui.delVideoImg() else: self.ui.changeVideoImg(imgPath) else: self.ui.delVideoImg() self.lock.release() def loadPic(self, imgPath: str) -> None: """ 加载拖放进来的图片文件 :param imgPath: 图片文件路径 :return: None """ if self.nowPos is None: # 没有工程内容则忽略该次拖入文件 self.ui.msgBox('请先设置字幕内容!') return self.ui.changeVideoImg(imgPath) imgBaseName = os.path.basename(imgPath) suffix = imgBaseName[imgBaseName.rfind('.') + 1:] uuid = self.sections[self.nowPos][2] newPath = '{}.{}'.format(uuid, suffix) newPath = os.path.join(os.path.join(self.materialName, 'img'), newPath) shutil.copyfile(imgPath, newPath) # 保存图片与字幕信息 self.sections[self.nowPos] = [newPath, self.ui.getSubtitle(), uuid] self.ui.setRowText(self.nowPos, self.ui.getSubtitle()) def setFilename(self) -> None: """ 设置文件名 :return: None """ # 加锁,禁止保存或者载入工程文件 self.lock.acquire() fileName = self.ui.getFileName() materialName = os.path.join('material', fileName[:fileName.rfind('.')]) # 当前工作区没内容,说明是新建的工程,新建的工程的名字不能和之前重复 if len(self.sections) <= 0 and os.path.exists(materialName): self.ui.msgBox('文件名已存在,请更换名字或加载之前缓存!') self.lock.release() return self.fileName = fileName # 文件名并未更改,可能是未修改或者只修改了后缀名,都可以忽略 if self.materialName == materialName: self.ui.msgBox('设置工程文件夹成功!') self.lock.release() return # 当前工作区无内容,说明是新建的工程,需要新建文件夹 if len(self.sections) <= 0: os.makedirs(os.path.join(materialName, 'audio')) os.makedirs(os.path.join(materialName, 'img')) self.ui.msgBox('新建工程文件夹成功!') else: # 当前工作区有内容,说明工程已经存在,需要对所有数据进行重命名 self.changeFileName(self.materialName, materialName) self.ui.msgBox('重命名工程文件夹成功!') self.materialName = materialName self.lock.release() def changeFileName(self, oldMaterialName: str, materialName: str) -> None: """ 修改了文件名需要对文件夹等全部进行修改 :param oldMaterialName: 旧的素材文件夹名 :param materialName: 新的素材文件夹名 :return: """ # 对sections里面包含的图片信息的地址进行修改 for section in self.sections: if section[0] is None: continue imgBaseName = os.path.basename(section[0]) newPath = os.path.join(os.path.join(materialName, 'img'), imgBaseName) section[0] = newPath # 更改文件夹名字 os.rename(oldMaterialName, materialName) def setSubtitleInfo(self) -> None: """ 设置上一句/下一句对应的字幕以及图片信息 :return: None """ self.ui.setSubTileText(self.sections[self.nowPos][1]) self.ui.setSearchText(self.sections[self.nowPos][1]) # 如果已经设置好了表情包,显示出来;否则就清空 imgPath = self.sections[self.nowPos][0] if imgPath is None: self.ui.delVideoImg() else: self.ui.changeVideoImg(imgPath) def last(self) -> None: """ 上一句,加载上一句字幕与图片 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('当前工作区暂无内容!') return # 计算当前应该到达的光标 if self.nowPos is None: self.nowPos = 1 else: # 保存下一句的字幕信息 self.sections[self.nowPos][1] = self.ui.getSubtitle() self.ui.setRowText(self.nowPos, self.ui.getSubtitle()) self.nowPos = self.nowPos - 1 if self.nowPos > 0 else len(self.sections) - 1 self.setSubtitleInfo() def next(self) -> None: """ 下一句,加载下一句字幕与图片 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('当前工作区暂无内容!') return if self.nowPos is None: self.nowPos = -1 else: # 保存上一句的字幕信息 self.sections[self.nowPos][1] = self.ui.getSubtitle() self.ui.setRowText(self.nowPos, self.ui.getSubtitle()) self.nowPos = self.nowPos + 1 if self.nowPos < len(self.sections) - 1 else 0 self.setSubtitleInfo() def changeThePicText(self, text: str) -> None: """ 视频字幕实时更改 :param text: 字幕信息 :return: None """ if len(self.sections) <= 0: return self.ui.setVideoText(text) self.sections[self.nowPos][1] = text self.ui.setRowText(self.nowPos, text) def previewImg(self, path: str) -> None: """ 将网络表情包加载预览以供选择 :param path: 网络表情包路径 :return: None """ self.ui.addImg(path) def search(self, button: int) -> None: """ 搜索表情包,button代表了不同的搜索引擎 :param button: 来自哪个按钮,代表了不同的搜索引擎 :return: None """ if len(self.sections) <= 0 or self.ui.getSearchText() == '': self.ui.msgBox('工作区暂无内容或未输入搜索文字!') return if self.subThread is not None: self.subThread.terminate() while self.subThread.isRunning() and not self.subThread.isFinished(): time.sleep(0.1) # 清空当前的所有表情包图片 self.ui.delImg() self.subThread = addImgThread(self.ui.getSearchText(), button) self.subThread.signal.connect(self.previewImg) self.subThread.start() def imgClicked(self, index: int) -> None: """ 表情包点击回调函数,将选好的表情包加载到视频预览区 :param index: 选好的表情包索引 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('工作区无内容!') return if self.nowPos is None: self.ui.msgBox('暂未选择文案与字幕!') return imgPath = self.ui.getImgPathByIndex(index) self.ui.changeVideoImg(imgPath) # 将表情包复制到material目录 imgBaseName = os.path.basename(imgPath) suffix = imgBaseName[imgBaseName.rfind('.') + 1:] uuid = self.sections[self.nowPos][2] newPath = '{}.{}'.format(uuid, suffix) newPath = os.path.join(os.path.join(self.materialName, 'img'), newPath) shutil.copyfile(imgPath, newPath) # 保存图片与字幕信息 self.sections[self.nowPos] = [newPath, self.ui.getSubtitle(), uuid] self.ui.setRowText(self.nowPos, self.ui.getSubtitle()) def genVideo(self) -> None: """ 开始生成视频 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('工作区无内容!') return self.setDisabled(True) self.gvt = genVideoThread(self.sections, self.materialName, self.fileName) self.gvt.signal.connect(self.genVideoFinished) self.gvt.start() def genVideoFinished(self, msg: str) -> None: """ 视频生成完成回调函数 :param msg: 生成视频传回的信息。主要有两种,第一是进度,第二是输出的视频的路径 :return: None """ if ': ' in msg: self.ui.setGenVideoText(msg) return self.setDisabled(False) self.ui.setGenVideoText('生成视频') self.ui.msgBox('生成完毕!位置:{}'.format(msg)) def loadText(self, drag=False, fileName: str=None) -> None: """ 文件浏览器回调函数/同时支持拖放导入文案解析 :param drag: 是否是拖放导入的 :param fileName: 拖放进来的文件名 :return: None """ if self.fileName is None: self.ui.msgBox('请先设置工程目录!') return if len(self.sections) > 0 and not self.ui.msgBox('当前导入会覆盖工作区内容,不可撤销!是否继续?', True): return self.ui.delAllRow() if not drag: fileName, _ = QtWidgets.QFileDialog.getOpenFileName(self, '选择文案', os.getcwd(), 'Text Files (*.txt)') if not os.path.exists(fileName): self.ui.msgBox('未选择文件!') return with open(fileName, 'r', encoding='utf-8') as f: data = f.read() self.sections = list() for text in data.split('\n'): if text == '': continue text = text.strip() self.ui.addRow(text) self.sections.append([None, text, getUuid()]) self.ui.msgBox('导入完成!') def addFrontText(self) -> None: """ 在当前选中的表格单元前面增加一行空白行 :return: None """ if len(self.sections) <= 0: if self.fileName is not None: index = 0 else: self.ui.msgBox('请设置文件名后再添加!') return else: index = self.ui.getCurrentSelected() if index == -1: self.ui.msgBox('未选中表格!') return if self.nowPos is not None and self.nowPos >= index: self.nowPos += 1 self.ui.insertRow(index) self.sections.insert(index, [None, '', getUuid()]) def addBehindText(self) -> None: """ 在当前选中的表格单元后面增加一行空白行 :return: None """ if len(self.sections) <= 0: if self.fileName is not None: index = -1 else: self.ui.msgBox('请设置文件名后再添加!') return else: index = self.ui.getCurrentSelected() if index == -1: self.ui.msgBox('未选中表格!') return if self.nowPos is not None and self.nowPos > index: self.nowPos += 1 self.ui.insertRow(index + 1) self.sections.insert(index + 1, [None, '', getUuid()]) def delText(self) -> None: """ 删除该行 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('请先创建工程或输入文案!') return index = self.ui.getCurrentSelected() if index == -1: self.ui.msgBox('未选中表格!') return # 如果删除了当前预览位置的文案 if self.nowPos is not None and self.nowPos == index: # 加载下一句文案 self.next() # 如果在当前位置之前删除了一个文案,那么当前位置-1 if self.nowPos is not None and self.nowPos >= index: self.nowPos -= 1 self.ui.delRow(index) del self.sections[index] def exportText(self) -> None: """ 导出文案内容 :return: None """ if len(self.sections) <= 0: self.ui.msgBox('当前工作区没有内容!') return texts = '' for section in self.sections: texts += section[1] + '\n' path = os.path.join(self.materialName, 'work.txt') with open(path, 'w+') as f: f.write(texts) self.ui.msgBox('导出文案成功!位置:{}'.format(path)) def tableItemChange(self, item: QStandardItem) -> None: """ 文案内容被修改时,同步到sections :param item: 被修改的表格item :return: """ if len(self.sections) > item.row(): self.sections[item.row()][1] = item.text() # 如果当前单句字幕正好是修改的单元格部分,则修改的内容同步修改到单句字幕编辑处. 需要排除由于单句字幕修改造成的表格修改的情况 if self.nowPos is not None and self.nowPos == item.row() and self.ui.getSubtitle() != item.text(): self.ui.setSubTileText(item.text()) self.ui.setSearchText(item.text()) def jumpToIndex(self) -> None: """ 跳转到指定的文案 :return: None """ if self.nowPos is not None and self.ui.getCurrentSelected() != -1: # 保存上一句的字幕信息 self.sections[self.nowPos][1] = self.ui.getSubtitle() self.ui.setRowText(self.nowPos, self.ui.getSubtitle()) self.nowPos = self.ui.getCurrentSelected() self.setSubtitleInfo() if __name__ == '__main__': if os.path.exists('tmp'): shutil.rmtree('tmp') os.mkdir('tmp') if not os.path.exists('out'): os.mkdir('out') myapp = QApplication(sys.argv) myDlg = MainDialog() myDlg.show() sys.exit(myapp.exec_()) ================================================ FILE: mainWindow.py ================================================ # -*- coding: utf-8 -*- # Form implementation generated from reading ui file 'mainWindow.ui' # # Created by: PyQt5 UI code generator 5.15.4 # # WARNING: Any manual changes made to this file will be lost when pyuic5 is # run again. Do not edit this file unless you know what you are doing. import math import os.path import shutil import time from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5.QtGui import QMovie, QStandardItemModel, QStandardItem from moviepy.video.VideoClip import ImageClip from moviepy.video.io.VideoFileClip import VideoFileClip from conf import DoutulaButton, BaiduButton from utils import resizeImg class clickedButton(QtWidgets.QPushButton): """ 搜索表情包按钮事件,主要可以区分哪个按钮 """ clicked = QtCore.pyqtSignal(int) def __init__(self, button, parent=None): super(clickedButton, self).__init__(parent) self.button = button def mouseReleaseEvent(self, QMouseEvent): self.clicked.emit(self.button) class clickedLabel(QtWidgets.QLabel): """ 标签类,主要增加了点击事件,用于图片缓存区,用户点击了图片后可知道点击了哪张图 """ clicked = QtCore.pyqtSignal(int) def __init__(self, index: int, parent=None): """ :param index: 图片索引 :param parent: """ super(clickedLabel, self).__init__(parent) self.index = index def mouseReleaseEvent(self, QMouseEvent): self.clicked.emit(self.index) class Ui_MainWindow(object): def __init__(self): # 所有缓存的图片 self.img = [] # 当前视频预览区gif self.gif = None # 当前视频预览区图片 self.videoImg = None def setupUi(self, MainWindow): # 初始设置窗口信息 MainWindow.setObjectName('MainWindow') MainWindow.resize(1890, 702) MainWindow.setLayoutDirection(QtCore.Qt.LeftToRight) MainWindow.setWindowTitle('半佛风格视频生成') self.mainWindow = MainWindow self.centralwidget = QtWidgets.QWidget(MainWindow) # 输入文件名标签 self.fileLabel = QtWidgets.QLabel(self.centralwidget) self.fileLabel.setGeometry(QtCore.QRect(10, 23, 121, 16)) self.fileLabel.setText('导出视频名称: ') # 文件名 self.filenName = QtWidgets.QLineEdit(self.centralwidget) self.filenName.setGeometry(QtCore.QRect(140, 16, 361, 31)) self.filenName.setText('{}.mp4'.format(int(time.time()))) # 输入文件名确定按钮 self.filenameButton = QtWidgets.QPushButton(self.centralwidget) self.filenameButton.setGeometry(QtCore.QRect(501, 16, 60, 31)) self.filenameButton.setText('设置') self.filenameButton.clicked.connect(MainWindow.setFilename) # 给输入框加个分组box self.groupBox = QtWidgets.QGroupBox(self.centralwidget) self.groupBox.setGeometry(QtCore.QRect(0, 53, 561, 618)) self.groupBox.setTitle('在此输入/编辑文案') # 文案以表格的形式展示 self.model = QStandardItemModel(0, 0) self.model.itemChanged.connect(MainWindow.tableItemChange) # 设置水平方向四个头标签文本内容 self.model.setHorizontalHeaderLabels(['文案&字幕']) self.row = 0 self.tableView = QtWidgets.QTableView(self.groupBox) self.tableView.setGeometry(QtCore.QRect(10, 20, 541, 542)) self.tableView.setShowGrid(True) self.tableView.setModel(self.model) self.tableView.horizontalHeader().setStretchLastSection(True) self.tableView.horizontalHeader().setSectionResizeMode(QtWidgets.QHeaderView.Stretch) self.tableView.clicked.connect(MainWindow.jumpToIndex) # 打开文案/前增一句/后增一句/删除一句/修改一句/导出文案按钮布局 self.tableButtonWidget = QtWidgets.QWidget(self.groupBox) self.tableButtonWidget.setGeometry(QtCore.QRect(10, 561, 541, 55)) # 水平分布 self.hbox = QtWidgets.QHBoxLayout() self.hbox.setGeometry(QtCore.QRect()) self.hbox.setContentsMargins(0, 0, 0, 0) # 打开文案 self.openText = QtWidgets.QPushButton() self.openText.setText('打开') self.openText.clicked.connect(MainWindow.loadText) self.hbox.addWidget(self.openText) # 在前面增加一句 self.addFrontText = QtWidgets.QPushButton() self.addFrontText.setText('前增一句') self.addFrontText.clicked.connect(MainWindow.addFrontText) self.hbox.addWidget(self.addFrontText) # 在后面增加一句 self.addBehindText = QtWidgets.QPushButton() self.addBehindText.setText('后增一句') self.addBehindText.clicked.connect(MainWindow.addBehindText) self.hbox.addWidget(self.addBehindText) # 删除该句 self.delText = QtWidgets.QPushButton() self.delText.setText('删除该句') self.delText.clicked.connect(MainWindow.delText) self.hbox.addWidget(self.delText) # 导出文案 self.exportText = QtWidgets.QPushButton() self.exportText.setText('导出文案') self.exportText.clicked.connect(MainWindow.exportText) self.hbox.addWidget(self.exportText) self.tableButtonWidget.setLayout(self.hbox) # 单句字幕 self.singleText = QtWidgets.QLineEdit(self.centralwidget) self.singleText.setGeometry(QtCore.QRect(660, 620, 611, 50)) self.singleText.textChanged.connect(MainWindow.changeThePicText) # 搜索框文本 self.searchText = QtWidgets.QLineEdit(self.centralwidget) self.searchText.setGeometry(QtCore.QRect(1360, 17, 421, 41)) # 斗图啦/百度搜索按钮 self.searchDou = clickedButton(DoutulaButton, self.centralwidget) self.searchDou.setGeometry(QtCore.QRect(1785, 17, 45, 41)) self.searchDou.setText('斗图') self.searchDou.clicked.connect(MainWindow.search) self.searchBai = clickedButton(BaiduButton, self.centralwidget) self.searchBai.setGeometry(QtCore.QRect(1834, 17, 45, 41)) self.searchBai.setText('百度') self.searchBai.clicked.connect(MainWindow.search) # 视频背景图 self.videoBackgroud = QtWidgets.QLabel(self.centralwidget) self.videoBackgroud.setGeometry(QtCore.QRect(560, 16, 800, 600)) self.videoBackgroud.setPixmap(QtGui.QPixmap('background.png')) self.videoBackgroud.setScaledContents(True) # 视频字幕 self.addSubtitleLayout() # 上一句按钮 self.last = QtWidgets.QPushButton(self.centralwidget) self.last.setGeometry(QtCore.QRect(560, 620, 101, 51)) self.last.setText('上一句') self.last.clicked.connect(MainWindow.last) # 下一句按钮 self.next = QtWidgets.QPushButton(self.centralwidget) self.next.setGeometry(QtCore.QRect(1270, 620, 91, 51)) self.next.setText('下一句') self.next.clicked.connect(MainWindow.next) # 生成视频按钮 self.genVideo = QtWidgets.QPushButton(self.centralwidget) self.genVideo.setGeometry(QtCore.QRect(1360, 620, 521, 51)) self.genVideo.setText('生成视频') self.genVideo.clicked.connect(MainWindow.genVideo) # 表情包图片位置布局 self.widget = QtWidgets.QWidget(self.centralwidget) self.widget.setGeometry(QtCore.QRect(1349, 48, 540, 579)) self.topFiller = QtWidgets.QWidget() self.scroll = QtWidgets.QScrollArea() self.scroll.setWidget(self.topFiller) self.vbox = QtWidgets.QVBoxLayout() self.vbox.addWidget(self.scroll) self.widget.setLayout(self.vbox) QtCore.QMetaObject.connectSlotsByName(MainWindow) def addSubtitleLayout(self) -> None: """ 视频字幕设置布局 :return: """ # 视频字幕 self.videoText = QtWidgets.QLabel(self.centralwidget) self.videoText.setGeometry(QtCore.QRect(560, 526, 801, 51)) # 视频字幕字体颜色 palette = QtGui.QPalette() brush = QtGui.QBrush(QtGui.QColor(255, 255, 255)) brush.setStyle(QtCore.Qt.SolidPattern) palette.setBrush(QtGui.QPalette.Active, QtGui.QPalette.WindowText, brush) brush = QtGui.QBrush(QtGui.QColor(255, 255, 255)) brush.setStyle(QtCore.Qt.SolidPattern) palette.setBrush(QtGui.QPalette.Inactive, QtGui.QPalette.WindowText, brush) brush = QtGui.QBrush(QtGui.QColor(120, 120, 120)) brush.setStyle(QtCore.Qt.SolidPattern) palette.setBrush(QtGui.QPalette.Disabled, QtGui.QPalette.WindowText, brush) self.videoText.setPalette(palette) # 视频字幕字体设置 font = QtGui.QFont() font.setFamily('华文楷体') font.setPointSize(20) self.videoText.setFont(font) self.videoText.setTextFormat(QtCore.Qt.PlainText) self.videoText.setScaledContents(False) self.videoText.setAlignment(QtCore.Qt.AlignCenter) self.videoText.setWordWrap(False) def delVideoImg(self) -> None: """ 清空视频预览区图片 :return: None """ if self.videoImg is not None: self.videoImg.deleteLater() self.videoImg = None if self.gif is not None: self.gif.deleteLater() self.gif = None def getResizedOfVideoImg(self, path) -> (int, int): """ 获取路径图片重新调整过的大小 :param path: 图片路径 :return: (width, height) """ try: clip = VideoFileClip(path) except: clip = ImageClip(path) width, height = clip.size return resizeImg(width, height) def changeVideoImg(self, path: str) -> None: """ 加载图片到视频预览区 :param path: 图片路径 :return: None """ # 如果图片不是在tmp目录保存,则转到tmp目录再加载,不然重命名工程会有问题 if not path.startswith('tmp' + os.path.sep): newPath = os.path.join('tmp', os.path.basename(path)) if not os.path.exists(newPath): shutil.copyfile(path, newPath) path = newPath self.delVideoImg() self.videoImg = QtWidgets.QLabel(self.centralwidget) wight, height = self.getResizedOfVideoImg(path) self.videoImg.setGeometry(QtCore.QRect(int(960 - wight / 2), int(316 - height / 2), wight, height)) self.videoImg.setScaledContents(True) self.gif = QMovie(path) self.videoImg.setMovie(self.gif) self.videoImg.setAlignment(QtCore.Qt.AlignCenter) self.gif.start() self.videoImg.show() self.centralwidget.show() def addImg(self, path: str) -> None: """ 给图片缓存区加一张图片 :param path: 图片路径 :return: None """ # 计算图片框位置 row = math.ceil((len(self.img) + 1) / 3) - 1 col = len(self.img) % 3 img_label = clickedLabel(len(self.img), self.topFiller) img_label.setGeometry(QtCore.QRect(1370, 60, 151, 151)) gif = QMovie(path) img_label.setMovie(gif) img_label.setScaledContents(True) gif.start() img_label.move(col * (151 + 10) + 10, row * (151 + 10) + 10) self.img.append((img_label, gif, path)) self.topFiller.setMinimumSize(490, (row + 1) * (151 + 10)) self.scroll.setWidget(self.topFiller) img_label.clicked.connect(self.mainWindow.imgClicked) img_label.show() self.topFiller.show() self.widget.show() def delImg(self) -> None: """ 清空从网络获取的所有的表情包 :return: None """ for imgLabel, gif, _ in self.img: imgLabel.deleteLater() gif.deleteLater() self.img = [] self.widget.show() def getImgPathByIndex(self, index: int) -> str: """ 通过表情包缓存区的索引获取表情包的实际路径 :param index: 表情包缓存区的索引 :return: 表情包的实际位置 """ return self.img[index][2] def addRow(self, text: str) -> None: """ 向表中添加一行新的数据 :param text: :return: """ item = QStandardItem(text) self.model.setItem(self.row, 0, item) self.row += 1 def delRow(self, row: int) -> None: """ 删除一行表中的数据 :param row: 删除索引 :return: """ self.model.removeRow(row) def delAllRow(self) -> None: """ 删除所有行 :return: """ for index in range(self.model.rowCount())[::-1]: self.delRow(index) def msgBox(self, msg: str, hasQuery: bool = False) -> bool: """ :param msg: 消息 :param hasQuery: 是否带询问 :return: 如果hasQuery为True,返回值为用户是否点击了确定 """ if not hasQuery: QtWidgets.QMessageBox.information(self.mainWindow, '提示', msg, QtWidgets.QMessageBox.Ok, QtWidgets.QMessageBox.Ok) return True reply = QtWidgets.QMessageBox.question(self.mainWindow, '提示', msg, QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.Cancel, QtWidgets.QMessageBox.Cancel) return reply == QtWidgets.QMessageBox.Yes def getCurrentSelected(self) -> int: """ 获取当前选中的表格单元索引 :return: 前选中的表格单元索引 """ selectedIndex = self.tableView.selectedIndexes() if len(selectedIndex) > 0: return selectedIndex[0].row() return -1 def insertRow(self, index: int) -> None: """ 增加一行空白行 :index: 增加的位置索引 :return: None """ self.model.insertRow(index) def getSubtitle(self) -> str: """ 获取当前编辑的字幕信息 :return: 前编辑的字幕 """ return self.singleText.text() def getSearchText(self) -> str: """ 获取搜索框文本信息 :return: 搜索框文本信息 """ return self.searchText.text() def setVideoText(self, text: str) -> None: """ 设置视频预览区字幕信息 :param text: 字幕信息 :return: None """ self.videoText.setText(text) def setFileName(self, fileName: str) -> None: """ 设置文件名 :param fileName: 文件名 :return: None """ self.filenName.setText(fileName) def windowIsVisible(self) -> bool: """ 当前窗口是否可见,即是否被关闭 :return: 返回当前窗口是否可见,即是否被关闭 """ return self.centralwidget.isVisible() def setSubTileText(self, text: str) -> None: """ 设置字幕编辑框内容 :param text: 字幕 :return: None """ self.singleText.setText(text) self.singleText.home(False) def setSearchText(self, text: str) -> None: """ 设置搜索框内容 :param text: 搜索文字 :return: None """ self.searchText.setText(text) self.searchText.home(False) def getFileName(self) -> str: """ 获取文件名 :return: 文件名 """ return self.filenName.text() def setRowText(self, row: int, text: str) -> None: """ 设置某一行的文案信息 :param row: 行索引 :param text: 文本 :return: """ item = self.model.item(row, 0) if item is not None: item.setText(text) def subtitleHasFocus(self) -> bool: """ 单句字幕编辑框是否有焦点 :return: 单句字幕编辑框是否有焦点 """ return self.singleText.hasFocus() def setGenVideoText(self, text: str) -> None: """ 设置生成视频按钮的文本 :param text: 文本 :return: None """ self.genVideo.setText(text) ================================================ FILE: preprocess.py ================================================ import os import pickle import paddlenlp # 加载tokeniezer from conf import MODELNAME tokenizer = paddlenlp.transformers.GPTChineseTokenizer.from_pretrained(MODELNAME) trainData = [] # 处理所有的公众号文章 for index, path in enumerate(os.listdir('banfoText')): if not path.endswith('.txt'): continue print(index, path) with open(os.path.join('banfoText', path), 'r+', encoding='utf-8') as f: data = f.read() data = tokenizer(text=data, return_token_type_ids=False) data = data['input_ids'] start = -30 lenght = 100 step = 30 if len(data) <= 2 * lenght: continue # 滑动窗口截断获取inputData和label while start + step + 1 < len(data) and start + step + lenght + 1 < len(data): start = start + step input_data = data[start: start + lenght] label = data[start + 1: start + lenght + 1] trainData.append([input_data, label]) trainData.append([data[-lenght-1: -1], data[-lenght:]]) if not os.path.exists('preprocessData'): os.mkdir('preprocessData') with open(os.path.join('preprocessData', 'trainData.pkl'), 'wb') as f: pickle.dump(trainData, f) print(len(trainData)) print('done!') ================================================ FILE: requirements.txt ================================================ moviepy==1.0.3 requests~=2.24.0 numpy==1.21.2 beautifulsoup4==4.10.0 PyQt5==5.15.4 system_hotkey==1.0.3 Flask~=2.0.2 bs4~=0.0.1 Pillow~=8.3.2 piexif~=1.1.3 googletrans~=3.0.0 paddlepaddle-gpu==2.1.3.post110 paddlenlp~=2.1.0 ================================================ FILE: train.py ================================================ import os import pickle import paddle import paddlenlp from paddle.io import Dataset, DataLoader import paddle.nn as nn from conf import MODELNAME class BanfoDataset(Dataset): def __init__(self, data, tokenizer): super().__init__() self.data = data self.tokenizer = tokenizer def __getitem__(self, idx): return paddle.to_tensor(self.data[idx][0], dtype='int64'), paddle.to_tensor(self.data[idx][1], dtype='int64') def __len__(self): return len(self.data) if not os.path.exists('models'): os.mkdir('models') paddle.set_device('gpu') gptModel = paddlenlp.transformers.GPTModel.from_pretrained(MODELNAME) gptModel = paddlenlp.transformers.GPTForPretraining(gptModel) tokenizer = paddlenlp.transformers.GPTChineseTokenizer.from_pretrained(MODELNAME) # 有本地模型存在,则加载本地模型 checkpoint = os.path.join('models', 'model_state.pdparams') if os.path.exists(checkpoint): model_state = paddle.load(checkpoint) gptModel.set_state_dict(model_state) # 设置为评估模型 gptModel.eval() # 测试效果 encodedText = tokenizer(text='前段时间我跟一个老大哥一起吃火锅。大哥的孩子,都上学了', return_token_type_ids=False) ids, _ = gptModel.generate(input_ids=paddle.to_tensor(encodedText['input_ids'], dtype='int64').unsqueeze(0), max_length=16, min_length=1, decode_strategy='sampling') ids = ids[0].numpy().tolist() # 使用tokenizer将生成的id转为文本 text = tokenizer.convert_ids_to_string(ids) print('generation text is {}'.format(text)) # 加载训练数据 with open(os.path.join('preprocessData', 'trainData.pkl'), 'rb') as f: data = pickle.load(f) trainDataLoader = DataLoader(dataset=BanfoDataset(data, tokenizer), batch_size=64, shuffle=True, return_list=True) numEpochs = 100 learningRate = 2e-5 warmupProportion = 0.1 weightDecay = 0.1 maxSteps = (len(trainDataLoader) * numEpochs) lr_scheduler = paddle.optimizer.lr.LambdaDecay(learningRate, lambda currentStep, numWarmupSteps=maxSteps * warmupProportion, numTrainingSteps=maxSteps: float(currentStep) / float(max(1, numWarmupSteps)) if currentStep < numWarmupSteps else max(0.0, float(numTrainingSteps - currentStep) / float(max(1, numTrainingSteps - numWarmupSteps)))) optimizer = paddle.optimizer.AdamW(learning_rate=lr_scheduler, parameters=gptModel.parameters(), weight_decay=weightDecay, grad_clip=nn.ClipGradByGlobalNorm(1.0), apply_decay_param_fun=lambda x: x in [ p.name for n, p in gptModel.named_parameters() if not any(nd in n for nd in ["bias", "norm"]) ]) globalStep = 1 save_steps = 100 criterion = paddle.nn.loss.CrossEntropyLoss() gptModel.train() for epoch in range(numEpochs): for step, batch in enumerate(trainDataLoader, start=1): ids, label = batch logits, _ = gptModel.forward(ids, use_cache=True) loss = criterion(logits, label) loss.backward() optimizer.step() lr_scheduler.step() optimizer.clear_gradients() if globalStep % save_steps == 0: print(globalStep, loss.numpy()) gptModel.save_pretrained('models') globalStep += 1 ================================================ FILE: utils.py ================================================ import base64 import hashlib import json import os import random from collections.abc import Iterable from urllib.parse import quote import piexif import requests from PIL import Image from bs4 import BeautifulSoup from config import APIKEY, BAIDUAPPID, BAIDUAPPKEY proxies = None verify = True def getUuid() -> str: """ 生成一个uuid :return: uuid """ return str(random.randint(1, 999999)).zfill(6) def getBaiDuAudio(text: str, filePath: str) -> [str, None]: """ 利用百度语音合成将文字合成语音 :param text: 合成语音的文字 :param filePath: 文件保存路径 :return: 文件路径或者None """ # 生成文件名 md5 = hashlib.md5() md5.update(text.encode('utf-8')) fileName = md5.hexdigest() + '.mp3' fileName = os.path.join(filePath, fileName) # 文件已经存在的话直接返回 if os.path.exists(fileName): return fileName url = 'https://cloud.baidu.com/aidemo' data = 'type=tns&per=4105&spd=8&pit=7&vol=5&aue=6&tex=' + quote(text) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36', 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': '*/*', 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7' } res = requests.post(url, data=data, headers=headers, verify=False) if res.status_code != 200: return None res = json.loads(res.text) if res['msg'] != 'success': return None data = res['data'].replace('data:audio/x-mpeg;base64,', '') if ',' in data: data = data[:data.find(',')] data = base64.b64decode(data) with open(fileName, 'wb') as f: f.write(data) return fileName def decodeBaiduImg(objUrl: str) -> str: """ 百度图片地址解码函数 :param objUrl: 编码的url :return: 解码的url """ res = '' c = ['_z2C$q', '_z&e3B', 'AzdH3F'] d = {'w': 'a', 'k': 'b', 'v': 'c', '1': 'd', 'j': 'e', 'u': 'f', '2': 'g', 'i': 'h', 't': 'i', '3': 'j', 'h': 'k', 's': 'l', '4': 'm', 'g': 'n', '5': 'o', 'r': 'p', 'q': 'q', '6': 'r', 'f': 's', 'p': 't', '7': 'u', 'e': 'v', 'o': 'w', '8': '1', 'd': '2', 'n': '3', '9': '4', 'c': '5', 'm': '6', '0': '7', 'b': '8', 'l': '9', 'a': '0', '_z2C$q': ':', '_z&e3B': '.', 'AzdH3F': '/'} for m in c: objUrl = objUrl.replace(m, d[m]) for char in objUrl: char = d[char] if char in d else char res = res + char return res def getBaiduImgPath(text: str) -> [Iterable, None]: """ 从百度图片接口拉取图片 :param text: 搜索关键字 :return: 图片路径迭代器或者None """ url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&logid=8763701186511659178&ipn=rj&ct=201326592&is=' \ '&fp=result&queryWord={0}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=©right=&word={0}' \ '&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&nojc=&acjsonfr=click&pn=0&rn=30&itg=1' \ '&gsm=3c&1634043752626=' url = url.format(text) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7'} res = requests.get(url, headers=headers, verify=False) if res.status_code != 200: return None try: jsonData = json.loads(res.text) except: return None if 'data' not in jsonData: return None data = jsonData['data'] for img in data: imgUrl = '' if 'objURL' in img: imgUrl = decodeBaiduImg(img['objURL']) elif 'middleURL' in img: imgUrl = img['middleURL'] elif 'thumbURL' in img: imgUrl = img['thumbURL'] elif imgUrl == '': return None if 'is_gif' in img and img['is_gif'] == 1: is_gif = 1 else: is_gif = 0 res = requests.get(imgUrl) if res.status_code != 200: continue md5 = hashlib.md5() md5.update(res.content) fileName = md5.hexdigest() + '.gif' if is_gif else md5.hexdigest() + '.png' path = os.path.join('tmp', fileName) with open(path, 'wb') as f: f.write(res.content) yield path def getDoutulaImgPath(text: str) -> [Iterable, None]: """ 从斗图啦表情包接口拉取图片 :param text: 搜索关键字 :return: 图片路径迭代器或者None """ url = 'https://www.doutula.com/search?keyword=' + text headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7'} res = requests.get(url, headers=headers) if res.status_code != 200: return None soup = BeautifulSoup(res.text, 'html.parser') randomPics = soup.find_all('a', attrs={'class': 'col-xs-6 col-md-2'}) for pic in randomPics: imgUrl = pic.find('img', attrs={'referrerpolicy': 'no-referrer'})['data-original'] try: res = requests.get(imgUrl) except: return None if res.status_code != 200: continue path = os.path.join('tmp', imgUrl[imgUrl.rfind('/') + 1:]) with open(path, 'wb') as f: f.write(res.content) yield path def resizeImg(width: int, height: int) -> (int, int): """ 重新设置图片大小 :param width: 图片宽度 :param height: 图片高度 :return: (width, height) """ default_width, default_height = 440, 360 if (width < default_width and height < default_height) or (width > default_width and height > default_height): # 如果比例大于1.5,就不强制拉伸,按照宽度进行缩放 if width / height >= 1.5: width, height = default_width, height * default_width / width elif height / width > 1.5: height, width = default_height, width * default_height / height else: # 小于1.5的强制拉伸 width, height = default_width, default_height elif width > default_width and height <= default_height: width, height = default_width, height * default_width / width if height > default_height and width <= default_width: height, width = default_height, width * default_height / height return width, height def convertToRGB(path: str) -> None: """ 如果图片不是RGB模式,则转换成RGB模式,否则生成视频会出错 :param path: 图片路径 :return: None """ # convert to RGB im = Image.open(path) if im.mode != 'RGB': im = im.convert('RGB') # 获取exif是否正常,不正常则添加 try: im.getexif() im.save(path) except: exif_dict = {} exif_dat = piexif.dump(exif_dict) im.save(path, exif=exif_dat) def baiduTranslate(text: str, zhcn2en: bool = True) -> str: """ 将中文翻译成英文 :param text: 中文/英文 :param zhcn2en : 是否为中文转英文, 默认为是 :return: 英文/中文 """ url = 'https://fanyi-api.baidu.com/api/trans/vip/translate' headers = {'Content-Type': 'application/x-www-form-urlencoded'} data = BAIDUAPPID + text + url + BAIDUAPPKEY md5 = hashlib.md5() md5.update(data.encode('utf-8')) if zhcn2en: data = {'appid': BAIDUAPPID, 'q': text, 'from': 'zh', 'to': 'en', 'salt': url, 'sign': md5.hexdigest()} else: data = {'appid': BAIDUAPPID, 'q': text, 'from': 'en', 'to': 'zh', 'salt': url, 'sign': md5.hexdigest()} res = requests.post(url, data=data, headers=headers) if res.status_code != 200: return '' res = json.loads(res.text) if 'trans_result' not in res: return '' result = '' for trans in res['trans_result']: result += trans['dst'] + '。' return result # TODO # 有点儿问题, 翻译的结果为多行的时候解决比较麻烦 def googleTranslate(text: str, zhcn2en: bool = True) -> str: """ 将中文翻译成英文 :param text: 中文/英文 :param zhcn2en : 是否为中文转英文, 默认为是 :return: 英文/中文 """ global proxies global verify text = text.replace('\n', r'\\n').replace('"', r'\\\"') url = 'https://translate.google.cn/_/TranslateWebserverUi/data/batchexecute' if zhcn2en: data = '[[["MkEWBc","[[\\"{}\\",\\"zh-CN\\",\\"en\\",true],[null]]",null,"generic"]]]'.format(text) else: data = '[[["MkEWBc","[[\\"{}\\",\\"en\\",\\"zh-CN\\",true],[null]]",null,"generic"]]]'.format(text) data = quote(data) data = 'f.req=' + data + '&' headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36', 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7'} res = requests.post(url=url, data=data, headers=headers, verify=verify, proxies=proxies) if res.status_code != 200: return '' lIndex = res.text.find('l,[[\\"') rIndex = res.text.find('\\"', lIndex + len('l,[[\\"')) if lIndex != -1 and rIndex != -1: text = res.text[lIndex + len('l,[[\\"'):rIndex] text = text.replace(r'\\n', '\n').replace(r'\\\"', '"') return text return '' def getTips(text: str) -> str: """ 获取指定文本的生成文本信息 :param text: 线索文本 :return: 生成的文本 """ global proxies global verify # 中文 -> 英文 text = baiduTranslate(text) if text == '': return '' # 获取生成文本结果 url = 'https://api.deepai.org/api/text-generator' data = {'text': text} headers = {'api-key': APIKEY} res = requests.post(url, files=data, headers=headers, verify=verify, proxies=proxies) if res.status_code != 200: return '' res = json.loads(res.text) if 'output' not in res: return '' text = res['output'] # 英文 -> 中文 text = baiduTranslate(text, zhcn2en=False).replace('\n\n', '\n') return text def setProxies(switch: bool=False) -> None: """ 是否采用代理 :param switch: 是 :return: None """ global proxies global verify if switch: proxies = {'https': '127.0.0.1:8887', 'http': '127.0.0.1:8887'} verify = False else: proxies = None verify = True