Showing preview only (1,648K chars total). Download the full file or copy to clipboard to get everything.
Repository: WEIFENG2333/VideoCaptioner
Branch: master
Commit: b38a361f6ae0
Files: 223
Total size: 1.5 MB
Directory structure:
gitextract_7v46ty1z/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 01_bug.yaml
│ │ ├── 02_request.yaml
│ │ └── 03_question.yaml
│ └── workflows/
│ ├── claude-code-review.yml
│ ├── claude.yml
│ └── deploy-docs.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── app/
│ ├── __init__.py
│ ├── common/
│ │ ├── config.py
│ │ └── signal_bus.py
│ ├── components/
│ │ ├── DonateDialog.py
│ │ ├── EditComboBoxSettingCard.py
│ │ ├── FasterWhisperSettingWidget.py
│ │ ├── LanguageSettingDialog.py
│ │ ├── LineEditSettingCard.py
│ │ ├── MySettingCard.py
│ │ ├── MyVideoWidget.py
│ │ ├── SimpleSettingCard.py
│ │ ├── SpinBoxSettingCard.py
│ │ ├── SubtitleSettingDialog.py
│ │ ├── TranscriptionOutputDialog.py
│ │ ├── TranscriptionSettingDialog.py
│ │ ├── WhisperAPISettingWidget.py
│ │ ├── WhisperCppSettingWidget.py
│ │ └── transcription_setting_card.py
│ ├── config.py
│ ├── core/
│ │ ├── asr/
│ │ │ ├── __init__.py
│ │ │ ├── asr_data.py
│ │ │ ├── base.py
│ │ │ ├── bcut.py
│ │ │ ├── chunk_merger.py
│ │ │ ├── chunked_asr.py
│ │ │ ├── faster_whisper.py
│ │ │ ├── jianying.py
│ │ │ ├── status.py
│ │ │ ├── transcribe.py
│ │ │ ├── whisper_api.py
│ │ │ └── whisper_cpp.py
│ │ ├── constant.py
│ │ ├── entities.py
│ │ ├── llm/
│ │ │ ├── __init__.py
│ │ │ ├── check_llm.py
│ │ │ ├── check_whisper.py
│ │ │ ├── client.py
│ │ │ ├── context.py
│ │ │ └── request_logger.py
│ │ ├── optimize/
│ │ │ └── optimize.py
│ │ ├── prompts/
│ │ │ ├── __init__.py
│ │ │ ├── analysis/
│ │ │ │ └── video.md
│ │ │ ├── optimize/
│ │ │ │ └── subtitle.md
│ │ │ ├── split/
│ │ │ │ ├── semantic.md
│ │ │ │ └── sentence.md
│ │ │ └── translate/
│ │ │ ├── reflect.md
│ │ │ ├── single.md
│ │ │ └── standard.md
│ │ ├── split/
│ │ │ ├── alignment.py
│ │ │ ├── split.py
│ │ │ └── split_by_llm.py
│ │ ├── subtitle/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── ass_renderer.py
│ │ │ ├── ass_utils.py
│ │ │ ├── font_utils.py
│ │ │ ├── rounded_renderer.py
│ │ │ ├── styles.py
│ │ │ └── text_utils.py
│ │ ├── task_factory.py
│ │ ├── translate/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── bing_translator.py
│ │ │ ├── deeplx_translator.py
│ │ │ ├── factory.py
│ │ │ ├── google_translator.py
│ │ │ ├── llm_translator.py
│ │ │ └── types.py
│ │ ├── tts/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── openai_fm.py
│ │ │ ├── openai_tts.py
│ │ │ ├── siliconflow.py
│ │ │ ├── status.py
│ │ │ └── tts_data.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── cache.py
│ │ ├── logger.py
│ │ ├── platform_utils.py
│ │ ├── subprocess_helper.py
│ │ ├── text_utils.py
│ │ └── video_utils.py
│ ├── thread/
│ │ ├── batch_process_thread.py
│ │ ├── file_download_thread.py
│ │ ├── modelscope_download_thread.py
│ │ ├── subtitle_pipeline_thread.py
│ │ ├── subtitle_thread.py
│ │ ├── transcript_thread.py
│ │ ├── version_checker_thread.py
│ │ ├── video_download_thread.py
│ │ ├── video_info_thread.py
│ │ └── video_synthesis_thread.py
│ └── view/
│ ├── batch_process_interface.py
│ ├── home_interface.py
│ ├── llm_logs_interface.py
│ ├── log_window.py
│ ├── main_window.py
│ ├── setting_interface.py
│ ├── subtitle_interface.py
│ ├── subtitle_style_interface.py
│ ├── task_creation_interface.py
│ ├── transcription_interface.py
│ └── video_synthesis_interface.py
├── docs/
│ ├── .vitepress/
│ │ ├── config.mts
│ │ └── theme/
│ │ ├── CustomHome.vue
│ │ ├── custom.css
│ │ └── index.ts
│ ├── README.md
│ ├── config/
│ │ ├── asr.md
│ │ ├── cookies.md
│ │ ├── llm.md
│ │ └── translator.md
│ ├── dev/
│ │ ├── api.md
│ │ ├── architecture.md
│ │ ├── asr-chunk-merger.md
│ │ ├── asr-chunked-usage.md
│ │ ├── contributing.md
│ │ ├── translate-module.md
│ │ └── view-structure.md
│ ├── en/
│ │ ├── config/
│ │ │ ├── asr.md
│ │ │ ├── cookies.md
│ │ │ ├── llm.md
│ │ │ └── translator.md
│ │ ├── dev/
│ │ │ ├── api.md
│ │ │ ├── architecture.md
│ │ │ └── contributing.md
│ │ ├── guide/
│ │ │ ├── batch-processing.md
│ │ │ ├── configuration.md
│ │ │ ├── faq.md
│ │ │ ├── getting-started.md
│ │ │ ├── manuscript.md
│ │ │ ├── subtitle-style.md
│ │ │ └── workflow.md
│ │ └── index.md
│ ├── guide/
│ │ ├── configuration.md
│ │ ├── cookies-config.md
│ │ ├── faq.md
│ │ ├── getting-started.md
│ │ ├── llm-config.md
│ │ ├── quick-example.md
│ │ └── workflow.md
│ ├── index.md
│ ├── package-lock.json
│ ├── package.json
│ └── public/
│ ├── BingSiteAuth.xml
│ └── robots.txt
├── legacy-docs/
│ ├── README_EN.md
│ ├── README_JA.md
│ ├── README_TW.md
│ ├── about_chunk_merge.md
│ ├── get_cookies.md
│ ├── llm_config.md
│ └── test.md
├── main.py
├── pyproject.toml
├── resource/
│ ├── assets/
│ │ └── qss/
│ │ ├── dark/
│ │ │ └── demo.qss
│ │ └── light/
│ │ └── demo.qss
│ ├── subtitle_style/
│ │ ├── default.json
│ │ ├── default.txt
│ │ ├── 毕导科普风.txt
│ │ ├── 番剧可爱风.txt
│ │ └── 竖屏.txt
│ └── translations/
│ ├── VideoCaptioner_en_US.qm
│ ├── VideoCaptioner_en_US.ts
│ ├── VideoCaptioner_zh_CN.qm
│ ├── VideoCaptioner_zh_CN.ts
│ ├── VideoCaptioner_zh_HK.qm
│ └── VideoCaptioner_zh_HK.ts
├── scripts/
│ ├── lint.sh
│ ├── run.bat
│ ├── run.sh
│ ├── trans-compile.sh
│ ├── trans-extract.sh
│ └── translate_llm.py
└── tests/
├── README.md
├── __init__.py
├── conftest.py
├── fixtures/
│ └── README.md
├── test_asr/
│ ├── README.md
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_asr_data.py
│ ├── test_bcut_asr.py
│ ├── test_chunk_merger.py
│ ├── test_chunked_asr.py
│ ├── test_chunking.py
│ ├── test_jianying_asr.py
│ └── test_whisper_api_asr.py
├── test_optimize/
│ └── test_optimize.py
├── test_split/
│ ├── __init__.py
│ ├── test_alignment.py
│ ├── test_split.py
│ ├── test_split_by_llm.py
│ ├── test_split_core.py
│ └── test_split_realistic.py
├── test_subtitle/
│ ├── __init__.py
│ ├── conftest.py
│ └── test_subtitle_thread.py
├── test_thread/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_subtitle_pipeline_thread.py
│ ├── test_transcript_thread.py
│ ├── test_video_info_thread.py
│ └── test_video_synthesis_thread.py
├── test_translate/
│ ├── __init__.py
│ ├── test_bing_translator.py
│ ├── test_cache_validation.py
│ ├── test_deeplx_translator.py
│ ├── test_google_translator.py
│ └── test_llm_translator.py
└── test_tts/
├── __init__.py
├── test_tts_core.py
└── test_tts_integration.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/01_bug.yaml
================================================
name: 错误 | Bug
description: 反馈程序出现的错误 | Report bugs
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
感谢您报告问题!请提供以下信息帮助我更好地解决问题。
Thank you for reporting the issue! Please write in English or Chinese.
- type: textarea
id: description
attributes:
label: 问题描述 | Problem Description
description: |
描述您遇到的问题,如果能提供一个复现步骤将帮我更好定位修复问题。(例如:错误字幕内容、或者视频链接、或者具体报错)
Please describe in detail the problem you encountered.
validations:
required: true
- type: textarea
id: logs
attributes:
label: 日志信息(可选)| Logs (Optional)
description: |
(可选)如果你在生成字幕视频过程遇到了错误,请打开根目录下的 AppData/logs/app.log 文件,根据日志的时间复制最近一次运行错误的日志信息并填写。这样可以更好帮助我排查。
(Optional) Please open the AppData/logs/app.log file in the root directory and copy the log information from the most recent run error.
render: shell
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/02_request.yaml
================================================
name: 功能请求 | Feature Request
description: 提出增加新功能的请求 | Create the request for a new feature
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
✨ 感谢您提出功能建议!请描述您希望的新功能,对于有用可行的建议我会努力实现的。
🌟 Thank you for your feature suggestion! Please describe the new feature you expect, in English or Chinese.
- type: textarea
id: feature
attributes:
label: 💡 预期的功能 | Expected Feature
description: |
请详细描述您期望添加的功能,包括使用场景和希望达到的效果。
Please describe in detail the feature you want to add, including usage scenarios and desired effects.
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/03_question.yaml
================================================
name: 问题咨询 Question
description: 向作者咨询软件使用或配置相关的问题 | Consult about software usage or configuration
labels: ["question"]
body:
- type: textarea
id: problem
attributes:
label: 🤔 问题描述 Problem Description
validations:
required: true
================================================
FILE: .github/workflows/claude-code-review.yml
================================================
name: Claude Code Review
on:
pull_request:
types: [opened, synchronize]
# Optional: Only run on specific file changes
# paths:
# - "src/**/*.ts"
# - "src/**/*.tsx"
# - "src/**/*.js"
# - "src/**/*.jsx"
jobs:
claude-review:
# Optional: Filter by PR author
# if: |
# github.event.pull_request.user.login == 'external-contributor' ||
# github.event.pull_request.user.login == 'new-developer' ||
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
issues: read
id-token: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Run Claude Code Review
id: claude-review
uses: anthropics/claude-code-action@beta
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
# Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1)
# model: "claude-opus-4-1-20250805"
# Direct prompt for automated review (no @claude mention needed)
direct_prompt: |
Please review this pull request and provide feedback on:
- Code quality and best practices
- Potential bugs or issues
- Performance considerations
- Security concerns
- Test coverage
Be constructive and helpful in your feedback.
# Optional: Use sticky comments to make Claude reuse the same comment on subsequent pushes to the same PR
# use_sticky_comment: true
# Optional: Customize review based on file types
# direct_prompt: |
# Review this PR focusing on:
# - For TypeScript files: Type safety and proper interface usage
# - For API endpoints: Security, input validation, and error handling
# - For React components: Performance, accessibility, and best practices
# - For tests: Coverage, edge cases, and test quality
# Optional: Different prompts for different authors
# direct_prompt: |
# ${{ github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' &&
# 'Welcome! Please review this PR from a first-time contributor. Be encouraging and provide detailed explanations for any suggestions.' ||
# 'Please provide a thorough code review focusing on our coding standards and best practices.' }}
# Optional: Add specific tools for running tests or linting
# allowed_tools: "Bash(npm run test),Bash(npm run lint),Bash(npm run typecheck)"
# Optional: Skip review for certain conditions
# if: |
# !contains(github.event.pull_request.title, '[skip-review]') &&
# !contains(github.event.pull_request.title, '[WIP]')
================================================
FILE: .github/workflows/claude.yml
================================================
name: Claude Code
on:
issue_comment:
types: [created]
pull_request_review_comment:
types: [created]
issues:
types: [opened, assigned]
pull_request_review:
types: [submitted]
jobs:
claude:
if: |
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
issues: read
id-token: write
actions: read # Required for Claude to read CI results on PRs
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Run Claude Code
id: claude
uses: anthropics/claude-code-action@beta
with:
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
# This is an optional setting that allows Claude to read CI results on PRs
additional_permissions: |
actions: read
# Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1)
# model: "claude-opus-4-1-20250805"
# Optional: Customize the trigger phrase (default: @claude)
# trigger_phrase: "/claude"
# Optional: Trigger when specific user is assigned to an issue
# assignee_trigger: "claude-bot"
# Optional: Allow Claude to run specific commands
# allowed_tools: "Bash(npm install),Bash(npm run build),Bash(npm run test:*),Bash(npm run lint:*)"
# Optional: Add custom instructions for Claude to customize its behavior for your project
# custom_instructions: |
# Follow our coding standards
# Ensure all new code has tests
# Use TypeScript for new files
# Optional: Custom environment variables for Claude
# claude_env: |
# NODE_ENV: test
================================================
FILE: .github/workflows/deploy-docs.yml
================================================
name: Deploy Documentation
on:
push:
branches:
- master
- main
- dev
paths:
- "docs/**"
- ".github/workflows/deploy-docs.yml"
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install dependencies
run: npm ci
working-directory: docs
- name: Build documentation
run: npm run docs:build
working-directory: docs
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: docs/.vitepress/dist
deploy:
if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
needs: build
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
================================================
FILE: .gitignore
================================================
# win 二进制文件资源目录
/resource/bin/
!/resource/bin/bin_environment.txt
# 开发环境
.idea/
*.pyc
*/__pycache__/
*.env
*.env.local
*.env.*.local
.env.test
**/.env
**/.env.local
venv/
.venv/
# 系统文件
.DS_Store
# 测试和脚本
/test/
/release/
/my_content/
# 媒体文件
*.srt
*.mp4
*.exe
# 应用数据
/AppData/
**/settings.json
!**/settings.json.example
/output/
/work-dir/
.vscode/
.claude/
# 敏感文件
cookies.txt
**/cookies.txt
*.key
*.pem
*.p12
*.pfx
*secret*
*credential*
# 测试相关
.pytest_cache/
.coverage
htmlcov/
*.log
# 项目文档
CLAUDE.md
# Node.js 和 VitePress
node_modules/
docs/.vitepress/cache/
docs/.vitepress/dist/
/package-lock.json
!docs/package-lock.json
================================================
FILE: CHANGELOG.md
================================================
# 更新日志
## 2025.02.07
### Bug 修复与其他改进
- 修复谷歌翻译语言不正确的问题。
- 修复微软翻译不准确的问题。
- 修复运行设备未选择 cuda 时报 WinError 的错误
- 修复合成失败的问题
- 修复ass单语字幕没有内容的问题
## 2025.02.06
### 核心功能增强
- 完整重构代码架构,优化整体性能
- 字幕优化与翻译功能模块分离,提供更灵活的处理选项
- 新增批量处理功能:支持批量字幕、批量转录、批量字幕视频合成
- 全面优化 UI 界面与交互细节
### AI 模型与翻译升级
- 扩展 LLM 支持:新增 SiliconCloud、DeepSeek、Ollama、Gemini、ChatGLM 等模型
- 集成多种翻译服务:DeepLx、Bing、Google、LLM
- 新增 faster-whisper-large-v3-turbo 模型支持
- 新增多种 VAD(语音活动检测)方法
- 支持自定义反思翻译开关
- 字幕断句支持语义/句子两种模式
- 字幕断句、优化、翻译提示词的优化
- 字幕、转录缓存机制的优化
- 优化中文字幕自动换行功能
- 新增竖屏字幕样式
- 改进字幕时间轴切换机制,消除闪烁问题
### Bug 修复与其他改进
- 修复 Whisper API 无法使用问题
- 新增多种字幕视频格式支持
- 修复部分情况转录错误的问题
- 优化视频工作目录结构
- 新增日志查看功能
- 新增泰语、德语等语言的字幕优化
- 修复诸多Bug...
## 2024.12.07
- 新增 Faster-whisper 支持,音频转字幕质量更优
- 支持Vad语音断点检测,大大减少幻觉现象
- 支持人声分离,分离视频背景噪音
- 支持关闭视频合成
- 新增字幕最大长度设置
- 新增字幕末尾标点去除设置
- 改进字幕优化和翻译的提示词
- 优化LLM字幕断句错误的情况
- 修复音频转换格式不一致问题
## 2024.11.23
- 新增 Whisper-v3 模型支持,大幅提升语音识别准确率
- 优化字幕断句算法,提供更自然的阅读体验
- 修复检测模型可用性时的稳定性问题
## 2024.11.20
- 支持自定义调节字幕位置和样式
- 新增字幕优化和翻译过程的实时日志查看
- 修复使用 API 时的自动翻译问题
- 优化视频工作目录结构,提升文件管理效率
## 2024.11.17
- 支持双语/单语字幕灵活导出
- 新增文稿匹配提示对齐功能
- 修复字幕导入时的稳定性问题
- 修复非中文路径下载模型的兼容性问题
## 2024.11.13
- 新增 Whisper API 调用支持
- 支持导入 cookie.txt 下载各大视频平台资源
- 字幕文件名自动与视频保持一致
- 软件主页新增运行日志实时查看
- 统一和完善软件内部功能
================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
VideoCaptioner - A desktop application for video subtitle processing based on LLM.
Copyright (C) 2025 Weifeng
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
VideoCaptioner Copyright (C) 2025 Weifeng
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: README.md
================================================
<div align="center">
<img src="./legacy-docs/images/logo.png" alt="VideoCaptioner Logo" width="100">
<p>卡卡字幕助手</p>
<h1>VideoCaptioner</h1>
<p>一款基于大语言模型(LLM)的视频字幕处理助手,支持语音识别、字幕断句、优化、翻译全流程处理</p>
简体中文 / [正體中文](./legacy-docs/README_TW.md) / [English](./legacy-docs/README_EN.md) / [日本語](./legacy-docs/README_JA.md)
📚 **[在线文档](https://weifeng2333.github.io/VideoCaptioner/)** | 🚀 **[快速开始](https://weifeng2333.github.io/VideoCaptioner/guide/getting-started)** | ⚙️ **[配置指南](https://weifeng2333.github.io/VideoCaptioner/config/llm)**
</div>
## 项目介绍
卡卡字幕助手(VideoCaptioner)操作简单且无需高配置,支持 API 和本地离线两种方式进行语音识别,利用大语言模型进行字幕智能断句、校正、翻译,字幕视频全流程一键处理。为视频配上效果惊艳的字幕。
- 支持词级时间戳与 VAD 语音活动检测,识别准确率高
- 基于 LLM 的语义理解,自动将逐字字幕重组为自然流畅的句子段落
- 结合上下文的 AI 翻译,支持反思优化机制,译文地道专业
- 支持批量视频字幕合成,提升处理效率
- 直观的字幕编辑查看界面,支持实时预览和快捷编辑
## 界面预览
<div align="center">
<img src="https://h1.appinn.me/file/1731487405884_main.png" alt="软件界面预览" width="90%" style="border-radius: 5px;">
</div>


## 测试
全流程处理一个14分钟1080P的 [B站英文 TED 视频](https://www.bilibili.com/video/BV1jT411X7Dz),调用本地 Whisper 模型进行语音识别,使用 `gpt-5-mini` 模型优化和翻译为中文,总共消耗时间约 **4 分钟**。
经后台计算,模型优化和翻译消耗费用不足 ¥0.01(以 OpenAI 官方价格计算)
具体字幕和视频合成的效果的测试结果图片,请参考 [TED视频测试](./legacy-docs/test.md)
## 快速开始
### Windows 用户
#### 方式一:使用打包程序(推荐)
软件较为轻量,打包大小不足 60M,已集成所有必要环境,下载后可直接运行。
1. 从 [Release](https://github.com/WEIFENG2333/VideoCaptioner/releases) 页面下载最新版本的可执行程序。或者:[蓝奏盘下载](https://wwwm.lanzoue.com/ii14G2pdsbej)
2. 打开安装包进行安装
3. LLM API 配置,(用于字幕断句、校正),可使用[本项目的中转站](https://api.videocaptioner.cn)
4. 翻译配置,选择是否启用翻译,翻译服务(默认使用微软翻译,质量一般,推荐配置自己的 API KEY 使用大模型翻译)
5. 语音识别配置(默认使用B接口网络调用语音识别服务,中英以外的语言请使用本地转录)
### macOS 用户
#### 一键安装运行(推荐)
```bash
# 方式一:直接运行(自动安装 uv、克隆项目、安装相关依赖)
curl -fsSL https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/main/scripts/run.sh | bash
# 方式二:先克隆再运行
git clone https://github.com/WEIFENG2333/VideoCaptioner.git
cd VideoCaptioner
./scripts/run.sh
```
脚本会自动:
1. 安装 [uv](https://docs.astral.sh/uv/) 包管理器(如果未安装)
2. 克隆项目到 `~/VideoCaptioner`(如果不在项目目录中运行)
3. 安装所有 Python 依赖
4. 启动应用
<details>
<summary>手动安装步骤</summary>
#### 1. 安装 uv 包管理器
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
#### 2. 安装系统依赖(macOS)
```bash
brew install ffmpeg
```
#### 3. 克隆并运行
```bash
git clone https://github.com/WEIFENG2333/VideoCaptioner.git
cd VideoCaptioner
uv sync # 安装依赖
uv run python main.py # 运行
```
</details>
### 开发者指南
```bash
# 安装依赖(包括开发依赖)
uv sync
# 运行应用
uv run python main.py
# 类型检查
uv run pyright
# 代码检查
uv run ruff check .
```
## 基本配置
### 1. LLM API 配置说明
LLM 大模型用于字幕断句、字幕优化以及字幕翻译(如果选择了 LLM 大模型翻译)。
| 配置项 | 说明 |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
| SiliconCloud | [SiliconCloud 官网](https://cloud.siliconflow.cn/i/onCHcaDx)配置方法请参考[配置文档](https://weifeng2333.github.io/VideoCaptioner/config/llm)<br>该并发较低,建议把线程设置为5以下。 |
| DeepSeek | [DeepSeek 官网](https://platform.deepseek.com),建议使用 `deepseek-v3` 模型,<br>官方网站最近服务好像并不太稳定。 |
| OpenAI兼容接口 | 如果有其他服务商的API,可直接在软件中填写。base_url 和api_key [VideoCaptioner API](https://api.videocaptioner.cn) |
注:如果用的 API 服务商不支持高并发,请在软件设置中将“线程数”调低,避免请求错误。
---
如果希望高并发,或者希望在软件内使用 OpenAI 或者 Claude 等优质大模型进行字幕校正和翻译,
可使用本项目的✨LLM API中转站✨: [https://api.videocaptioner.cn](https://api.videocaptioner.cn)
其支持高并发,性价比极高,且有国内外大量模型可挑选。
注册获取key之后,设置中按照下面配置:
BaseURL: `https://api.videocaptioner.cn/v1`
API-key: `个人中心-API 令牌页面自行获取。`
💡 模型选择建议 (本人在各质量层级中精选出的高性价比模型):
- 高质量之选: `gemini-3-pro`、`claude-sonnet-4-5-20250929` (耗费比例:3)
- 较高质量之选: `gpt-5-2025-08-07`、 `claude-haiku-4-5-20251001` (耗费比例:1.2)
- 中质量之选: `gpt-5-mini`、`gemini-3-flash` (耗费比例:0.3)
本站支持超高并发,软件中线程数直接拉满即可~ 处理速度非常快~
更详细的API配置教程:[中转站配置](https://weifeng2333.github.io/VideoCaptioner/config/llm)
---
### 2. 翻译配置
| 配置项 | 说明 |
| -------------- | ----------------------------------------------------------------------------------------------------------------------------- |
| LLM 大模型翻译 | 🌟 翻译质量最好的选择。使用 AI 大模型进行翻译,能更好理解上下文,翻译更自然。需要在设置中配置 LLM API(比如 OpenAI、DeepSeek 等) |
| 微软翻译 | 使用微软的翻译服务, 速度非常快 |
| 谷歌翻译 | 谷歌的翻译服务,速度快,但需要能访问谷歌的网络环境 |
推荐使用 `LLM 大模型翻译` ,翻译质量最好。
### 3. 语音识别接口说明
| 接口名称 | 支持语言 | 运行方式 | 说明 |
| ---------------- | -------------------------------------------------- | -------- | ----------------------------------------------------------------------------------------------------------------- |
| B接口 | 仅支持中文、英文 | 在线 | 免费、速度较快 |
| J接口 | 仅支持中文、英文 | 在线 | 免费、速度较快 |
| WhisperCpp | 中文、日语、韩语、英文等 99 种语言,外语效果较好 | 本地 | (实际使用不稳定)需要下载转录模型<br>中文建议medium以上模型<br>英文等使用较小模型即可达到不错效果。 |
| fasterWhisper 👍 | 中文、英文等 99 种语言,外语效果优秀,时间轴更准确 | 本地 | (🌟推荐🌟)需要下载程序和转录模型<br>支持CUDA,速度更快,转录准确。<br>超级准确的时间戳字幕。<br>仅支持 Windows |
### 4. 本地 Whisper 语音识别模型
Whisper 版本有 WhisperCpp 和 fasterWhisper(推荐) 两种,后者效果更好,都需要自行在软件内下载模型。
| 模型 | 磁盘空间 | 内存占用 | 说明 |
| ----------- | -------- | -------- | ----------------------------------- |
| Tiny | 75 MiB | ~273 MB | 转录很一般,仅用于测试 |
| Small | 466 MiB | ~852 MB | 英文识别效果已经不错 |
| Medium | 1.5 GiB | ~2.1 GB | 中文识别建议至少使用此版本 |
| Large-v2 👍 | 2.9 GiB | ~3.9 GB | 效果好,配置允许情况推荐使用 |
| Large-v3 | 2.9 GiB | ~3.9 GB | 社区反馈可能会出现幻觉/字幕重复问题 |
推荐模型: `Large-v2` 稳定且质量较好。
### 5. 文稿匹配
- 在"字幕优化与翻译"页面,包含"文稿匹配"选项,支持以下**一种或者多种**内容,辅助校正字幕和翻译:
| 类型 | 说明 | 填写示例 |
| ---------- | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
| 术语表 | 专业术语、人名、特定词语的修正对照表 | 机器学习->Machine Learning<br>马斯克->Elon Musk<br>打call -> 应援<br>图灵斑图<br>公交车悖论 |
| 原字幕文稿 | 视频的原有文稿或相关内容 | 完整的演讲稿、课程讲义等 |
| 修正要求 | 内容相关的具体修正要求 | 统一人称代词、规范专业术语等<br>填写**内容相关**的要求即可,[示例参考](https://github.com/WEIFENG2333/VideoCaptioner/issues/59#issuecomment-2495849752) |
- 如果需要文稿进行字幕优化辅助,全流程处理时,先填写文稿信息,再进行开始任务处理
- 注意: 使用上下文参数量不高的小型LLM模型时,建议控制文稿内容在1千字内,如果使用上下文较大的模型,则可以适当增加文稿内容。
无特殊需求,可不填写。
### 6. Cookie 配置说明
使用 URL 下载功能时,如果遇到以下情况:
1. 下载视频网站需要登录信息才可以下载;
2. 只能下载较低分辨率的视频;
3. 网络条件较差时需要验证;
- 请参考 [Cookie 配置说明](https://weifeng2333.github.io/VideoCaptioner/guide/cookies-config) 获取Cookie信息,并将cookies.txt文件放置到软件安装目录的 `AppData` 目录下,即可正常下载高质量视频。
## 软件流程介绍
程序简单的处理流程如下:
```
语音识别转录 -> 字幕断句(可选) -> 字幕优化翻译(可选) -> 字幕视频合成
```
## 软件主要功能
软件利用大语言模型(LLM)在理解上下文方面的优势,对语音识别生成的字幕进一步处理。有效修正错别字、统一专业术语,让字幕内容更加准确连贯,为用户带来出色的观看体验!
#### 1. 多平台视频下载与处理
- 支持国内外主流视频平台(B站、Youtube、小红书、TikTok、X、西瓜视频、抖音等)
- 自动提取视频原有字幕处理
#### 2. 专业的语音识别引擎
- 提供多种接口在线识别,效果媲美剪映(免费、高速)
- 支持本地Whisper模型(保护隐私、可离线)
#### 3. 字幕智能纠错
- 自动优化专业术语、代码片段和数学公式格式
- 上下文进行断句优化,提升阅读体验
- 支持文稿提示,使用原有文稿或者相关提示优化字幕断句
#### 4. 高质量字幕翻译
- 结合上下文的智能翻译,确保译文兼顾全文
- 通过Prompt指导大模型反思翻译,提升翻译质量
- 使用序列模糊匹配算法、保证时间轴完全一致
#### 5. 字幕样式调整
- 丰富的字幕样式模板(科普风、新闻风、番剧风等等)
- 多种格式字幕视频(SRT、ASS、VTT、TXT)
针对小白用户,对一些软件内的选项说明:
#### 1. 语音转录页面
- `VAD过滤`:开启后,VAD(语音活动检测)将过滤无人声的语音片段,从而减少幻觉现象。建议保持默认开启状态。如果不懂,其他VAD选项建议直接保持默认即可。
- `音频分离`:开启后,使用MDX-Net进行降噪处理,能够有效分离人声和背景音乐,从而提升音频质量。建议只在嘈杂的视频中开启。
#### 2. 字幕优化与翻译页面
- `智能断句`:开启后,全流程处理时生成字级时间戳,然后通过LLM大模型进行断句,从而在视频有更完美的观看体验。有按照句子断句和按照语义断句两种模式。可根据自己的需求配置。
- `字幕校正`:开启后,会通过LLM大模型对字幕内容进行校正(如:英文单词大小写、标点符号、错别字、数学公式和代码的格式等),提升字幕的质量。
- `反思翻译`:开启后,会通过LLM大模型进行反思翻译,提升翻译的质量。相应的会增加请求的时间和消耗的Token。(选项在 设置页-LLM大模型翻译-反思翻译 中开启。)
- `文稿提示`:填写后,这部分也将作为提示词发送给大模型,辅助字幕优化和翻译。
#### 3. 字幕视频合成页面
- `视频合成`:开启后,会合成字幕视频;关闭后将跳过视频合成的流程。
- `软字幕`:开启后,字幕不会烧录到视频中,处理速度极快。但是软字幕需要一些播放器(如PotPlayer)支持才可以进行显示播放。而且软字幕的样式不是软件内调整的字幕样式,而是播放器默认的白色样式。
项目主要目录结构说明如下:
```
VideoCaptioner/
├── app/ # 应用源代码目录
│ ├── common/ # 公共模块(配置、信号总线)
│ ├── components/ # UI 组件
│ ├── core/ # 核心业务逻辑(ASR、翻译、优化等)
│ ├── thread/ # 异步线程
│ └── view/ # 界面视图
├── resource/ # 资源文件目录
│ ├── assets/ # 图标、Logo 等
│ ├── bin/ # 二进制程序(FFmpeg、Whisper 等)
│ ├── fonts/ # 字体文件
│ ├── subtitle_style/ # 字幕样式模板
│ └── translations/ # 多语言翻译文件
├── work-dir/ # 工作目录(处理完成的视频和字幕)
├── AppData/ # 应用数据目录
│ ├── cache/ # 缓存目录(转录、LLM 请求)
│ ├── models/ # Whisper 模型文件
│ ├── logs/ # 日志文件
│ └── settings.json # 用户设置
├── scripts/ # 安装和运行脚本
├── main.py # 程序入口
└── pyproject.toml # 项目配置和依赖
```
## 📝 说明
1. 字幕断句的质量对观看体验至关重要。软件能将逐字字幕智能重组为符合自然语言习惯的段落,并与视频画面完美同步。
2. 在处理过程中,仅向大语言模型发送文本内容,不包含时间轴信息,这大大降低了处理开销。
3. 在翻译环节,我们采用吴恩达提出的"翻译-反思-翻译"方法论。这种迭代优化的方式确保了翻译的准确性。
4. 填入 YouTube 链接进行处理时,会自动下载视频的字幕,从而省去转录步骤,极大地节省操作时间。
## 🤝 贡献指南
项目在不断完善中,如果在使用过程中遇到 Bug,欢迎提交 [Issue](https://github.com/WEIFENG2333/VideoCaptioner/issues) 和 Pull Request 帮助改进项目。
## 📝 更新日志
查看完整的更新历史,请访问 [CHANGELOG.md](./CHANGELOG.md)
## 💖 支持作者
如果觉得项目对你有帮助,可以给项目点个Star!
<details>
<summary>捐助支持</summary>
<div align="center">
<img src="./legacy-docs/images/alipay.jpg" alt="支付宝二维码" width="30%">
<img src="./legacy-docs/images/wechat.jpg" alt="微信二维码" width="30%">
</div>
</details>
## ⭐ Star History
[](https://star-history.com/#WEIFENG2333/VideoCaptioner&Date)
================================================
FILE: app/__init__.py
================================================
================================================
FILE: app/common/config.py
================================================
# coding:utf-8
from enum import Enum
from PyQt5.QtCore import QLocale
from PyQt5.QtGui import QColor
from qfluentwidgets import (
BoolValidator,
ConfigItem,
ConfigSerializer,
EnumSerializer,
FolderValidator,
OptionsConfigItem,
OptionsValidator,
QConfig,
RangeConfigItem,
RangeValidator,
Theme,
qconfig,
)
from app.config import SETTINGS_PATH, WORK_PATH
from app.core.utils.platform_utils import get_available_transcribe_models
from ..core.entities import (
FasterWhisperModelEnum,
LLMServiceEnum,
SubtitleLayoutEnum,
SubtitleRenderModeEnum,
TranscribeLanguageEnum,
TranscribeModelEnum,
TranscribeOutputFormatEnum,
TranslatorServiceEnum,
VadMethodEnum,
VideoQualityEnum,
WhisperModelEnum,
)
from ..core.translate.types import TargetLanguage
class Language(Enum):
    """Software (UI) language choices; each member's value is a QLocale."""

    # Chinese language paired with the China country/region.
    CHINESE_SIMPLIFIED = QLocale(QLocale.Chinese, QLocale.China)
    # Chinese language paired with the Hong Kong country/region.
    CHINESE_TRADITIONAL = QLocale(QLocale.Chinese, QLocale.HongKong)
    # English with no explicit country/region argument.
    ENGLISH = QLocale(QLocale.English)
    # Default-constructed QLocale (presumably the system locale -- confirm
    # against Qt docs). LanguageSerializer persists this member as "Auto".
    AUTO = QLocale()
class LanguageSerializer(ConfigSerializer):
    """Translate ``Language`` members to and from their stored string form."""

    def serialize(self, language):
        """Return ``"Auto"`` for ``Language.AUTO``, else the locale's name."""
        if language == Language.AUTO:
            return "Auto"
        return language.value.name()

    def deserialize(self, value: str):
        """Return the ``Language`` member matching a stored string.

        ``"Auto"`` maps to ``Language.AUTO``; any other string is turned into
        a ``QLocale`` and looked up by value in ``Language``.
        """
        if value == "Auto":
            return Language.AUTO
        locale = QLocale(value)
        return Language(locale)
class PlatformAwareTranscribeModelValidator(OptionsValidator):
    """Platform-dependent validator for the transcribe-model setting.

    The accepted options come from ``get_available_transcribe_models()``;
    per the original note, FasterWhisper is filtered out on macOS.
    """

    def __init__(self):
        # The parent __init__ is intentionally not called: the option list
        # is customised here instead of being passed in by the caller.
        self._options = get_available_transcribe_models()

    @property
    def options(self):
        """Transcribe models accepted by this validator."""
        return self._options

    def validate(self, value):
        """Return whether ``value`` is one of the available models."""
        available = self._options
        return value in available

    def correct(self, value):
        """Return ``value`` when valid, otherwise the first available model."""
        if self.validate(value):
            return value
        return self._options[0]
class Config(QConfig):
    """Application configuration.

    Each attribute is a qfluentwidgets config item declared as
    ``(group, key, default[, validator[, serializer]])``.
    """

    # ------------------- LLM -------------------
    llm_service = OptionsConfigItem(
        "LLM",
        "LLMService",
        LLMServiceEnum.OPENAI,
        OptionsValidator(LLMServiceEnum),
        EnumSerializer(LLMServiceEnum),
    )

    # OpenAI
    openai_model = ConfigItem("LLM", "OpenAI_Model", "gpt-4o-mini")
    openai_api_key = ConfigItem("LLM", "OpenAI_API_Key", "")
    openai_api_base = ConfigItem(
        "LLM", "OpenAI_API_Base", "https://api.openai.com/v1"
    )

    # SiliconCloud
    silicon_cloud_model = ConfigItem("LLM", "SiliconCloud_Model", "gpt-4o-mini")
    silicon_cloud_api_key = ConfigItem("LLM", "SiliconCloud_API_Key", "")
    silicon_cloud_api_base = ConfigItem(
        "LLM", "SiliconCloud_API_Base", "https://api.siliconflow.cn/v1"
    )

    # DeepSeek
    deepseek_model = ConfigItem("LLM", "DeepSeek_Model", "deepseek-chat")
    deepseek_api_key = ConfigItem("LLM", "DeepSeek_API_Key", "")
    deepseek_api_base = ConfigItem(
        "LLM", "DeepSeek_API_Base", "https://api.deepseek.com/v1"
    )

    # Ollama
    ollama_model = ConfigItem("LLM", "Ollama_Model", "llama2")
    ollama_api_key = ConfigItem("LLM", "Ollama_API_Key", "ollama")
    ollama_api_base = ConfigItem(
        "LLM", "Ollama_API_Base", "http://localhost:11434/v1"
    )

    # LM Studio
    lm_studio_model = ConfigItem("LLM", "LmStudio_Model", "qwen2.5:7b")
    lm_studio_api_key = ConfigItem("LLM", "LmStudio_API_Key", "lmstudio")
    lm_studio_api_base = ConfigItem(
        "LLM", "LmStudio_API_Base", "http://localhost:1234/v1"
    )

    # Gemini
    gemini_model = ConfigItem("LLM", "Gemini_Model", "gemini-pro")
    gemini_api_key = ConfigItem("LLM", "Gemini_API_Key", "")
    gemini_api_base = ConfigItem(
        "LLM",
        "Gemini_API_Base",
        "https://generativelanguage.googleapis.com/v1beta/openai/",
    )

    # ChatGLM
    chatglm_model = ConfigItem("LLM", "ChatGLM_Model", "glm-4")
    chatglm_api_key = ConfigItem("LLM", "ChatGLM_API_Key", "")
    chatglm_api_base = ConfigItem(
        "LLM", "ChatGLM_API_Base", "https://open.bigmodel.cn/api/paas/v4"
    )

    # ------------------- Translation -------------------
    translator_service = OptionsConfigItem(
        "Translate",
        "TranslatorServiceEnum",
        TranslatorServiceEnum.BING,
        OptionsValidator(TranslatorServiceEnum),
        EnumSerializer(TranslatorServiceEnum),
    )
    need_reflect_translate = ConfigItem(
        "Translate", "NeedReflectTranslate", False, BoolValidator()
    )
    deeplx_endpoint = ConfigItem("Translate", "DeeplxEndpoint", "")
    batch_size = RangeConfigItem(
        "Translate", "BatchSize", 10, RangeValidator(5, 50)
    )
    thread_num = RangeConfigItem(
        "Translate", "ThreadNum", 10, RangeValidator(1, 50)
    )

    # ------------------- Transcription -------------------
    transcribe_model = OptionsConfigItem(
        "Transcribe",
        "TranscribeModel",
        TranscribeModelEnum.BIJIAN,
        PlatformAwareTranscribeModelValidator(),
        EnumSerializer(TranscribeModelEnum),
    )
    transcribe_output_format = OptionsConfigItem(
        "Transcribe",
        "OutputFormat",
        TranscribeOutputFormatEnum.SRT,
        OptionsValidator(TranscribeOutputFormatEnum),
        EnumSerializer(TranscribeOutputFormatEnum),
    )
    transcribe_language = OptionsConfigItem(
        "Transcribe",
        "TranscribeLanguage",
        TranscribeLanguageEnum.AUTO,
        OptionsValidator(TranscribeLanguageEnum),
        EnumSerializer(TranscribeLanguageEnum),
    )

    # ------------------- Whisper Cpp -------------------
    whisper_model = OptionsConfigItem(
        "Whisper",
        "WhisperModel",
        WhisperModelEnum.TINY,
        OptionsValidator(WhisperModelEnum),
        EnumSerializer(WhisperModelEnum),
    )

    # ------------------- Faster Whisper -------------------
    faster_whisper_program = ConfigItem(
        "FasterWhisper", "Program", "faster-whisper-xxl.exe"
    )
    faster_whisper_model = OptionsConfigItem(
        "FasterWhisper",
        "Model",
        FasterWhisperModelEnum.TINY,
        OptionsValidator(FasterWhisperModelEnum),
        EnumSerializer(FasterWhisperModelEnum),
    )
    faster_whisper_model_dir = ConfigItem("FasterWhisper", "ModelDir", "")
    faster_whisper_device = OptionsConfigItem(
        "FasterWhisper",
        "Device",
        "cuda",
        OptionsValidator(["cuda", "cpu"]),
    )

    # VAD (voice activity detection) parameters
    faster_whisper_vad_filter = ConfigItem(
        "FasterWhisper", "VadFilter", True, BoolValidator()
    )
    faster_whisper_vad_threshold = RangeConfigItem(
        "FasterWhisper", "VadThreshold", 0.4, RangeValidator(0, 1)
    )
    faster_whisper_vad_method = OptionsConfigItem(
        "FasterWhisper",
        "VadMethod",
        VadMethodEnum.SILERO_V4,
        OptionsValidator(VadMethodEnum),
        EnumSerializer(VadMethodEnum),
    )

    # Vocal extraction
    faster_whisper_ff_mdx_kim2 = ConfigItem(
        "FasterWhisper", "FfMdxKim2", False, BoolValidator()
    )

    # Text processing parameters
    faster_whisper_one_word = ConfigItem(
        "FasterWhisper", "OneWord", True, BoolValidator()
    )

    # Prompt
    faster_whisper_prompt = ConfigItem("FasterWhisper", "Prompt", "")

    # ------------------- Whisper API -------------------
    whisper_api_base = ConfigItem("WhisperAPI", "WhisperApiBase", "")
    whisper_api_key = ConfigItem("WhisperAPI", "WhisperApiKey", "")
    # NOTE(review): declared as an OptionsConfigItem but without an
    # OptionsValidator -- confirm nothing reads `.options` on this item.
    whisper_api_model = OptionsConfigItem("WhisperAPI", "WhisperApiModel", "")
    whisper_api_prompt = ConfigItem("WhisperAPI", "WhisperApiPrompt", "")

    # ------------------- Subtitle processing -------------------
    need_optimize = ConfigItem(
        "Subtitle", "NeedOptimize", False, BoolValidator()
    )
    need_translate = ConfigItem(
        "Subtitle", "NeedTranslate", False, BoolValidator()
    )
    need_split = ConfigItem("Subtitle", "NeedSplit", False, BoolValidator())
    target_language = OptionsConfigItem(
        "Subtitle",
        "TargetLanguage",
        TargetLanguage.SIMPLIFIED_CHINESE,
        OptionsValidator(TargetLanguage),
        EnumSerializer(TargetLanguage),
    )
    # NOTE(review): these two use a plain ConfigItem with a RangeValidator,
    # unlike the RangeConfigItem declarations elsewhere in this class --
    # confirm that is intentional.
    max_word_count_cjk = ConfigItem(
        "Subtitle", "MaxWordCountCJK", 28, RangeValidator(8, 100)
    )
    max_word_count_english = ConfigItem(
        "Subtitle", "MaxWordCountEnglish", 20, RangeValidator(8, 100)
    )
    custom_prompt_text = ConfigItem("Subtitle", "CustomPromptText", "")

    # ------------------- Subtitle / video synthesis -------------------
    soft_subtitle = ConfigItem("Video", "SoftSubtitle", False, BoolValidator())
    need_video = ConfigItem("Video", "NeedVideo", True, BoolValidator())
    video_quality = OptionsConfigItem(
        "Video",
        "VideoQuality",
        VideoQualityEnum.MEDIUM,
        OptionsValidator(VideoQualityEnum),
        EnumSerializer(VideoQualityEnum),
    )
    use_subtitle_style = ConfigItem(
        "Video", "UseSubtitleStyle", False, BoolValidator()
    )

    # ------------------- Subtitle style -------------------
    subtitle_style_name = ConfigItem("SubtitleStyle", "StyleName", "default")
    subtitle_layout = OptionsConfigItem(
        "SubtitleStyle",
        "Layout",
        SubtitleLayoutEnum.TRANSLATE_ON_TOP,
        OptionsValidator(SubtitleLayoutEnum),
        EnumSerializer(SubtitleLayoutEnum),
    )
    subtitle_preview_image = ConfigItem("SubtitleStyle", "PreviewImage", "")

    # Subtitle render mode
    subtitle_render_mode = OptionsConfigItem(
        "SubtitleStyle",
        "RenderMode",
        SubtitleRenderModeEnum.ROUNDED_BG,
        OptionsValidator(SubtitleRenderModeEnum),
        EnumSerializer(SubtitleRenderModeEnum),
    )

    # Rounded-background render mode settings
    rounded_bg_font_name = ConfigItem(
        "RoundedBgStyle", "FontName", "LXGW WenKai"
    )
    rounded_bg_font_size = RangeConfigItem(
        "RoundedBgStyle", "FontSize", 52, RangeValidator(16, 120)
    )
    # Background colour: semi-transparent dark grey (R=25, G=25, B=25, A=200)
    rounded_bg_color = ConfigItem("RoundedBgStyle", "BgColor", "#191919C8")
    rounded_bg_text_color = ConfigItem(
        "RoundedBgStyle", "TextColor", "#FFFFFF"
    )
    rounded_bg_corner_radius = RangeConfigItem(
        "RoundedBgStyle", "CornerRadius", 12, RangeValidator(0, 50)
    )
    rounded_bg_padding_h = RangeConfigItem(
        "RoundedBgStyle", "PaddingH", 28, RangeValidator(4, 100)
    )
    rounded_bg_padding_v = RangeConfigItem(
        "RoundedBgStyle", "PaddingV", 14, RangeValidator(4, 50)
    )
    rounded_bg_margin_bottom = RangeConfigItem(
        "RoundedBgStyle", "MarginBottom", 60, RangeValidator(20, 300)
    )
    rounded_bg_line_spacing = RangeConfigItem(
        "RoundedBgStyle", "LineSpacing", 10, RangeValidator(0, 50)
    )
    rounded_bg_letter_spacing = RangeConfigItem(
        "RoundedBgStyle", "LetterSpacing", 0, RangeValidator(0, 20)
    )

    # ------------------- Saving -------------------
    work_dir = ConfigItem("Save", "Work_Dir", WORK_PATH, FolderValidator())

    # ------------------- Main window -------------------
    micaEnabled = ConfigItem(
        "MainWindow", "MicaEnabled", False, BoolValidator()
    )
    dpiScale = OptionsConfigItem(
        "MainWindow",
        "DpiScale",
        "Auto",
        OptionsValidator([1, 1.25, 1.5, 1.75, 2, "Auto"]),
        restart=True,
    )
    language = OptionsConfigItem(
        "MainWindow",
        "Language",
        Language.AUTO,
        OptionsValidator(Language),
        LanguageSerializer(),
        restart=True,
    )

    # ------------------- Updates -------------------
    checkUpdateAtStartUp = ConfigItem(
        "Update", "CheckUpdateAtStartUp", True, BoolValidator()
    )

    # ------------------- Cache -------------------
    cache_enabled = ConfigItem("Cache", "CacheEnabled", True, BoolValidator())
# Module-level configuration instance imported by the rest of the application.
cfg = Config()
# Theme mode and accent colour are assigned before the settings file is loaded.
cfg.themeMode.value = Theme.DARK
cfg.themeColor.value = QColor("#ff28f08b")
# Load the settings stored at SETTINGS_PATH into cfg.
qconfig.load(SETTINGS_PATH, cfg)
================================================
FILE: app/common/signal_bus.py
================================================
from PyQt5.QtCore import QObject, QUrl, pyqtSignal
class SignalBus(QObject):
    """Application-wide collection of Qt signals.

    The first group of signals announces setting changes; the second group
    carries video-player commands, each of which also has a small
    convenience method that emits it.
    """

    # ---- setting-change signals ----
    subtitle_layout_changed = pyqtSignal(str)  # subtitle layout
    subtitle_optimization_changed = pyqtSignal(bool)  # subtitle optimization
    subtitle_translation_changed = pyqtSignal(bool)  # subtitle translation
    target_language_changed = pyqtSignal(str)  # translation target language
    transcription_model_changed = pyqtSignal(str)  # transcription model
    soft_subtitle_changed = pyqtSignal(bool)  # soft subtitle
    need_video_changed = pyqtSignal(bool)  # video synthesis
    video_quality_changed = pyqtSignal(str)  # video quality
    use_subtitle_style_changed = pyqtSignal(bool)  # use subtitle style
    subtitle_render_mode_changed = pyqtSignal(str)  # render mode

    # ---- video-control signals ----
    video_play = pyqtSignal()  # play
    video_pause = pyqtSignal()  # pause
    video_stop = pyqtSignal()  # stop
    video_source_changed = pyqtSignal(QUrl)  # video source changed
    video_segment_play = pyqtSignal(int, int)  # play segment: start, end (ms)
    video_subtitle_added = pyqtSignal(str)  # subtitle file added

    # ---- video-control helpers ----
    def play_video(self):
        """Emit ``video_play``."""
        self.video_play.emit()

    def pause_video(self):
        """Emit ``video_pause``."""
        self.video_pause.emit()

    def stop_video(self):
        """Emit ``video_stop``."""
        self.video_stop.emit()

    def set_video_source(self, url: QUrl):
        """Emit ``video_source_changed``.

        Args:
            url: URL of the video file.
        """
        self.video_source_changed.emit(url)

    def play_video_segment(self, start_time: int, end_time: int):
        """Emit ``video_segment_play`` for a time range.

        Args:
            start_time: Start time in milliseconds.
            end_time: End time in milliseconds.
        """
        self.video_segment_play.emit(start_time, end_time)

    def add_subtitle(self, subtitle_file: str):
        """Emit ``video_subtitle_added``.

        Args:
            subtitle_file: Path of the subtitle file.
        """
        self.video_subtitle_added.emit(subtitle_file)
# Module-level SignalBus instance created at import time.
signalBus = SignalBus()
================================================
FILE: app/components/DonateDialog.py
================================================
import os
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QPixmap
from PyQt5.QtWidgets import QHBoxLayout, QLabel, QVBoxLayout
from qfluentwidgets import BodyLabel, MessageBoxBase
from app.config import ASSETS_PATH
class DonateDialog(MessageBoxBase):
    """Dialog showing the Alipay and WeChat donation QR codes side by side."""

    def __init__(self, parent=None):
        super().__init__(parent)

        # Locations of the QR code images.
        self.WECHAT_QR_PATH = os.path.join(ASSETS_PATH, "donate_green.jpg")
        self.ALIPAY_QR_PATH = os.path.join(ASSETS_PATH, "donate_blue.jpg")

        self.setup_ui()
        self.setWindowTitle(self.tr("支持作者"))

    def _build_qr_column(self, image_path, caption):
        """Create one QR column.

        Returns:
            A ``(container, qr_label, caption_label)`` tuple: the vertical
            layout, the label holding the 300x300 scaled pixmap, and the
            centred caption below it.
        """
        container = QVBoxLayout()

        qr_label = QLabel()
        pixmap = QPixmap(image_path).scaled(
            300,
            300,
            Qt.AspectRatioMode.KeepAspectRatio,
            Qt.SmoothTransformation,  # type: ignore
        )
        qr_label.setPixmap(pixmap)

        caption_label = BodyLabel(caption)
        caption_label.setAlignment(Qt.AlignCenter)  # type: ignore

        container.addWidget(qr_label, alignment=Qt.AlignCenter)  # type: ignore
        container.addWidget(caption_label)
        return container, qr_label, caption_label

    def setup_ui(self):
        """Build the title, description, QR codes and button row."""
        # Title and description text.
        self.titleLabel = BodyLabel(self.tr("感谢支持"), self)
        self.descLabel = BodyLabel(
            self.tr(
                "目前本人精力有限,您的支持让我有动力继续折腾这个项目!\n感谢您对开源事业的热爱与支持!"
            ),
            self,
        )
        self.descLabel.setAlignment(Qt.AlignCenter)  # type: ignore

        # Horizontal row holding the two QR codes.
        self.qrLayout = QHBoxLayout()

        # Alipay column, then WeChat column.
        (
            self.alipayContainer,
            self.alipayQR,
            self.alipayLabel,
        ) = self._build_qr_column(self.ALIPAY_QR_PATH, self.tr("支付宝"))
        (
            self.wechatContainer,
            self.wechatQR,
            self.wechatLabel,
        ) = self._build_qr_column(self.WECHAT_QR_PATH, self.tr("微信"))

        for column in (self.alipayContainer, self.wechatContainer):
            self.qrLayout.addLayout(column)

        # Assemble the dialog body.
        self.viewLayout.setSpacing(30)
        self.viewLayout.addWidget(self.titleLabel)
        self.viewLayout.addWidget(self.descLabel)
        self.viewLayout.addLayout(self.qrLayout)

        # Minimum dialog size.
        self.widget.setMinimumWidth(800)
        self.widget.setMinimumHeight(500)

        # Only the cancel button is shown, relabelled as "close".
        self.yesButton.hide()
        self.cancelButton.setText(self.tr("关闭"))
================================================
FILE: app/components/EditComboBoxSettingCard.py
================================================
from typing import List, Optional, Union
from PyQt5.QtCore import Qt, pyqtSignal
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QCompleter
from qfluentwidgets import EditableComboBox, SettingCard
from qfluentwidgets.common.config import ConfigItem, qconfig
class EditComboBoxSettingCard(SettingCard):
    """Setting card containing an editable combo box bound to a config item.

    The card keeps its own copy of the option list, so later calls to
    ``addItems`` never modify a list owned by the caller.
    """

    # Emitted with the new text whenever the combo box text changes.
    currentTextChanged = pyqtSignal(str)

    def __init__(
        self,
        configItem: ConfigItem,
        icon: Union[str, QIcon],
        title: str,
        content: Optional[str] = None,
        items: Optional[List[str]] = None,
        parent=None,
    ):
        """Create the card.

        Args:
            configItem: Config item read for the initial value and written
                on every text change.
            icon: Icon shown on the card.
            title: Card title.
            content: Optional descriptive text.
            items: Optional initial combo box entries.
            parent: Optional parent widget.
        """
        super().__init__(icon, title, content, parent)
        self.configItem = configItem
        # Copy the list: addItems() extends self.items in place and must not
        # alter the caller's list.
        self.items = list(items) if items else []

        # Editable combo box pre-filled with the given entries.
        self.comboBox = EditableComboBox(self)
        for item in self.items:
            self.comboBox.addItem(item)

        # Search / auto-completion.
        self._setupCompleter()

        # Layout.
        self.hBoxLayout.addWidget(self.comboBox, 1, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(16)
        self.comboBox.setMinimumWidth(280)

        # Initial value from the config.
        self.setValue(qconfig.get(configItem))

        # Keep widget and config item in sync in both directions.
        self.comboBox.currentTextChanged.connect(self.__onTextChanged)
        configItem.valueChanged.connect(self.setValue)

    def _setupCompleter(self):
        """Install a case-insensitive, substring-matching completer.

        Does nothing when there are no items.
        """
        if not self.items:
            return
        completer = QCompleter(self.items, self)
        completer.setCaseSensitivity(Qt.CaseInsensitive)  # type: ignore
        completer.setFilterMode(Qt.MatchContains)  # type: ignore
        self.comboBox.setCompleter(completer)

    def __onTextChanged(self, text: str):
        """Store the new text in the config and re-emit it."""
        self.setValue(text)
        self.currentTextChanged.emit(text)

    def setValue(self, value: str):
        """Write ``value`` to the config item and show it in the combo box."""
        qconfig.set(self.configItem, value)
        self.comboBox.setText(value)

    def addItems(self, items: List[str]):
        """Append entries to the combo box and the completer."""
        # Snapshot first so passing self.items itself is also safe.
        new_items = list(items)
        for item in new_items:
            self.comboBox.addItem(item)
        self.items.extend(new_items)
        self._setupCompleter()

    def setItems(self, items: List[str]):
        """Replace all combo box entries with ``items``."""
        self.comboBox.clear()
        # Copy for the same reason as in __init__.
        self.items = list(items)
        for item in self.items:
            self.comboBox.addItem(item)
        self._setupCompleter()
================================================
FILE: app/components/FasterWhisperSettingWidget.py
================================================
import os
import subprocess
from pathlib import Path
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QShowEvent
from PyQt5.QtWidgets import (
QHBoxLayout,
QHeaderView,
QTableWidgetItem,
QVBoxLayout,
QWidget,
)
from qfluentwidgets import (
BodyLabel,
ComboBox,
ComboBoxSettingCard,
HyperlinkButton,
HyperlinkCard,
InfoBar,
InfoBarPosition,
MessageBoxBase,
ProgressBar,
PushButton,
SettingCardGroup,
SingleDirectionScrollArea,
SubtitleLabel,
SwitchSettingCard,
TableItemDelegate,
TableWidget,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.components.LineEditSettingCard import LineEditSettingCard
from app.components.SpinBoxSettingCard import DoubleSpinBoxSettingCard
from app.config import BIN_PATH, MODEL_PATH
from app.core.entities import (
FasterWhisperModelEnum,
TranscribeLanguageEnum,
VadMethodEnum,
)
from app.core.utils.platform_utils import open_folder
from app.thread.file_download_thread import FileDownloadThread
from app.thread.modelscope_download_thread import ModelscopeDownloadThread
# Constant definitions.
# Both program downloads are served from the same ModelScope repository.
_FASTER_WHISPER_PROGRAM_BASE_URL = (
    "https://modelscope.cn/models/bkfengg/whisper-cpp/resolve/master/"
)

# Downloadable Faster Whisper program builds: display label, file name saved
# under the bin directory, build type, display size and download URL.
FASTER_WHISPER_PROGRAMS = [
    dict(
        label="GPU(cuda) + CPU 版本",
        value="faster-whisper-gpu.7z",
        type="GPU",
        size="1.35 GB",
        downloadLink=_FASTER_WHISPER_PROGRAM_BASE_URL
        + "Faster-Whisper-XXL_r245.2_windows.7z",
    ),
    dict(
        label="CPU版本",
        value="faster-whisper.exe",
        type="CPU",
        size="78.7 MB",
        downloadLink=_FASTER_WHISPER_PROGRAM_BASE_URL + "whisper-faster.exe",
    ),
]
# (label, size) for every downloadable model. The size is kept as a string;
# the download dialog divides it by 1024 to display megabytes.
_FASTER_WHISPER_MODEL_SPECS = (
    ("Tiny", "77824"),
    ("Base", "148480"),
    ("Small", "495616"),
    ("Medium", "1572864"),
    ("Large-v1", "3145728"),
    ("Large-v2", "3145728"),
    ("Large-v3", "3145728"),
    ("Large-v3-turbo", "1720320"),
)

# Every entry follows the same naming scheme: the lower-cased label prefixed
# with "faster-whisper-" is the local directory name and the repository name
# on both Hugging Face (Systran) and ModelScope (pengzhendong).
FASTER_WHISPER_MODELS = [
    {
        "label": label,
        "value": f"faster-whisper-{label.lower()}",
        "size": size,
        "downloadLink": (
            f"https://huggingface.co/Systran/faster-whisper-{label.lower()}"
        ),
        "modelScopeLink": f"pengzhendong/faster-whisper-{label.lower()}",
    }
    for label, size in _FASTER_WHISPER_MODEL_SPECS
]
# Module-level helper shared by the download dialog and the setting widget.
def check_faster_whisper_exists() -> tuple[bool, list[str]]:
    """Check which Faster Whisper programs are present under ``BIN_PATH``.

    Two locations are probed:

    1. ``faster-whisper.exe`` directly in the bin directory (CPU build).
    2. ``Faster-Whisper-XXL/faster-whisper-xxl.exe`` (counted as providing
       both the GPU and the CPU build).

    Returns:
        ``(found, versions)`` where ``found`` is True when at least one
        program exists and ``versions`` lists the installed build types
        without duplicates, always in the order ``"GPU"``, ``"CPU"`` so the
        text shown to the user does not vary between runs.
    """
    bin_path = Path(BIN_PATH)
    found = set()

    # CPU build: single executable in the bin directory.
    if (bin_path / "faster-whisper.exe").exists():
        found.add("CPU")

    # XXL build: lives in its own sub-directory.
    if (bin_path / "Faster-Whisper-XXL" / "faster-whisper-xxl.exe").exists():
        found.update(("GPU", "CPU"))

    # Emit in a fixed order instead of relying on set iteration order.
    installed_versions = [v for v in ("GPU", "CPU") if v in found]
    return bool(installed_versions), installed_versions
# Background thread used to unpack the downloaded program archive.
class UnzipThread(QThread):
    """Extract a 7z archive by running the ``7z`` command in a thread."""

    finished = pyqtSignal()  # emitted after extraction and archive removal
    error = pyqtSignal(str)  # emitted with a message when anything fails

    def __init__(self, zip_file, extract_path):
        """Remember the archive path and the extraction directory."""
        super().__init__()
        self.zip_file = zip_file
        self.extract_path = extract_path

    def run(self):
        """Extract the archive, delete it, then emit ``finished``.

        Any failure is reported through ``error`` instead of being raised.
        """
        command = ["7z", "x", self.zip_file, f"-o{self.extract_path}", "-y"]
        try:
            # Suppress the console window on Windows only.
            if os.name == "nt":
                flags = subprocess.CREATE_NO_WINDOW
            else:
                flags = 0
            subprocess.run(command, check=True, creationflags=flags)
            # Remove the archive once it has been unpacked.
            os.remove(self.zip_file)
            self.finished.emit()
        except subprocess.CalledProcessError as e:
            self.error.emit(f"解压失败: {str(e)}")
        except Exception as e:
            self.error.emit(str(e))
class FasterWhisperDownloadDialog(MessageBoxBase):
    """Dialog for downloading the Faster Whisper program and its models.

    The upper section offers the program builds that are not installed yet,
    the lower section lists every model with its download state, and a
    shared progress bar/label reports the running download.
    """

    # Class-level flag: True while any program or model download is running.
    is_downloading = False

    def __init__(self, parent=None, setting_widget=None):
        """Create the dialog.

        Args:
            parent: Optional parent widget.
            setting_widget: Optional settings widget whose model combo box
                is refreshed after a model download finishes.
        """
        super().__init__(parent)
        self.widget.setMinimumWidth(600)
        self.program_download_thread = None
        self.model_download_thread = None
        self.unzip_thread = None
        self._setup_ui()
        self._connect_signals()
        self.setting_widget = setting_widget

    def _setup_ui(self):
        """Build the dialog body."""
        layout = QVBoxLayout()
        self._setup_program_section(layout)
        layout.addSpacing(20)
        self._setup_model_section(layout)
        self._setup_progress_section(layout)
        self.viewLayout.addLayout(layout)
        self.cancelButton.setText(self.tr("关闭"))
        self.yesButton.hide()

    def _setup_program_section(self, layout):
        """Build the program download section."""
        # Title row with an "open folder" button on the right.
        title_layout = QHBoxLayout()
        faster_whisper_title = SubtitleLabel(self.tr("Faster Whisper 下载"), self)
        title_layout.addWidget(faster_whisper_title)

        open_folder_btn = HyperlinkButton("", self.tr("打开程序文件夹"), parent=self)
        open_folder_btn.setIcon(FIF.FOLDER)
        open_folder_btn.clicked.connect(self._open_program_folder)
        title_layout.addStretch()
        title_layout.addWidget(open_folder_btn)
        layout.addLayout(title_layout)
        layout.addSpacing(8)

        # Report which builds are already installed.
        has_program, installed_versions = check_faster_whisper_exists()
        if has_program:
            versions_text = " + ".join(installed_versions)
            program_status = BodyLabel(self.tr(f"已安装版本: {versions_text}"), self)
            program_status.setStyleSheet("color: green")
            layout.addWidget(program_status)
            # With a single build installed, another one can still be added.
            if len(installed_versions) == 1:
                desc_label = BodyLabel(self.tr("您可以继续下载其他版本:"), self)
                layout.addWidget(desc_label)
        else:
            desc_label = BodyLabel(self.tr("未下载Faster Whisper 程序"), self)
            layout.addWidget(desc_label)

        # Download controls; the combo box stays hidden until it has entries.
        program_layout = QHBoxLayout()
        self.program_combo = ComboBox(self)
        self.program_combo.setFixedWidth(300)
        self.program_combo.hide()

        # Offer only the builds that are not installed yet.
        for program in FASTER_WHISPER_PROGRAMS:
            version_type = program["type"]
            if version_type not in installed_versions:
                self.program_combo.addItem(f"{program['label']} ({program['size']})")

        # The download button exists only when something can be downloaded.
        if self.program_combo.count() > 0:
            self.program_combo.show()
            self.program_download_btn = PushButton(self.tr("下载程序"), self)
            self.program_download_btn.clicked.connect(self._start_download)
            program_layout.addWidget(self.program_combo)
            program_layout.addWidget(self.program_download_btn)
            program_layout.addStretch()
            layout.addLayout(program_layout)

    def _setup_model_section(self, layout):
        """Build the model download section."""
        # Title row with an "open folder" button on the right.
        title_layout = QHBoxLayout()
        model_title = SubtitleLabel(self.tr("模型下载"), self)
        title_layout.addWidget(model_title)

        open_folder_btn = HyperlinkButton("", self.tr("打开模型文件夹"), parent=self)
        open_folder_btn.setIcon(FIF.FOLDER)
        open_folder_btn.clicked.connect(self._open_model_folder)
        title_layout.addStretch()
        title_layout.addWidget(open_folder_btn)
        layout.addLayout(title_layout)
        layout.addSpacing(8)

        # Model table.
        self.model_table = self._create_model_table()
        self._populate_model_table()
        layout.addWidget(self.model_table)

    def _create_model_table(self):
        """Create the read-only four-column model table."""
        table = TableWidget(self)
        table.setEditTriggers(TableWidget.NoEditTriggers)
        table.setSelectionMode(TableWidget.NoSelection)
        table.setColumnCount(4)
        table.setHorizontalHeaderLabels(
            [self.tr("模型名称"), self.tr("大小"), self.tr("状态"), self.tr("操作")]
        )

        # Appearance.
        table.setBorderVisible(True)
        table.setBorderRadius(8)
        table.setItemDelegate(TableItemDelegate(table))

        # The name column stretches; the other columns have fixed widths.
        header = table.horizontalHeader()
        header.setSectionResizeMode(0, QHeaderView.Stretch)
        header.setSectionResizeMode(1, QHeaderView.Fixed)
        header.setSectionResizeMode(2, QHeaderView.Fixed)
        header.setSectionResizeMode(3, QHeaderView.Fixed)
        table.setColumnWidth(1, 100)
        table.setColumnWidth(2, 80)
        table.setColumnWidth(3, 150)

        # Row height.
        row_height = 45
        table.verticalHeader().setDefaultSectionSize(row_height)

        # Fixed table height sized for six visible rows plus the header.
        header_height = 20
        max_visible_rows = 6
        table_height = row_height * max_visible_rows + header_height + 15
        table.setFixedHeight(table_height)
        return table

    def _setup_progress_section(self, layout):
        """Create the initially hidden progress bar and progress label."""
        self.progress_bar = ProgressBar(self)
        self.progress_label = BodyLabel("", self)
        self.progress_bar.hide()
        self.progress_label.hide()
        layout.addWidget(self.progress_bar)
        layout.addWidget(self.progress_label)

    def _populate_model_table(self):
        """Fill the table with one row per entry of FASTER_WHISPER_MODELS."""
        self.model_table.setRowCount(len(FASTER_WHISPER_MODELS))
        for i, model in enumerate(FASTER_WHISPER_MODELS):
            self._add_model_row(i, model)

    def _add_model_row(self, row, model):
        """Fill one table row: name, size, download state and action button."""
        # Model name.
        name_item = QTableWidgetItem(model["label"])
        name_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 0, name_item)

        # Size, shown in MB.
        size_item = QTableWidgetItem(f"{int(model['size']) / 1024:.1f} MB")
        size_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 1, size_item)

        # State: a model counts as downloaded when its model.bin exists.
        model_path = os.path.join(MODEL_PATH, model["value"])
        model_bin_path = os.path.join(model_path, "model.bin")
        is_downloaded = os.path.exists(model_bin_path)
        status_item = QTableWidgetItem(
            self.tr("已下载") if is_downloaded else self.tr("未下载")
        )
        if is_downloaded:
            status_item.setForeground(Qt.green)  # type: ignore
        status_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 2, status_item)

        # Download / re-download button, centred in its cell.
        button_container = QWidget()
        button_layout = QHBoxLayout(button_container)
        button_layout.setContentsMargins(4, 4, 4, 4)
        download_btn = HyperlinkButton(
            "",
            self.tr("重新下载") if is_downloaded else self.tr("下载"),
            parent=self,
        )
        download_btn.setIcon(FIF.DOWNLOAD)
        # Bind the row as a default argument so each button keeps its own row.
        download_btn.clicked.connect(lambda checked, r=row: self._download_model(r))
        button_layout.addStretch()
        button_layout.addWidget(download_btn)
        button_layout.addStretch()
        self.model_table.setCellWidget(row, 3, button_container)

    def _connect_signals(self):
        """Connect dialog-level signals."""
        self.rejected.connect(self._on_dialog_reject)

    def _start_download(self):
        """Start downloading the program build selected in the combo box."""
        if FasterWhisperDownloadDialog.is_downloading:
            InfoBar.warning(
                self.tr("下载进行中"),
                self.tr("请等待当前下载任务完成"),
                duration=3000,
                parent=self,
            )
            return

        FasterWhisperDownloadDialog.is_downloading = True
        # Disables the program controls and every model download button.
        self._set_all_download_buttons_enabled(False)

        # The combo text is "<label> (<size>)"; recover the label part.
        selected_text = self.program_combo.currentText()
        selected_label = selected_text.split(" (")[0]
        program = next(
            (p for p in FASTER_WHISPER_PROGRAMS if p["label"] == selected_label), None
        )
        if not program:
            InfoBar.error(
                self.tr("下载错误"),
                self.tr("未找到对应的程序配置"),
                duration=3000,
                parent=self,
            )
            FasterWhisperDownloadDialog.is_downloading = False
            self._set_all_download_buttons_enabled(True)
            return

        # Make sure the bin directory exists.
        os.makedirs(BIN_PATH, exist_ok=True)
        self.progress_bar.show()
        self.progress_label.show()

        # Download straight into the bin directory.
        save_path = os.path.join(BIN_PATH, program["value"])
        self.program_download_thread = FileDownloadThread(
            program["downloadLink"], save_path
        )
        self.program_download_thread.progress.connect(
            self._on_program_download_progress
        )
        self.program_download_thread.finished.connect(
            lambda: self._on_program_download_finished(save_path)
        )
        self.program_download_thread.error.connect(self._on_program_download_error)
        self.program_download_thread.start()

    def _on_program_download_progress(self, value, status_msg):
        """Show the program download progress."""
        self.progress_bar.setValue(int(value))
        self.progress_label.setText(status_msg)

    def _on_program_download_finished(self, save_path):
        """Install the downloaded program file.

        A ``.exe`` download is renamed to ``faster-whisper.exe``; anything
        else is treated as an archive and unpacked in a background thread.
        """
        try:
            if save_path.endswith(".exe"):
                # CPU build: a plain executable, no extraction needed.
                os.rename(save_path, os.path.join(BIN_PATH, "faster-whisper.exe"))
                self._finish_program_installation()
            else:
                # GPU build: unpack the archive; installation is completed
                # by the thread's finished signal.
                self.progress_label.setText(self.tr("正在解压文件..."))
                self.unzip_thread = UnzipThread(save_path, BIN_PATH)
                self.unzip_thread.finished.connect(self._finish_program_installation)
                self.unzip_thread.error.connect(self._on_unzip_error)
                self.unzip_thread.start()
        except Exception as e:
            InfoBar.error(self.tr("安装失败"), str(e), duration=3000, parent=self)
            self._cleanup_installation()

    def _on_program_download_error(self, error):
        """Report a failed program download and restore the controls."""
        InfoBar.error(self.tr("下载失败"), error, duration=3000, parent=self)
        self._cleanup_installation()

    def _on_dialog_reject(self):
        """Stop running downloads when the dialog is rejected or closed.

        This slot is connected to ``rejected`` and is also called from
        ``closeEvent``. It must not call ``reject()`` itself: doing so from
        inside the ``rejected`` handler triggers the signal again and
        re-enters this slot.
        """
        if self.program_download_thread and self.program_download_thread.isRunning():
            self.program_download_thread.stop()
        if self.model_download_thread and self.model_download_thread.isRunning():
            self.model_download_thread.terminate()
        FasterWhisperDownloadDialog.is_downloading = False

    def closeEvent(self, event):
        """Stop running downloads, then let the base class handle the close."""
        self._on_dialog_reject()
        super().closeEvent(event)

    def _download_model(self, row):
        """Download the model shown in table row ``row``."""
        if FasterWhisperDownloadDialog.is_downloading:
            InfoBar.warning(
                self.tr("下载进行中"),
                self.tr("请等待当前下载任务完成"),
                duration=3000,
                parent=self,
            )
            return

        FasterWhisperDownloadDialog.is_downloading = True
        self._set_all_download_buttons_enabled(False)

        model = FASTER_WHISPER_MODELS[row]
        self.progress_bar.show()
        self.progress_label.show()
        self.progress_label.setText(self.tr(f"正在下载 {model['label']} 模型..."))

        # Keep a handle on this row's button so the callbacks can update it.
        button_container = self.model_table.cellWidget(row, 3)
        download_btn = (
            button_container.findChild(HyperlinkButton) if button_container else None
        )
        if download_btn:
            download_btn.setEnabled(False)

        # Keep the thread on the instance so it can be stopped on close.
        self.model_download_thread = ModelscopeDownloadThread(
            model["modelScopeLink"], os.path.join(MODEL_PATH, model["value"])
        )

        def _on_model_download_progress(value, msg):
            self.progress_bar.setValue(value)
            self.progress_label.setText(msg)

        def _on_model_download_finished():
            FasterWhisperDownloadDialog.is_downloading = False
            self._set_all_download_buttons_enabled(True)

            # Mark the row as downloaded.
            status_item = QTableWidgetItem(self.tr("已下载"))
            status_item.setForeground(Qt.green)  # type: ignore
            status_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
            self.model_table.setItem(row, 2, status_item)

            if download_btn:
                download_btn.setText(self.tr("重新下载"))
                download_btn.setEnabled(True)

            # Make the new model selectable in the settings widget.
            self._refresh_setting_widget_models()

            InfoBar.success(
                self.tr("下载成功"),
                self.tr(f"{model['label']} 模型已下载完成"),
                duration=3000,
                parent=self,
            )
            self.progress_bar.hide()
            self.progress_label.hide()

        def _on_model_download_error(error):
            FasterWhisperDownloadDialog.is_downloading = False
            self._set_all_download_buttons_enabled(True)
            if download_btn:
                download_btn.setEnabled(True)
            InfoBar.error(self.tr("下载失败"), str(error), duration=3000, parent=self)
            self.progress_bar.hide()
            self.progress_label.hide()

        self.model_download_thread.progress.connect(_on_model_download_progress)
        self.model_download_thread.finished.connect(_on_model_download_finished)
        self.model_download_thread.error.connect(_on_model_download_error)
        self.model_download_thread.start()

    def _refresh_setting_widget_models(self):
        """Rebuild the settings widget's model combo box from disk state.

        Only models whose ``model.bin`` exists are listed, which is the same
        "downloaded" test used when filling the model table.
        """
        if not self.setting_widget:
            return

        # Read the configured model before clearing the combo box.
        current_value = cfg.faster_whisper_model.value
        model_card = self.setting_widget.model_card
        combo = model_card.comboBox
        combo.clear()

        # Map lower-cased labels to the model directory names.
        model_dirs = {m["label"].lower(): m["value"] for m in FASTER_WHISPER_MODELS}
        models_root = Path(MODEL_PATH)
        available = [
            enum_val
            for enum_val in FasterWhisperModelEnum
            if enum_val.value in model_dirs
            and (models_root / model_dirs[enum_val.value] / "model.bin").exists()
        ]

        # Rebuild the combo box entries.
        model_card.optionToText = {e: e.value for e in available}
        for enum_val in available:
            combo.addItem(enum_val.value, userData=enum_val)

        # Restore the previous selection, or fall back to the first entry.
        if current_value in available:
            combo.setCurrentText(current_value.value)
        elif combo.count() > 0:
            combo.setCurrentIndex(0)

    def _set_all_download_buttons_enabled(self, enabled: bool):
        """Enable or disable the program controls and all model buttons."""
        # The program button exists only when a build can be downloaded.
        if hasattr(self, "program_download_btn"):
            self.program_download_btn.setEnabled(enabled)
            self.program_combo.setEnabled(enabled)

        # Every model row's download button.
        for row in range(self.model_table.rowCount()):
            button_container = self.model_table.cellWidget(row, 3)
            if button_container:
                download_btn = button_container.findChild(HyperlinkButton)
                if download_btn:
                    download_btn.setEnabled(enabled)

    def _open_model_folder(self):
        """Open the model folder if it exists."""
        if os.path.exists(MODEL_PATH):
            open_folder(str(MODEL_PATH))

    def _open_program_folder(self):
        """Open the program folder if it exists."""
        if os.path.exists(BIN_PATH):
            open_folder(str(BIN_PATH))

    def _finish_program_installation(self):
        """Report a successful installation and accept the dialog."""
        InfoBar.success(
            self.tr("安装完成"),
            self.tr("Faster Whisper 程序已安装成功"),
            duration=3000,
            parent=self,
        )
        self.accept()
        self._cleanup_installation()

    def _on_unzip_error(self, error_msg):
        """Report a failed extraction and restore the controls."""
        InfoBar.error(self.tr("安装失败"), error_msg, duration=3000, parent=self)
        self._cleanup_installation()

    def _cleanup_installation(self):
        """Clear the download flag, re-enable buttons and hide the progress UI."""
        FasterWhisperDownloadDialog.is_downloading = False
        self._set_all_download_buttons_enabled(True)
        self.progress_bar.hide()
        self.progress_label.hide()
class FasterWhisperSettingWidget(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.setup_ui()
self._connect_signals()
def showEvent(self, a0: QShowEvent) -> None:
    """Check for the Faster Whisper program each time the widget is shown.

    When check_faster_whisper_exists reports the program as missing, an
    error InfoBar is displayed and the model manager dialog is opened.
    """
    super().showEvent(a0)
    program_found, _ = check_faster_whisper_exists()
    if program_found:
        return
    self.show_error_info(self.tr("Faster Whisper程序不存在,请先下载程序"))
    self._show_model_manager()
def setup_ui(self):
    """Build the scrollable Faster Whisper settings page.

    Creates three card groups (model/device/language, VAD, other options),
    removes models whose ``model.bin`` is not present under MODEL_PATH from
    the model combo box, and places everything in a vertically scrolling
    container.
    """
    self.main_layout = QVBoxLayout(self)

    # Vertical-only scroll area and the transparent container it will host.
    self.scrollArea = SingleDirectionScrollArea(orient=Qt.Vertical, parent=self)  # type: ignore
    self.scrollArea.setStyleSheet(
        "QScrollArea{background: transparent; border: none}"
    )
    self.container = QWidget(self)
    self.container.setStyleSheet("QWidget{background: transparent}")
    self.containerLayout = QVBoxLayout(self.container)

    # ---- Main group: model, model manager, device, language ----
    self.setting_group = SettingCardGroup(self.tr("Faster Whisper 设置"), self)

    model_names = [member.value for member in FasterWhisperModelEnum]
    self.model_card = ComboBoxSettingCard(
        cfg.faster_whisper_model,
        FIF.ROBOT,
        self.tr("模型"),
        self.tr("选择 Faster Whisper 模型"),
        model_names,
        self.setting_group,
    )

    # Walk the combo box backwards so removals do not shift pending indices;
    # an entry survives only if it maps to a known model with model.bin on disk.
    model_combo = self.model_card.comboBox
    for index in reversed(range(model_combo.count())):
        label = model_combo.itemText(index).lower()
        entry = next(
            (
                candidate
                for candidate in FASTER_WHISPER_MODELS
                if candidate["label"].lower() == label
            ),
            None,
        )
        downloaded = (
            bool(entry)
            and (Path(MODEL_PATH) / entry["value"] / "model.bin").exists()
        )
        if not downloaded:
            model_combo.removeItem(index)

    # Link-style card without a URL; its button opens the model manager.
    self.manage_model_card = HyperlinkCard(
        "",
        self.tr("管理模型"),
        FIF.DOWNLOAD,
        self.tr("模型管理"),
        self.tr("下载或更新 Faster Whisper 模型"),
        self.setting_group,
    )

    language_names = [member.value for member in TranscribeLanguageEnum]
    self.language_card = ComboBoxSettingCard(
        cfg.transcribe_language,
        FIF.LANGUAGE,
        self.tr("源语言"),
        self.tr("音视频中说话的语言,默认根据前30秒自动识别"),
        language_names,
        self.setting_group,
    )
    self.language_card.comboBox.setMaxVisibleItems(6)

    # Both devices are always offered; no GPU availability check is done here.
    self.device_card = ComboBoxSettingCard(
        cfg.faster_whisper_device,
        FIF.IOT,
        self.tr("运行设备"),
        self.tr("模型运行设备"),
        ["cuda", "cpu"],
        self.setting_group,
    )

    # ---- VAD group ----
    self.vad_group = SettingCardGroup(self.tr("VAD设置"), self)

    self.vad_filter_card = SwitchSettingCard(
        FIF.CHECKBOX,
        self.tr("VAD过滤"),
        self.tr("过滤无人声语音片断,减少幻觉"),
        cfg.faster_whisper_vad_filter,
        self.vad_group,
    )

    self.vad_threshold_card = DoubleSpinBoxSettingCard(
        cfg.faster_whisper_vad_threshold,
        FIF.VOLUME,  # type: ignore
        self.tr("VAD阈值"),
        self.tr("语音概率阈值,高于此值视为语音"),
        minimum=0.00,
        maximum=1.00,
        decimals=2,
        step=0.05,
    )

    vad_methods = [member.value for member in VadMethodEnum]
    self.vad_method_card = ComboBoxSettingCard(
        cfg.faster_whisper_vad_method,
        FIF.MUSIC,
        self.tr("VAD方法"),
        self.tr("选择VAD检测方法"),
        vad_methods,
        self.vad_group,
    )

    # ---- Other options group ----
    self.other_group = SettingCardGroup(self.tr("其他设置"), self)

    self.ff_mdx_kim2_card = SwitchSettingCard(
        FIF.MUSIC,
        self.tr("人声分离"),
        self.tr("处理前使用MDX-Net降噪,分离人声和背景音乐"),
        cfg.faster_whisper_ff_mdx_kim2,
        self.other_group,
    )

    self.one_word_card = SwitchSettingCard(
        FIF.UNIT,
        self.tr("单字时间戳"),
        self.tr("开启生成单字级时间戳;关闭后使用原始分段断句"),
        cfg.faster_whisper_one_word,
        self.other_group,
    )

    self.prompt_card = LineEditSettingCard(
        cfg.faster_whisper_prompt,
        FIF.CHAT,
        self.tr("提示词"),
        self.tr("可选的提示词,默认空"),
        "",
        self.other_group,
    )

    # Register the cards with their groups; display order follows this order.
    for card in (
        self.model_card,
        self.manage_model_card,
        self.device_card,
        self.language_card,
    ):
        self.setting_group.addSettingCard(card)

    for card in (
        self.vad_filter_card,
        self.vad_threshold_card,
        self.vad_method_card,
    ):
        self.vad_group.addSettingCard(card)

    for card in (self.ff_mdx_kim2_card, self.one_word_card, self.prompt_card):
        self.other_group.addSettingCard(card)

    # Stack the groups in the container, with a stretch pushing them upward.
    for group in (self.setting_group, self.vad_group, self.other_group):
        self.containerLayout.addWidget(group)
    self.containerLayout.addStretch(1)

    # Give every editor the same minimum width.
    self.model_card.comboBox.setMinimumWidth(200)
    self.device_card.comboBox.setMinimumWidth(200)
    self.language_card.comboBox.setMinimumWidth(200)
    self.vad_method_card.comboBox.setMinimumWidth(200)
    self.prompt_card.lineEdit.setMinimumWidth(200)

    # Host the container in the scroll area and the scroll area in the page.
    self.scrollArea.setWidget(self.container)
    self.scrollArea.setWidgetResizable(True)
    self.main_layout.addWidget(self.scrollArea)
def _connect_signals(self):
    """Wire the manage-models link and the VAD switch to their handlers."""
    manager_clicked = self.manage_model_card.linkButton.clicked
    manager_clicked.connect(self._show_model_manager)

    vad_toggled = self.vad_filter_card.checkedChanged
    vad_toggled.connect(self._on_vad_filter_changed)
def _on_vad_filter_changed(self, checked: bool):
    """Enable the VAD threshold and method cards only while VAD is on.

    Args:
        checked: Current state of the VAD filter switch.
    """
    for dependent_card in (self.vad_threshold_card, self.vad_method_card):
        dependent_card.setEnabled(checked)
def _show_model_manager(self):
    """Open the model manager dialog modally (blocks until it is closed)."""
    top_level = self.window()
    manager = FasterWhisperDownloadDialog(top_level, self)
    manager.exec_()
def show_error_info(self, error_msg):
    """Show an error InfoBar at the bottom of the top-level window.

    Args:
        error_msg: Text used as the InfoBar content.
    """
    heading = self.tr("错误")
    InfoBar.error(
        title=heading,
        content=error_msg,
        duration=5000,
        position=InfoBarPosition.BOTTOM,
        parent=self.window(),
    )
def check_faster_whisper_model(self):
    """Check that the program and the configured model are both installed.

    Each failure is reported through ``show_error_info`` before returning.

    Returns:
        bool: True when the Faster Whisper program exists, the configured
        model has an entry in FASTER_WHISPER_MODELS, and that model's
        ``model.bin`` file exists under MODEL_PATH; False otherwise.
    """
    has_program, _ = check_faster_whisper_exists()
    if not has_program:
        self.show_error_info(self.tr("Faster Whisper程序不存在,请先下载程序"))
        return False

    model_value = cfg.faster_whisper_model.value.value
    wanted_label = model_value.lower()

    # Labels are matched case-insensitively against the known model table.
    model_config = next(
        (m for m in FASTER_WHISPER_MODELS if m["label"].lower() == wanted_label),
        None,
    )
    if not model_config:
        self.show_error_info(self.tr("模型配置不存在"))
        return False

    # The weights file is what matters: the previous
    # "directory missing AND file missing" test accepted a model directory
    # that had no model.bin in it.
    model_bin = Path(MODEL_PATH) / model_config["value"] / "model.bin"
    if not model_bin.exists():
        self.show_error_info(self.tr("模型文件不存在: ") + model_value)
        return False

    return True
================================================
FILE: app/components/LanguageSettingDialog.py
================================================
from PyQt5.QtWidgets import QVBoxLayout
from qfluentwidgets import (
ComboBox,
InfoBar,
InfoBarPosition,
MessageBoxBase,
SettingCard,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.core.entities import (
TranscribeLanguageEnum,
TranscribeModelEnum,
get_asr_language_capability,
)
class LanguageSettingDialog(MessageBoxBase):
    """Dialog for picking the transcription source language.

    The selectable languages come from the language capability reported for
    the transcription model passed to the constructor.
    """

    def __init__(self, model: TranscribeModelEnum, parent=None):
        """Store the model, then build the UI and connect the buttons.

        Args:
            model: Transcription model whose language capability is queried.
            parent: Optional parent widget.
        """
        self.model = model
        super().__init__(parent)
        self.widget.setMinimumWidth(500)
        self._setup_ui()
        self._connect_signals()

    def _get_available_languages(self) -> list[str]:
        """Return the selectable language names for ``self.model``.

        The AUTO entry is placed first when the capability supports it.
        """
        capability = get_asr_language_capability(self.model)
        names = [language.value for language in capability.supported_languages]
        if capability.supports_auto:
            return [TranscribeLanguageEnum.AUTO.value, *names]
        return names

    def _setup_ui(self):
        """Create the language card with its combo box and preselect a value."""
        self.yesButton.setText(self.tr("确定"))
        self.cancelButton.setText(self.tr("取消"))

        # A plain SettingCard plus ComboBox is used (rather than
        # ComboBoxSettingCard) because the option list is built at runtime.
        self.language_card = SettingCard(
            FIF.LANGUAGE,
            self.tr("源语言"),
            self.tr("音视频中说话的语言,默认根据前30秒自动识别"),
            self,
        )

        choices = self._get_available_languages()
        self.language_combo = ComboBox(self)
        self.language_combo.addItems(choices)
        self.language_combo.setMaxVisibleItems(6)
        self.language_combo.setMinimumWidth(160)

        # Preselect the configured language, or fall back to the first
        # entry when it is not offered for this model.
        configured = cfg.transcribe_language.value
        if configured.value in choices:
            self.language_combo.setCurrentText(configured.value)
        elif choices:
            self.language_combo.setCurrentIndex(0)

        card_row = self.language_card.hBoxLayout
        card_row.addWidget(self.language_combo)
        card_row.addSpacing(16)

        content_layout = QVBoxLayout()
        content_layout.addWidget(self.language_card)
        content_layout.addStretch(1)
        self.viewLayout.addLayout(content_layout)

    def _connect_signals(self):
        """Connect the confirm button to the save handler."""
        self.yesButton.clicked.connect(self.__onYesButtonClicked)

    def __onYesButtonClicked(self):
        """Persist the selected language, accept the dialog and notify."""
        chosen_text = self.language_combo.currentText()
        chosen_language = next(
            (lang for lang in TranscribeLanguageEnum if lang.value == chosen_text),
            None,
        )
        # Nothing is written when the text matches no enum member.
        if chosen_language is not None:
            cfg.set(cfg.transcribe_language, chosen_language)

        self.accept()

        host_window = self.window()
        InfoBar.success(
            self.tr("设置已保存"),
            self.tr("语言设置已更新"),
            duration=3000,
            parent=host_window,
            position=InfoBarPosition.BOTTOM,
        )

        # An additional warning bar is shown when Japanese is configured.
        if cfg.transcribe_language.value == TranscribeLanguageEnum.JAPANESE:
            InfoBar.warning(
                self.tr("请注意身体!!"),
                self.tr("小心肝儿,注意身体哦~"),
                duration=2000,
                parent=host_window,
                position=InfoBarPosition.BOTTOM,
            )
================================================
FILE: app/components/LineEditSettingCard.py
================================================
from typing import Optional
from PyQt5.QtCore import Qt, pyqtSignal
from qfluentwidgets import LineEdit, SettingCard
from qfluentwidgets.common.config import ConfigItem, qconfig
class LineEditSettingCard(SettingCard):
    """Setting card with a line edit bound to a config item.

    Edits are written to the config item as the user types, and changes of
    the config item are reflected back into the line edit.
    """

    textChanged = pyqtSignal(str)

    def __init__(
        self,
        configItem: ConfigItem,
        icon,
        title: str,
        content: Optional[str] = None,
        placeholder: str = "",
        parent=None,
    ):
        """Create the card and load the current config value.

        Args:
            configItem: Config item read from and written to by this card.
            icon: Icon forwarded to SettingCard.
            title: Card title.
            content: Optional descriptive text under the title.
            placeholder: Placeholder text of the line edit.
            parent: Optional parent widget.
        """
        super().__init__(icon, title, content, parent)
        self.configItem = configItem

        self.lineEdit = LineEdit(self)
        self.lineEdit.setPlaceholderText(placeholder)
        self.hBoxLayout.addWidget(self.lineEdit, 1, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(16)
        self.lineEdit.setMinimumWidth(280)

        # Load the stored value before the signals are connected.
        self.setValue(qconfig.get(configItem))
        self.lineEdit.textChanged.connect(self.__onTextChanged)
        configItem.valueChanged.connect(self.setValue)

    def __onTextChanged(self, text: str):
        """Store the edited text and re-emit it through ``textChanged``."""
        self.setValue(text)
        self.textChanged.emit(text)

    def setValue(self, value: str):
        """Write ``value`` to the config item and show it in the line edit.

        The line edit is only touched when its text actually differs:
        QLineEdit.setText moves the cursor to the end and clears the undo
        history, which would otherwise happen on every keystroke because
        this method is also the handler for the user's own edits.
        """
        qconfig.set(self.configItem, value)
        if self.lineEdit.text() != value:
            self.lineEdit.setText(value)
================================================
FILE: app/components/MySettingCard.py
================================================
# coding:utf-8
from typing import List, Optional, Union
from PyQt5.QtCore import Qt, pyqtSignal
from PyQt5.QtGui import QColor, QIcon, QPainter
from PyQt5.QtWidgets import QFrame, QHBoxLayout, QLabel, QToolButton, QVBoxLayout
from qfluentwidgets import ColorDialog, ComboBox, CompactDoubleSpinBox, CompactSpinBox
from qfluentwidgets.common.config import isDarkTheme
from qfluentwidgets.common.icon import FluentIconBase, drawIcon
from qfluentwidgets.common.style_sheet import FluentStyleSheet
from qfluentwidgets.components.widgets.icon_widget import IconWidget
class SettingIconWidget(IconWidget):
    """Icon widget that is drawn dimmed while it is disabled."""

    def paintEvent(self, e):
        """Draw the icon over the full widget rect, at 0.36 opacity if disabled."""
        render_hints = QPainter.Antialiasing | QPainter.SmoothPixmapTransform
        painter = QPainter(self)
        if not self.isEnabled():
            painter.setOpacity(0.36)
        painter.setRenderHints(render_hints)
        drawIcon(self._icon, painter, self.rect())
class SettingCard(QFrame):
    """Base card: an icon, a title, an optional description, and free space
    on the right where subclasses add their editor widget."""

    def __init__(
        self, icon: Union[str, QIcon, FluentIconBase], title, content=None, parent=None
    ):
        """Create the card.

        Parameters
        ----------
        icon: str | QIcon | FluentIconBase
            icon drawn at the left edge of the card

        title: str
            card title

        content: str
            optional description shown under the title; when empty the
            description label is hidden and the card is shorter

        parent: QWidget
            parent widget
        """
        super().__init__(parent=parent)
        self.iconLabel = SettingIconWidget(icon, self)
        self.titleLabel = QLabel(title, self)
        self.contentLabel = QLabel(content or "", self)
        self.hBoxLayout = QHBoxLayout(self)
        self.vBoxLayout = QVBoxLayout()

        # Cards with a description are 70 px tall, title-only cards 50 px.
        if content:
            self.setFixedHeight(70)
        else:
            self.contentLabel.hide()
            self.setFixedHeight(50)
        self.iconLabel.setFixedSize(16, 16)

        # Outer row: icon | text column | stretch (editors are appended later).
        outer = self.hBoxLayout
        outer.setSpacing(0)
        outer.setContentsMargins(16, 0, 0, 0)
        outer.setAlignment(Qt.AlignVCenter)  # type: ignore

        # Inner column: title above description.
        inner = self.vBoxLayout
        inner.setSpacing(0)
        inner.setContentsMargins(0, 0, 0, 0)
        inner.setAlignment(Qt.AlignVCenter)  # type: ignore

        outer.addWidget(self.iconLabel, 0, Qt.AlignLeft)  # type: ignore
        outer.addSpacing(16)
        outer.addLayout(inner)
        inner.addWidget(self.titleLabel, 0, Qt.AlignLeft)  # type: ignore
        inner.addWidget(self.contentLabel, 0, Qt.AlignLeft)  # type: ignore
        outer.addSpacing(16)
        outer.addStretch(1)

        self.contentLabel.setObjectName("contentLabel")
        FluentStyleSheet.SETTING_CARD.apply(self)

    def setTitle(self, title: str):
        """Replace the title text."""
        self.titleLabel.setText(title)

    def setContent(self, content: str):
        """Replace the description text; an empty text hides the label."""
        self.contentLabel.setText(content)
        self.contentLabel.setVisible(bool(content))

    def setValue(self, value):
        """Hook for subclasses that hold a value; does nothing here."""
        pass

    def setIconSize(self, width: int, height: int):
        """Fix the icon widget to ``width`` x ``height`` pixels."""
        self.iconLabel.setFixedSize(width, height)

    def paintEvent(self, e):
        """Paint the rounded card background using theme-dependent colors."""
        if isDarkTheme():
            fill, outline = QColor(255, 255, 255, 13), QColor(0, 0, 0, 50)
        else:
            fill, outline = QColor(255, 255, 255, 170), QColor(0, 0, 0, 19)

        painter = QPainter(self)
        painter.setRenderHints(QPainter.Antialiasing)
        painter.setBrush(fill)
        painter.setPen(outline)
        # Inset by one pixel so the outline stays inside the widget rect.
        painter.drawRoundedRect(self.rect().adjusted(1, 1, -1, -1), 6, 6)
class DoubleSpinBoxSettingCard(SettingCard):
    """Setting card hosting a compact floating-point spin box."""

    valueChanged = pyqtSignal(float)

    def __init__(
        self,
        icon: Union[str, QIcon, FluentIconBase],
        title: str,
        content: Optional[str] = None,
        minimum: float = 0.0,
        maximum: float = 100.0,
        decimals: int = 1,
        parent=None,
        step: float = 0.2,
    ):
        """Create the card.

        Args:
            icon: Icon forwarded to SettingCard.
            title: Card title.
            content: Optional description under the title.
            minimum: Lower bound of the spin box range.
            maximum: Upper bound of the spin box range.
            decimals: Number of decimals shown by the spin box.
            parent: Optional parent widget.
            step: Increment applied per spin step. Defaults to 0.2, the
                value that used to be hard-coded; it is the last parameter
                so existing positional callers are unaffected.
        """
        super().__init__(icon, title, content, parent)

        self.spinBox = CompactDoubleSpinBox(self)
        self.spinBox.setRange(minimum, maximum)
        self.spinBox.setDecimals(decimals)
        self.spinBox.setMinimumWidth(60)
        self.spinBox.setSingleStep(step)

        self.hBoxLayout.addWidget(self.spinBox, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(8)

        # No initial value is applied here; callers use setValue for that.
        self.spinBox.valueChanged.connect(self.__onValueChanged)

    def __onValueChanged(self, value: float):
        """Route a spin box change through setValue and re-emit it."""
        self.setValue(value)
        self.valueChanged.emit(value)

    def setValue(self, value: float):
        """Show ``value`` in the spin box."""
        self.spinBox.setValue(value)
class SpinBoxSettingCard(SettingCard):
    """Setting card hosting a compact integer spin box."""

    valueChanged = pyqtSignal(int)

    def __init__(
        self,
        icon: Union[str, QIcon],
        title: str,
        content: Optional[str] = None,
        minimum: int = 0,
        maximum: int = 100,
        step: int = 2,
        parent=None,
    ):
        """Create the card.

        Args:
            icon: Icon forwarded to SettingCard.
            title: Card title.
            content: Optional description under the title.
            minimum: Lower bound of the spin box range.
            maximum: Upper bound of the spin box range.
            step: Increment applied per spin step.
            parent: Optional parent widget.
        """
        super().__init__(icon, title, content, parent)

        editor = CompactSpinBox(self)
        editor.setRange(minimum, maximum)
        editor.setMinimumWidth(60)
        editor.setSingleStep(step)
        self.spinBox = editor

        # Editor sits at the right edge, followed by a small gap.
        self.hBoxLayout.addWidget(editor, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(8)

        editor.valueChanged.connect(self.__onValueChanged)

    def __onValueChanged(self, value: int):
        """Route a spin box change through setValue and re-emit it."""
        self.setValue(value)
        self.valueChanged.emit(value)

    def setValue(self, value: int):
        """Show ``value`` in the spin box."""
        self.spinBox.setValue(value)
class ComboBoxSettingCard(SettingCard):
    """Setting card hosting a combo box, relaying its change signals."""

    currentTextChanged = pyqtSignal(str)
    currentIndexChanged = pyqtSignal(int)

    def __init__(
        self,
        icon: Union[str, QIcon],
        title: str,
        content: Optional[str] = None,
        texts: Optional[List[str]] = None,
        parent=None,
    ):
        """Create the card.

        Args:
            icon: Icon forwarded to SettingCard.
            title: Card title.
            content: Optional description under the title.
            texts: Optional initial entries of the combo box.
            parent: Optional parent widget.
        """
        super().__init__(icon, title, content, parent)

        combo = ComboBox(self)
        self.comboBox = combo
        self.hBoxLayout.addWidget(combo, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(16)

        # Entries are added before the signals are connected.
        for entry in texts or ():
            combo.addItem(entry)

        combo.currentTextChanged.connect(self.__onCurrentTextChanged)
        combo.currentIndexChanged.connect(self.__onCurrentIndexChanged)

    def __onCurrentTextChanged(self, text: str):
        """Relay the combo box's text change."""
        self.currentTextChanged.emit(text)

    def __onCurrentIndexChanged(self, index: int):
        """Relay the combo box's index change."""
        self.currentIndexChanged.emit(index)

    def setCurrentText(self, text: str):
        """Select the entry with the given text."""
        self.comboBox.setCurrentText(text)

    def setCurrentIndex(self, index: int):
        """Select the entry at the given index."""
        self.comboBox.setCurrentIndex(index)

    def addItem(self, text: str):
        """Append one entry."""
        self.comboBox.addItem(text)

    def addItems(self, texts: List[str]):
        """Append several entries."""
        self.comboBox.addItems(texts)

    def clear(self):
        """Remove all entries."""
        self.comboBox.clear()
class ColorSettingCard(SettingCard):
    """Setting card with a color picker button on the right."""

    colorChanged = pyqtSignal(QColor)

    def __init__(
        self,
        color: QColor,
        icon: Union[str, QIcon, FluentIconBase],
        title: str,
        content: Optional[str] = None,
        parent=None,
        enableAlpha=False,
    ):
        """Create the card.

        Parameters
        ----------
        color: QColor
            initial color shown by the picker button

        icon: str | QIcon | FluentIconBase
            icon forwarded to SettingCard

        title: str
            card title, also passed to the picker button

        content: str
            optional description under the title

        parent: QWidget
            parent widget

        enableAlpha: bool
            whether the alpha channel is enabled
        """
        super().__init__(icon, title, content, parent)

        picker = ColorPickerButton(color, title, self, enableAlpha)
        picker.setFixedWidth(60)
        self.colorPicker = picker

        self.hBoxLayout.addWidget(picker, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(16)

        picker.colorChanged.connect(self.__onColorChanged)

    def __onColorChanged(self, color: QColor):
        """Relay the picker button's color change."""
        self.colorChanged.emit(color)

    def setColor(self, color: QColor):
        """Set the color shown by the picker button."""
        self.colorPicker.setColor(color)
class ColorPickerButton(QToolButton):
    """Button painted in its current color; clicking opens a ColorDialog."""

    colorChanged = pyqtSignal(QColor)

    def __init__(self, color: QColor, title: str, parent=None, enableAlpha=False):
        """Create the button.

        Args:
            color: Initial color.
            title: Text appended to the color dialog's title.
            parent: Optional parent widget.
            enableAlpha: Passed to the color dialog; when False the button
                is painted fully opaque.
        """
        super().__init__(parent=parent)
        self.title = title
        self.enableAlpha = enableAlpha

        self.setFixedSize(96, 32)
        self.setAttribute(Qt.WA_TranslucentBackground)  # type: ignore

        self.setColor(color)
        self.setCursor(Qt.PointingHandCursor)  # type: ignore
        self.clicked.connect(self.__showColorDialog)

    def __showColorDialog(self):
        """Run a ColorDialog seeded with the current color."""
        dialog_title = self.tr("Choose ") + self.title
        dialog = ColorDialog(
            self.color, dialog_title, self.window(), self.enableAlpha
        )
        dialog.colorChanged.connect(self.__onColorChanged)
        dialog.exec()

    def __onColorChanged(self, color):
        """Adopt the dialog's color and announce the change."""
        self.setColor(color)
        self.colorChanged.emit(color)

    def setColor(self, color):
        """Store a copy of ``color`` and schedule a repaint."""
        self.color = QColor(color)
        self.update()

    def paintEvent(self, e):
        """Paint a rounded rectangle filled with the current color."""
        if isDarkTheme():
            border = QColor(255, 255, 255, 10)
        else:
            border = QColor(234, 234, 234)

        # Work on a copy so forcing opacity does not alter self.color.
        fill = QColor(self.color)
        if not self.enableAlpha:
            fill.setAlpha(255)

        painter = QPainter(self)
        painter.setRenderHints(QPainter.Antialiasing)
        painter.setPen(border)
        painter.setBrush(fill)
        painter.drawRoundedRect(self.rect().adjusted(1, 1, -1, -1), 5, 5)
================================================
FILE: app/components/MyVideoWidget.py
================================================
# coding:utf-8
import sys
from enum import Enum
from pathlib import Path
from typing import Optional
import vlc # type: ignore
from PyQt5.QtCore import QObject, Qt, QTimer, QUrl, pyqtSignal
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QApplication, QHBoxLayout, QVBoxLayout, QWidget
# from qfluentwidgets.multimedia.media_player import MediaPlayer, MediaPlayerBase
from qfluentwidgets.common.icon import FluentIcon
from qfluentwidgets.common.style_sheet import FluentStyleSheet
from qfluentwidgets.components.widgets.label import CaptionLabel
from qfluentwidgets.multimedia.media_play_bar import (
MediaPlayBarBase,
MediaPlayBarButton,
)
from app.common.signal_bus import signalBus
from app.config import RESOURCE_PATH
class MediaStatus(Enum):
    """Status values reported for the current media stream."""

    NoMedia = 0
    LoadingMedia = 1
    LoadedMedia = 2
    BufferingMedia = 3
    BufferedMedia = 4
    EndOfMedia = 5
    InvalidMedia = 6
    UnknownMediaStatus = 7
class PlaybackState(Enum):
    """Playback states reported by a media player."""

    StoppedState = 0
    PlayingState = 1
    PausedState = 2
class MediaPlayerBase(QObject):
    """Abstract media player interface.

    Declares the Qt signals and the methods a concrete player provides;
    every method here raises NotImplementedError.
    """

    mediaStatusChanged = pyqtSignal(MediaStatus)
    playbackRateChanged = pyqtSignal(float)
    positionChanged = pyqtSignal(int)
    durationChanged = pyqtSignal(int)
    sourceChanged = pyqtSignal(QUrl)
    volumeChanged = pyqtSignal(int)
    mutedChanged = pyqtSignal(bool)

    def __init__(self, parent=None):
        """Initialize the QObject with an optional parent."""
        super().__init__(parent=parent)

    # ---- queries ----

    def isPlaying(self):
        """Tell whether media is currently playing."""
        raise NotImplementedError

    def mediaStatus(self) -> MediaStatus:
        """Report the status of the current media stream."""
        raise NotImplementedError

    def playbackState(self) -> PlaybackState:
        """Report the playback state of the current media stream."""
        raise NotImplementedError

    def duration(self):
        """Report the duration of the current media, in milliseconds."""
        raise NotImplementedError

    def position(self):
        """Report the current playback position, in milliseconds."""
        raise NotImplementedError

    def volume(self):
        """Report the player volume."""
        raise NotImplementedError

    def source(self) -> QUrl:
        """Report the media source currently in use."""
        raise NotImplementedError

    # ---- transport ----

    def pause(self):
        """Pause playback of the current source."""
        raise NotImplementedError

    def play(self):
        """Start or resume playback of the current source."""
        raise NotImplementedError

    def stop(self):
        """Stop playback and reset the position to the beginning."""
        raise NotImplementedError

    def playbackRate(self) -> float:
        """Report the playback rate of the current media."""
        raise NotImplementedError

    # ---- setters ----

    def setPosition(self, position: int):
        """Seek to ``position``, given in milliseconds."""
        raise NotImplementedError

    def setSource(self, media: QUrl):
        """Set the media source."""
        raise NotImplementedError

    def setPlaybackRate(self, rate: float):
        """Set the playback rate."""
        raise NotImplementedError

    def setVolume(self, volume: int):
        """Set the player volume."""
        raise NotImplementedError

    def setMuted(self, isMuted: bool):
        """Mute or unmute the player."""
        raise NotImplementedError

    # ---- video output ----

    def videoOutput(self) -> QObject:
        """Report the video output used by the player."""
        raise NotImplementedError

    def setVideoOutput(self, output: QObject) -> None:
        """Set the video output used by the player."""
        raise NotImplementedError
class MediaPlayer(MediaPlayerBase):
    """MediaPlayerBase implementation backed by python-vlc.

    A 100 ms QTimer polls the VLC player and re-emits position, duration and
    volume changes as Qt signals. The same timer enforces the end of a
    segment started with ``playSegment``.
    """

    def __init__(self, parent=None):
        """Create the VLC instance and player and start the polling timer.

        Args:
            parent: Optional QObject parent.
        """
        super().__init__(parent)

        # Qt refuses to move an object that has a parent (it only prints a
        # warning), so the move to the application thread is attempted for
        # parentless players only.
        app = QApplication.instance()
        if app is not None and self.parent() is None:
            self.moveToThread(app.thread())

        # VLC options chosen to reduce console warnings.
        vlc_args = ["--no-xlib", "--quiet"]
        self.instance = vlc.Instance(vlc_args)
        self._player = self.instance.media_player_new()
        self._media = None
        self._source = None
        self._playback_rate = 1.0

        # Last values seen by the poller, used to detect changes.
        self._last_position = 0
        self._last_duration = 0
        self._last_volume = 100

        # Segment playback state: end time in ms (None when no segment is
        # active) and whether a position before that end has been observed.
        self._segment_end = None
        self._segment_armed = False

        self._update_timer = QTimer(self)
        self._update_timer.setInterval(100)
        self._update_timer.timeout.connect(self._on_timer_update)
        self._update_timer.start()

    def _on_timer_update(self):
        """Poll VLC and emit a signal for each value that changed."""
        if not self._player:
            return

        position = self._player.get_time()
        if position != self._last_position:
            self._last_position = position
            self.positionChanged.emit(position)
        self._pause_at_segment_end(position)

        duration = self._player.get_length()
        if duration != self._last_duration:
            self._last_duration = duration
            self.durationChanged.emit(duration)

        volume = self._player.audio_get_volume()
        if volume != self._last_volume:
            self._last_volume = volume
            self.volumeChanged.emit(volume)

    def _pause_at_segment_end(self, position: int):
        """Pause once playback crosses the end of the active segment.

        The watcher arms itself only after a position before the segment end
        has been seen, so a stale pre-seek position beyond the end cannot
        pause playback right after ``playSegment`` was called.
        """
        if self._segment_end is None:
            return
        if position < self._segment_end:
            self._segment_armed = True
            return
        if not self._segment_armed:
            return
        self._segment_end = None
        self._segment_armed = False
        # pause() is forwarded to VLC's pause call; only issue it while
        # something is actually playing.
        if self._player.is_playing():
            self.pause()

    def isPlaying(self):
        """Tell whether VLC reports the player as playing."""
        return bool(self._player and self._player.is_playing())

    def mediaStatus(self) -> MediaStatus:
        """Translate the VLC player state into a MediaStatus."""
        if not self._player:
            return MediaStatus.NoMedia

        state = self._player.get_state()
        if state == vlc.State.NothingSpecial:
            return MediaStatus.NoMedia
        if state == vlc.State.Opening:
            return MediaStatus.LoadingMedia
        if state == vlc.State.Playing or state == vlc.State.Paused:
            return MediaStatus.BufferedMedia
        if state == vlc.State.Stopped:
            return MediaStatus.LoadedMedia
        if state == vlc.State.Ended:
            return MediaStatus.EndOfMedia
        if state == vlc.State.Error:
            return MediaStatus.InvalidMedia
        return MediaStatus.UnknownMediaStatus

    def playbackState(self) -> PlaybackState:
        """Translate the VLC player state into a PlaybackState."""
        if not self._player:
            return PlaybackState.StoppedState
        if self._player.is_playing():
            return PlaybackState.PlayingState
        if self._player.get_state() == vlc.State.Paused:
            return PlaybackState.PausedState
        return PlaybackState.StoppedState

    def duration(self):
        """Return the media length reported by VLC in ms (0 without a player)."""
        return self._player.get_length() if self._player else 0

    def position(self):
        """Return the playback time reported by VLC in ms (0 without a player)."""
        return self._player.get_time() if self._player else 0

    def volume(self):
        """Return the audio volume reported by VLC (0 without a player)."""
        return self._player.audio_get_volume() if self._player else 0

    def source(self) -> Optional[QUrl]:
        """Return the QUrl given to ``setSource``, or None if none was set."""
        return self._source

    def get_subtitle(self):
        """Return the description text of the active subtitle (SPU) track.

        Returns:
            str: The decoded second field of the matching entry from
            ``video_get_spu_description`` (the original documentation
            describes this as the subtitle file path; verify against VLC),
            or None when subtitles are off, nothing matches, or any VLC
            call raises.
        """
        if not self._player:
            return None

        try:
            current_spu = self._player.video_get_spu()
            # Non-positive ids are treated as "no subtitle" / failure.
            if current_spu <= 0:
                return None

            spu_description = self._player.video_get_spu_description()
            if not spu_description:
                return None

            for spu in spu_description:
                if spu[0] == current_spu:
                    return spu[1].decode("utf-8")
            return None
        except Exception:
            # Best effort: any failure is reported as "no subtitle".
            return None

    def pause(self):
        """Forward to the VLC player's pause call."""
        self._player.pause()

    def play(self):
        """Forward to the VLC player's play call."""
        self._player.play()

    def stop(self):
        """Forward to the VLC player's stop call."""
        self._player.stop()

    def playbackRate(self) -> float:
        """Return the rate most recently set through ``setPlaybackRate``."""
        return self._playback_rate

    def setPosition(self, position: int):
        """Seek to ``position`` (ms) and emit ``positionChanged``."""
        if self._player:
            self._player.set_time(position)
            self.positionChanged.emit(position)

    def setSource(self, media: QUrl):
        """Load ``media`` into the player and announce the new source.

        The local file path is used when the URL has one, otherwise the URL
        string. Any pending segment end is discarded.
        """
        path = media.toLocalFile() or media.toString()
        self._media = self.instance.media_new(path)
        self._player.set_media(self._media)
        self._source = media
        self._segment_end = None
        self._segment_armed = False
        self.sourceChanged.emit(media)
        self.mediaStatusChanged.emit(self.mediaStatus())

    def setPlaybackRate(self, rate: float):
        """Set the playback rate and emit ``playbackRateChanged``."""
        if self._player:
            self._player.set_rate(rate)
            self._playback_rate = rate
            self.playbackRateChanged.emit(rate)

    def setVolume(self, volume: int):
        """Set the audio volume and emit ``volumeChanged``."""
        if self._player:
            self._player.audio_set_volume(volume)
            self.volumeChanged.emit(volume)

    def setMuted(self, isMuted: bool):
        """Mute or unmute the audio and emit ``mutedChanged``."""
        if self._player:
            self._player.audio_set_mute(isMuted)
            self.mutedChanged.emit(isMuted)

    def videoOutput(self) -> Optional[QObject]:
        """Return None; no output object is tracked for VLC."""
        return None

    def setVideoOutput(self, output: QObject) -> None:
        """Render the video into ``output``'s native window.

        Non-widget outputs are ignored. The native window handle is passed
        to VLC through the platform-specific setter: ``set_hwnd`` on
        Windows, ``set_nsobject`` on macOS, ``set_xwindow`` elsewhere.
        """
        if not isinstance(output, QWidget):
            return

        handle = int(output.winId())
        if sys.platform.startswith("win"):
            self._player.set_hwnd(handle)
        elif sys.platform == "darwin":
            self._player.set_nsobject(handle)
        else:
            self._player.set_xwindow(handle)

    def hasMedia(self):
        """Tell whether a media object has been loaded into the player."""
        return bool(self._media and self._player)

    def playSegment(self, start_time: int, end_time: int):
        """Play from ``start_time`` and pause once ``end_time`` is reached.

        Nothing happens when no media is loaded or the range is invalid
        (negative start, end beyond the current duration, or start not
        before end). The end is enforced by the polling timer, so a new call
        simply replaces the previous segment.

        Args:
            start_time: Segment start in milliseconds.
            end_time: Segment end in milliseconds.
        """
        if not self._player or not self.hasMedia():
            return
        if start_time < 0 or end_time > self.duration() or start_time >= end_time:
            return

        self._segment_end = end_time
        self._segment_armed = False
        self.setPosition(start_time)
        self.play()

    def add_subtitle(self, subtitle_file: str) -> bool:
        """Attach a subtitle file to the loaded media.

        Args:
            subtitle_file: Path of the subtitle file.

        Returns:
            bool: True when VLC's ``add_slave`` returns 0; False when no
            media is loaded, the call returns another value, or an
            exception is raised.
        """
        if not self._player or not self.hasMedia():
            return False

        try:
            # add_slave is given a URI rather than a plain path.
            subtitle_uri = Path(subtitle_file).as_uri()
            result = self._player.add_slave(
                vlc.MediaSlaveType.subtitle, subtitle_uri, True
            )
            return result == 0
        except Exception:
            # Best effort: any failure is reported as False.
            return False

    def get_subtitle_tracks(self) -> list:
        """Return the subtitle track entries reported by VLC.

        Returns:
            list: Entries from ``video_get_spu_description`` (queried once),
            or an empty list without a player or without tracks.
        """
        if not self._player:
            return []
        return list(self._player.video_get_spu_description() or [])

    def set_subtitle_track(self, track_id: int):
        """Select the subtitle track to display.

        Args:
            track_id: Subtitle track id; -1 disables subtitles.
        """
        if self._player:
            self._player.video_set_spu(track_id)
class StandardMediaPlayBar(MediaPlayBarBase):
"""Standard media play bar"""
def __init__(self, parent=None):
    """Create the layouts, containers, skip buttons and time labels.

    Args:
        parent: Optional parent widget.
    """
    super().__init__(parent)

    # Layout skeleton: slider row, time row, button row.
    self.vBoxLayout = QVBoxLayout(self)
    self.timeLayout = QHBoxLayout()
    self.buttonLayout = QHBoxLayout()

    # Three containers split the button row into left / center / right.
    self.leftButtonContainer = QWidget()
    self.centerButtonContainer = QWidget()
    self.rightButtonContainer = QWidget()

    self.leftButtonLayout = QHBoxLayout(self.leftButtonContainer)
    self.centerButtonLayout = QHBoxLayout(self.centerButtonContainer)
    self.rightButtonLayout = QHBoxLayout(self.rightButtonContainer)

    self.skipBackButton = MediaPlayBarButton(FluentIcon.SKIP_BACK, self)
    self.skipForwardButton = MediaPlayBarButton(FluentIcon.SKIP_FORWARD, self)

    # Both labels start at zero; they are refreshed on position changes.
    self.currentTimeLabel = CaptionLabel("0:00:00", self)
    self.remainTimeLabel = CaptionLabel("0:00:00", self)

    self.__initWidgets()
def __initWidgets(self):
    """Arrange the slider, time labels and buttons and wire the skip buttons."""
    self.setFixedHeight(102)

    root = self.vBoxLayout
    root.setSpacing(6)
    root.setContentsMargins(5, 9, 5, 9)
    root.addWidget(self.progressSlider, 1, Qt.AlignTop)  # type: ignore

    # Time row: elapsed on the left, remaining on the right.
    root.addLayout(self.timeLayout)
    self.timeLayout.setContentsMargins(10, 0, 10, 0)
    self.timeLayout.addWidget(self.currentTimeLabel, 0, Qt.AlignLeft)  # type: ignore
    self.timeLayout.addWidget(self.remainTimeLabel, 0, Qt.AlignRight)  # type: ignore

    root.addStretch(1)
    root.addLayout(self.buttonLayout, 1)

    self.buttonLayout.setContentsMargins(0, 0, 0, 0)
    self.leftButtonLayout.setContentsMargins(4, 0, 0, 0)
    self.centerButtonLayout.setContentsMargins(0, 0, 0, 0)
    self.rightButtonLayout.setContentsMargins(0, 0, 4, 0)

    # Volume on the left; skip-back / play / skip-forward in the center.
    self.leftButtonLayout.addWidget(self.volumeButton, 0, Qt.AlignLeft)  # type: ignore
    for center_button in (
        self.skipBackButton,
        self.playButton,
        self.skipForwardButton,
    ):
        self.centerButtonLayout.addWidget(center_button)

    self.buttonLayout.addWidget(self.leftButtonContainer, 0, Qt.AlignLeft)  # type: ignore
    self.buttonLayout.addWidget(self.centerButtonContainer, 0, Qt.AlignHCenter)  # type: ignore
    self.buttonLayout.addWidget(self.rightButtonContainer, 0, Qt.AlignRight)  # type: ignore

    # The skip buttons jump five seconds in either direction.
    self.skipBackButton.clicked.connect(lambda: self.skipBack(5000))
    self.skipForwardButton.clicked.connect(lambda: self.skipForward(5000))
def skipBack(self, ms: int):
    """Move playback back by ``ms`` milliseconds, stopping at the start.

    The target is clamped at 0 so a negative position is never handed to
    the player.

    Args:
        ms: Number of milliseconds to go back.
    """
    target = max(0, self.player.position() - ms)
    self.player.setPosition(target)
def skipForward(self, ms: int):
    """Move playback forward by ``ms`` milliseconds.

    Args:
        ms: Number of milliseconds to advance.
    """
    target = self.player.position() + ms
    self.player.setPosition(target)
def _onPositionChanged(self, position: int):
    """Update the elapsed and remaining time labels for ``position`` (ms)."""
    super()._onPositionChanged(position)
    elapsed_text = self._formatTime(position)
    self.currentTimeLabel.setText(elapsed_text)
    remaining_ms = self.player.duration() - position
    self.remainTimeLabel.setText(self._formatTime(remaining_ms))
def _formatTime(self, time: int):
time = int(time / 1000)
s = time % 60
m = int(time / 60)
h = int(time / 3600)
return f"{h}:{m:02}:{s:02}"
def closeEvent(self, event):
    """Call ``self.release()`` and then run the inherited close handling.

    NOTE(review): ``release`` is not defined in the visible part of this
    class; presumably it is inherited - confirm.
    """
    self.release()
    super().closeEvent(event)
class MyVideoWidget(QWidget):
"""Video widget"""
def __init__(self, parent=None):
    """Build the video surface, play bar and VLC player.

    Args:
        parent: Optional parent widget.
    """
    super().__init__(parent)

    # Window defaults.
    self.resize(800, 600)
    self.setWindowTitle("VideoCaptioner")
    self.setWindowIcon(QIcon(str(RESOURCE_PATH / "assets" / "logo.png")))

    # Dedicated child widget that VLC renders into.
    self.videoWidget = QWidget(self)
    self.videoWidget.setStyleSheet("background-color: rgb(24, 24, 24);")

    # Hint label, centered on the video surface until a video is set.
    self.tipLabel = CaptionLabel("请拖入视频文件", self.videoWidget)
    self.tipLabel.setStyleSheet(
        """
color: rgba(255, 255, 255, 0.5);
font-size: 20px;
font-weight: bold;
letter-spacing: 2px;
"""
    )
    tipLayout = QVBoxLayout(self.videoWidget)
    tipLayout.addWidget(self.tipLabel, 0, Qt.AlignCenter)  # type: ignore

    # Playback control bar.
    self.playBar = StandardMediaPlayBar(self)
    self.playBar.setAttribute(Qt.WA_TranslucentBackground)  # type: ignore

    # Subtitle file re-applied whenever a new video is set.
    self.subtitle_file = None

    # Video surface on top (stretching), play bar below.
    self.vBoxLayout = QVBoxLayout(self)
    self.vBoxLayout.setContentsMargins(0, 0, 0, 0)
    self.vBoxLayout.setSpacing(0)
    self.vBoxLayout.addWidget(self.videoWidget, 1)
    self.vBoxLayout.addWidget(self.playBar, 0)

    # VLC-backed player, driven by the play bar and drawing into videoWidget.
    self.vlc_player = MediaPlayer(self)
    self.playBar.setMediaPlayer(self.vlc_player)  # type: ignore
    self.playBar.setVolume(80)
    self.vlc_player.setVideoOutput(self.videoWidget)

    # Applied once; the original repeated this call a few lines later.
    FluentStyleSheet.MEDIA_PLAYER.apply(self)

    # Keyboard focus plus event filters so arrow keys reach keyPressEvent
    # even when a child widget has focus.
    self.setFocusPolicy(Qt.StrongFocus)  # type: ignore
    self.videoWidget.setFocusPolicy(Qt.StrongFocus)  # type: ignore
    self.videoWidget.installEventFilter(self)
    self.playBar.installEventFilter(self)

    self.setAcceptDrops(True)

    # Hook up the application-wide signal bus.
    self._connectSignals()
def _connectSignals(self):
    """Connect the signal bus's video signals to this widget's methods."""
    bindings = (
        (signalBus.video_play, self.play),
        (signalBus.video_pause, self.pause),
        (signalBus.video_stop, self.stop),
        (signalBus.video_source_changed, self.setVideo),
        (signalBus.video_segment_play, self.playSegment),
        (signalBus.video_subtitle_added, self.addSubtitle),
    )
    for bus_signal, handler in bindings:
        bus_signal.connect(handler)
def addSubtitle(self, subtitle_file: str):
    """Remember ``subtitle_file`` and attach it to the player.

    The stored path is re-applied by ``setVideo`` for later videos.

    Args:
        subtitle_file: Path of the subtitle file.
    """
    self.subtitle_file = subtitle_file
    player = self.vlc_player
    player.add_subtitle(subtitle_file)
def setVideo(self, url: QUrl):
    """Switch the player to a new video source.

    The window title becomes the file name of ``url``; a subtitle file
    remembered in ``self.subtitle_file`` (if any) is attached again and the
    hint label is hidden.

    Args:
        url: QUrl of the video file.
    """
    title = url.fileName()
    self.setWindowTitle(title)

    player = self.vlc_player
    player.setSource(url)

    remembered_subtitle = self.subtitle_file
    if remembered_subtitle:
        player.add_subtitle(remembered_subtitle)

    # A source is loaded now, so the hint label is no longer needed.
    self.tipLabel.hide()
def play(self):
    """Start playback by delegating to the play bar."""
    bar = self.playBar
    bar.play()
def pause(self):
    """Pause playback by delegating to the play bar."""
    bar = self.playBar
    bar.pause()
def stop(self):
    """Stop playback by delegating to the play bar."""
    bar = self.playBar
    bar.stop()
def playSegment(self, start_time: int, end_time: int):
    """Play only a slice of the current video.

    Args:
        start_time: Segment start, in milliseconds.
        end_time: Segment end, in milliseconds.
    """
    player = self.vlc_player
    player.playSegment(start_time, end_time)
def hideEvent(self, e):
    """Stop playback when the widget gets hidden, then accept the event."""
    self.stop()
    e.accept()
def wheelEvent(self, e):
    """Ignore mouse-wheel input: nothing is done and the base handler is not called."""
    return None
def togglePlayState(self):
    """Pause when the player is currently playing, otherwise start playing."""
    action = self.pause if self.vlc_player.isPlaying() else self.play
    action()
@property
def player(self):
    """The ``player`` object exposed by the play bar."""
    bar = self.playBar
    return bar.player
def keyPressEvent(self, event):
    """Keyboard shortcuts: Space toggles playback, Left/Right seek by 3000.

    Any other key is forwarded to the base-class handler.
    """
    key = event.key()
    if key == Qt.Key_Space:  # type: ignore
        self.playBar.togglePlayState()
        return
    if key == Qt.Key_Left:  # type: ignore
        self.playBar.skipBack(3000)
        return
    if key == Qt.Key_Right:  # type: ignore
        self.playBar.skipForward(3000)
        return
    super().keyPressEvent(event)
def dragEnterEvent(self, event):
    """Accept a drag when at least one dragged URL is a video or subtitle file."""
    mime = event.mimeData()
    if not mime.hasUrls():
        return

    # Recognised video and subtitle extensions (compared case-insensitively).
    accepted_suffixes = (
        ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".srt", ".ass",
    )
    for url in mime.urls():
        if url.toLocalFile().lower().endswith(accepted_suffixes):
            event.acceptProposedAction()
            return
def dropEvent(self, event):
    """Handle files dropped onto the widget.

    Subtitle files (.srt/.ass) are attached through ``addSubtitle`` so the
    path is also remembered in ``self.subtitle_file``; ``setVideo`` relies on
    that attribute to re-attach the subtitle when a video is loaded
    afterwards (for example when a subtitle and a video are dropped
    together). The first video file found is loaded and played, and
    processing stops there.
    """
    subtitle_suffixes = (".srt", ".ass")
    video_suffixes = (".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv")

    for url in event.mimeData().urls():
        local_path = url.toLocalFile()
        lowered = local_path.lower()  # extensions are matched case-insensitively
        if lowered.endswith(subtitle_suffixes):
            # Go through addSubtitle rather than the player directly so the
            # subtitle path is recorded on the widget as well.
            self.addSubtitle(local_path)
        elif lowered.endswith(video_suffixes):
            self.setVideo(url)
            self.play()
            break  # only the first video file is handled
def eventFilter(self, obj, event):
    """Intercept Left/Right key presses from the watched child widgets.

    Such presses are routed to this widget's ``keyPressEvent`` and reported
    as handled; every other event goes to the base-class filter.
    """
    is_arrow_press = event.type() == event.KeyPress and event.key() in (
        Qt.Key_Left,  # type: ignore
        Qt.Key_Right,  # type: ignore
    )
    if not is_arrow_press:
        return super().eventFilter(obj, event)
    self.keyPressEvent(event)
    return True
def showEvent(self, event):
    """Run the base show handling, then take keyboard focus."""
    super().showEvent(event)
    # Focus is requested so key presses reach this widget once it is shown.
    self.setFocus()
if __name__ == "__main__":
    # Manual smoke test: open the player widget in a standalone window.
    app = QApplication(sys.argv)
    window = MyVideoWidget()

    # To preload a clip, point this at a local test video:
    # video_path = r"path/to/your/test/video.mp4"
    # window.setVideo(QUrl.fromLocalFile(video_path))

    # Show the window and bring it to the front.
    window.show()
    window.activateWindow()
    window.raise_()

    # Uncomment to start playback right away:
    # window.play()

    exit_code = app.exec_()
    sys.exit(exit_code)
================================================
FILE: app/components/SimpleSettingCard.py
================================================
from PyQt5.QtCore import pyqtSignal
from PyQt5.QtWidgets import QHBoxLayout
from qfluentwidgets import (
CaptionLabel,
CardWidget,
ComboBox,
SwitchButton,
ToolTipFilter,
ToolTipPosition,
)
class SimpleSettingCard(CardWidget):
    """Base card: a caption on the left, room for a control on the right.

    ``title`` is shown as the caption and ``content`` as the card's tooltip.
    """

    def __init__(self, title, content, parent=None):
        super().__init__(parent)
        self.title, self.content = title, content
        self.setup_ui()

    def setup_ui(self):
        """Create the horizontal layout, the caption label and the tooltip."""
        row = QHBoxLayout(self)
        row.setContentsMargins(16, 10, 8, 10)
        row.setSpacing(8)
        self.hBoxLayout = row

        caption = CaptionLabel(self)
        caption.setText(self.title)
        self.label = caption

        row.addWidget(caption)
        # The stretch pushes whatever subclasses add later to the right edge.
        row.addStretch(1)

        self.setToolTip(self.content)
        self.installEventFilter(ToolTipFilter(self, 100, ToolTipPosition.BOTTOM))
class ComboBoxSimpleSettingCard(SimpleSettingCard):
    """Simple card with a combo box; re-emits text changes as ``valueChanged``."""

    valueChanged = pyqtSignal(str)

    def __init__(self, title, content, items=None, parent=None):
        super().__init__(title, content, parent)
        self.items = items if items else []
        self.setup_combobox()

    def setup_combobox(self):
        """Create the combo box, fill it with ``self.items`` and add it to the row."""
        combo = ComboBox(self)
        self.comboBox = combo
        combo.addItems(self.items)
        combo.setMaxVisibleItems(6)
        combo.currentTextChanged.connect(self.valueChanged)  # type: ignore
        self.hBoxLayout.addWidget(combo)

    def setValue(self, value):
        """Select ``value``; it must be one of ``self.items`` (``list.index`` raises otherwise)."""
        position = self.items.index(value)
        self.comboBox.setCurrentIndex(position)

    def value(self):
        """Return the currently selected text."""
        return self.comboBox.currentText()
class SwitchButtonSimpleSettingCard(SimpleSettingCard):
    """Simple card with an on/off switch; re-emits changes as ``checkedChanged``."""

    checkedChanged = pyqtSignal(bool)

    def __init__(self, title, content, parent=None):
        super().__init__(title, content, parent)
        self.setup_switch()

    def setup_switch(self):
        """Create the switch, add it to the row and make the whole card clickable."""
        switch = SwitchButton(self)
        self.switchButton = switch
        switch.setOnText("开")
        switch.setOffText("关")
        switch.checkedChanged.connect(self.checkedChanged)  # type: ignore
        self.hBoxLayout.addWidget(switch)

        # Clicking anywhere on the card flips the switch.
        self.clicked.connect(  # type: ignore
            lambda: self.switchButton.setChecked(not self.switchButton.isChecked())
        )

    def setChecked(self, checked):
        """Set the switch state."""
        self.switchButton.setChecked(checked)

    def isChecked(self):
        """Return the current switch state."""
        return self.switchButton.isChecked()
================================================
FILE: app/components/SpinBoxSettingCard.py
================================================
from typing import Optional, Union
from PyQt5.QtCore import Qt, pyqtSignal
from PyQt5.QtGui import QIcon
from qfluentwidgets import CompactDoubleSpinBox, CompactSpinBox, SettingCard
from qfluentwidgets.common.config import ConfigItem, qconfig
class DoubleSpinBoxSettingCard(SettingCard):
    """Setting card with a compact floating-point spin box bound to a config item."""

    valueChanged = pyqtSignal(float)

    def __init__(
        self,
        configItem: ConfigItem,
        icon: Union[str, QIcon],
        title: str,
        content: Optional[str] = None,
        minimum: float = 0.0,
        maximum: float = 100.0,
        decimals: int = 1,
        step: float = 0.1,
        parent=None,
    ):
        """Build the card.

        Args:
            configItem: Config item read for the initial value and written on change.
            icon: Card icon.
            title: Card title.
            content: Optional description text.
            minimum: Lower bound of the spin box.
            maximum: Upper bound of the spin box.
            decimals: Number of decimals shown.
            step: Single-step increment of the spin box.
            parent: Parent widget.
        """
        super().__init__(icon, title, content, parent)
        self.configItem = configItem

        editor = CompactDoubleSpinBox(self)
        editor.setRange(minimum, maximum)
        editor.setDecimals(decimals)
        editor.setMinimumWidth(60)
        editor.setSingleStep(step)
        self.spinBox = editor

        # Right-aligned editor followed by a small trailing gap.
        self.hBoxLayout.addWidget(editor, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(8)

        # Load the stored value first, then start listening in both directions.
        self.setValue(qconfig.get(configItem))
        editor.valueChanged.connect(self.__onValueChanged)
        configItem.valueChanged.connect(self.setValue)

    def __onValueChanged(self, value: float):
        """Slot for spin box edits: store the value, then emit ``valueChanged``."""
        self.setValue(value)
        self.valueChanged.emit(value)

    def setValue(self, value: float):
        """Write ``value`` to the config item and show it in the spin box."""
        qconfig.set(self.configItem, value)
        self.spinBox.setValue(value)
class SpinBoxSettingCard(SettingCard):
    """Setting card with a compact integer spin box bound to a config item."""

    valueChanged = pyqtSignal(int)

    def __init__(
        self,
        configItem: ConfigItem,
        icon: Union[str, QIcon],
        title: str,
        content: Optional[str] = None,
        minimum: int = 0,
        maximum: int = 100,
        parent=None,
    ):
        """Build the card.

        Args:
            configItem: Config item read for the initial value and written on change.
            icon: Card icon.
            title: Card title.
            content: Optional description text.
            minimum: Lower bound of the spin box.
            maximum: Upper bound of the spin box.
            parent: Parent widget.
        """
        super().__init__(icon, title, content, parent)
        self.configItem = configItem

        editor = CompactSpinBox(self)
        editor.setRange(minimum, maximum)
        editor.setMinimumWidth(60)
        self.spinBox = editor

        # Right-aligned editor followed by a small trailing gap.
        self.hBoxLayout.addWidget(editor, 0, Qt.AlignRight)  # type: ignore
        self.hBoxLayout.addSpacing(8)

        # Load the stored value first, then start listening in both directions.
        self.setValue(qconfig.get(configItem))
        editor.valueChanged.connect(self.__onValueChanged)
        configItem.valueChanged.connect(self.setValue)

    def __onValueChanged(self, value: int):
        """Slot for spin box edits: store the value, then emit ``valueChanged``."""
        self.setValue(value)
        self.valueChanged.emit(value)

    def setValue(self, value: int):
        """Write ``value`` to the config item and show it in the spin box."""
        qconfig.set(self.configItem, value)
        self.spinBox.setValue(value)
================================================
FILE: app/components/SubtitleSettingDialog.py
================================================
from qfluentwidgets import (
BodyLabel,
MessageBoxBase,
SwitchSettingCard,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.components.SpinBoxSettingCard import SpinBoxSettingCard
class SubtitleSettingDialog(MessageBoxBase):
    """Subtitle settings dialog: split switch plus CJK / English length limits."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.titleLabel = BodyLabel(self.tr("字幕设置"), self)

        # Setting cards, each bound to its config item.
        self.split_card = SwitchSettingCard(
            FIF.ALIGNMENT,
            self.tr("字幕分割"),
            self.tr("字幕是否使用大语言模型进行智能断句"),
            cfg.need_split,
            self,
        )
        self.word_count_cjk_card = SpinBoxSettingCard(
            cfg.max_word_count_cjk,
            FIF.TILES,  # type: ignore
            self.tr("中文最大字数"),
            self.tr("单条字幕的最大字数 (对于中日韩等字符)"),
            minimum=8,
            maximum=50,
            parent=self,
        )
        self.word_count_english_card = SpinBoxSettingCard(
            cfg.max_word_count_english,
            FIF.TILES,  # type: ignore
            self.tr("英文最大单词数"),
            self.tr("单条字幕的最大单词数 (英文)"),
            minimum=8,
            maximum=50,
            parent=self,
        )

        # Stack the title and the cards top to bottom.
        stacked_widgets = (
            self.titleLabel,
            self.split_card,
            self.word_count_cjk_card,
            self.word_count_english_card,
        )
        for widget in stacked_widgets:
            self.viewLayout.addWidget(widget)
        self.viewLayout.setSpacing(10)

        self.setWindowTitle(self.tr("字幕设置"))
        self.widget.setMinimumWidth(380)

        # Only the cancel button stays visible, relabelled as the close button.
        self.yesButton.hide()
        self.cancelButton.setText(self.tr("关闭"))
================================================
FILE: app/components/TranscriptionOutputDialog.py
================================================
# -*- coding: utf-8 -*-
from qfluentwidgets import (
BodyLabel,
ComboBoxSettingCard,
MessageBoxBase,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.core.entities import TranscribeOutputFormatEnum
class TranscriptionSettingDialog(MessageBoxBase):
    """Transcription settings dialog offering the subtitle output format."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.titleLabel = BodyLabel(self.tr("转录设置"), self)

        # Output format selector; one entry per TranscribeOutputFormatEnum member.
        format_names = [fmt.value for fmt in TranscribeOutputFormatEnum]
        self.output_format_card = ComboBoxSettingCard(
            cfg.transcribe_output_format,
            FIF.SAVE,
            self.tr("输出格式"),
            self.tr("选择转录字幕的输出格式"),
            texts=format_names,
            parent=self,
        )
        self.output_format_card.setMinimumWidth(420)

        for widget in (self.titleLabel, self.output_format_card):
            self.viewLayout.addWidget(widget)
        self.viewLayout.setSpacing(10)

        self.setWindowTitle(self.tr("转录设置"))

        # Only the cancel button stays visible, relabelled as the close button.
        self.yesButton.hide()
        self.cancelButton.setText(self.tr("关闭"))
================================================
FILE: app/components/TranscriptionSettingDialog.py
================================================
# -*- coding: utf-8 -*-
from qfluentwidgets import (
BodyLabel,
ComboBoxSettingCard,
MessageBoxBase,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.core.entities import TranscribeOutputFormatEnum
class TranscriptionSettingDialog(MessageBoxBase):
    """Transcription settings dialog offering the subtitle output format."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.titleLabel = BodyLabel(self.tr("转录设置"), self)

        # Output format selector; one entry per TranscribeOutputFormatEnum member.
        format_names = [fmt.value for fmt in TranscribeOutputFormatEnum]
        self.output_format_card = ComboBoxSettingCard(
            cfg.transcribe_output_format,
            FIF.SAVE,
            self.tr("输出格式"),
            self.tr("选择转录字幕的输出格式"),
            texts=format_names,
            parent=self,
        )

        for widget in (self.titleLabel, self.output_format_card):
            self.viewLayout.addWidget(widget)
        self.viewLayout.setSpacing(10)

        self.setWindowTitle(self.tr("转录设置"))
        self.widget.setMinimumWidth(380)

        # Only the cancel button stays visible, relabelled as the close button.
        self.yesButton.hide()
        self.cancelButton.setText(self.tr("关闭"))
================================================
FILE: app/components/WhisperAPISettingWidget.py
================================================
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtWidgets import (
QVBoxLayout,
QWidget,
)
from qfluentwidgets import (
ComboBoxSettingCard,
InfoBar,
InfoBarPosition,
PushSettingCard,
SettingCardGroup,
SingleDirectionScrollArea,
)
from qfluentwidgets import FluentIcon as FIF
from ..common.config import cfg
from ..core.constant import INFOBAR_DURATION_ERROR, INFOBAR_DURATION_SUCCESS
from ..core.entities import TranscribeLanguageEnum
from ..core.llm import check_whisper_connection
from .EditComboBoxSettingCard import EditComboBoxSettingCard
from .LineEditSettingCard import LineEditSettingCard
class WhisperAPISettingWidget(QWidget):
    """Settings page for the Whisper API backend.

    Shows cards for base URL, API key, model, source language and prompt, plus
    a button that tests the connection on a background thread.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setup_ui()

    def setup_ui(self):
        """Build the scrollable card group and wire the connection-test button."""
        self.main_layout = QVBoxLayout(self)

        # Vertical-only scroll area with a transparent container inside.
        self.scrollArea = SingleDirectionScrollArea(orient=Qt.Vertical, parent=self)  # type: ignore
        self.scrollArea.setStyleSheet(
            "QScrollArea{background: transparent; border: none}"
        )
        self.container = QWidget(self)
        self.container.setStyleSheet("QWidget{background: transparent}")
        self.containerLayout = QVBoxLayout(self.container)

        self.setting_group = SettingCardGroup(self.tr("Whisper API 设置"), self)

        # API base URL
        self.base_url_card = LineEditSettingCard(
            cfg.whisper_api_base,
            FIF.LINK,
            self.tr("API Base URL"),
            self.tr("输入 Whisper API Base URL"),
            "https://api.openai.com/v1",
            self.setting_group,
        )
        # API key
        self.api_key_card = LineEditSettingCard(
            cfg.whisper_api_key,
            FIF.FINGERPRINT,
            self.tr("API Key"),
            self.tr("输入 Whisper API Key"),
            "sk-",
            self.setting_group,
        )
        # Model name (editable combo box with a few presets)
        self.model_card = EditComboBoxSettingCard(
            cfg.whisper_api_model,
            FIF.ROBOT,  # type: ignore
            self.tr("Whisper 模型"),
            self.tr("选择 Whisper 模型"),
            ["whisper-large-v3", "whisper-large-v3-turbo", "whisper-1"],
            self.setting_group,
        )
        # Source language
        self.language_card = ComboBoxSettingCard(
            cfg.transcribe_language,
            FIF.LANGUAGE,
            self.tr("源语言"),
            self.tr("音视频中说话的语言,默认根据前30秒自动识别"),
            [lang.value for lang in TranscribeLanguageEnum],
            self.setting_group,
        )
        # Optional prompt
        self.prompt_card = LineEditSettingCard(
            cfg.whisper_api_prompt,
            FIF.CHAT,
            self.tr("提示词"),
            self.tr("可选的提示词,默认空"),
            "",
            self.setting_group,
        )
        # Connection test button
        self.check_connection_card = PushSettingCard(
            self.tr("测试连接"),
            FIF.CONNECT,
            self.tr("测试 Whisper API 连接"),
            self.tr("点击测试 API 连接是否正常"),
            self.setting_group,
        )

        # Give every input control the same minimum width.
        for editor in (
            self.base_url_card.lineEdit,
            self.api_key_card.lineEdit,
            self.model_card.comboBox,
            self.language_card.comboBox,
            self.prompt_card.lineEdit,
        ):
            editor.setMinimumWidth(200)

        # Register the cards with the group in display order.
        for card in (
            self.base_url_card,
            self.api_key_card,
            self.model_card,
            self.language_card,
            self.prompt_card,
            self.check_connection_card,
        ):
            self.setting_group.addSettingCard(card)

        self.check_connection_card.clicked.connect(self.on_check_connection)

        self.containerLayout.addWidget(self.setting_group)
        self.containerLayout.addStretch(1)

        self.scrollArea.setWidget(self.container)
        self.scrollArea.setWidgetResizable(True)

        self.main_layout.addWidget(self.scrollArea)

    def on_check_connection(self):
        """Validate the inputs and start the connection test on a worker thread."""
        base_url = self.base_url_card.lineEdit.text().strip()
        api_key = self.api_key_card.lineEdit.text().strip()
        model = self.model_card.comboBox.currentText().strip()

        # All three fields are required for the test.
        if not base_url or not api_key or not model:
            InfoBar.warning(
                self.tr("配置不完整"),
                self.tr("请输入 API Base URL、API Key 和 model"),
                duration=INFOBAR_DURATION_ERROR,
                position=InfoBarPosition.TOP,
                parent=self.window(),
            )
            return

        # Disable the button while the test runs; the result handlers restore it.
        self.check_connection_card.button.setEnabled(False)
        self.check_connection_card.button.setText(self.tr("正在测试..."))

        # The thread is kept on self so it stays referenced while running.
        self.connection_thread = WhisperConnectionThread(base_url, api_key, model)
        self.connection_thread.finished.connect(self.on_connection_check_finished)
        self.connection_thread.error.connect(self.on_connection_check_error)
        self.connection_thread.start()

    def _restore_check_button(self):
        """Re-enable the test button and restore its default caption."""
        self.check_connection_card.button.setEnabled(True)
        self.check_connection_card.button.setText(self.tr("测试连接"))

    def on_connection_check_finished(self, success, result):
        """Show the outcome of a completed connection test.

        Args:
            success: Whether the connection test succeeded.
            result: Detail text appended to the message.
        """
        self._restore_check_button()
        if success:
            InfoBar.success(
                self.tr("连接成功"),
                self.tr("Whisper API 连接成功!") + "\n" + result,
                duration=INFOBAR_DURATION_SUCCESS,
                position=InfoBarPosition.BOTTOM,
                parent=self.window(),
            )
        else:
            # tr() gets a plain literal (as in the success branch); the
            # dynamic detail is appended afterwards so the text stays
            # translatable.
            InfoBar.error(
                self.tr("连接失败"),
                self.tr("Whisper API 连接失败!") + "\n" + result,
                duration=INFOBAR_DURATION_ERROR,
                position=InfoBarPosition.BOTTOM,
                parent=self.window(),
            )

    def on_connection_check_error(self, message):
        """Show an error raised while running the connection test.

        Args:
            message: Error text reported by the worker thread.
        """
        self._restore_check_button()
        InfoBar.error(
            self.tr("测试错误"),
            message,
            duration=INFOBAR_DURATION_ERROR,
            position=InfoBarPosition.BOTTOM,
            parent=self.window(),
        )
class WhisperConnectionThread(QThread):
    """Worker thread that runs the Whisper API connection check."""

    # NOTE(review): this ``finished`` shares its name with QThread's own
    # ``finished`` signal -- confirm the shadowing is intentional.
    finished = pyqtSignal(bool, str)
    error = pyqtSignal(str)

    def __init__(self, base_url, api_key, model):
        super().__init__()
        self.base_url, self.api_key, self.model = base_url, api_key, model

    def run(self):
        """Run the check; emit ``finished(success, result)`` or ``error(message)``."""
        try:
            ok, detail = check_whisper_connection(
                self.base_url, self.api_key, self.model
            )
            self.finished.emit(ok, detail)
        except Exception as exc:
            self.error.emit(str(exc))
================================================
FILE: app/components/WhisperCppSettingWidget.py
================================================
import os
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import (
QHBoxLayout,
QHeaderView,
QTableWidgetItem,
QVBoxLayout,
QWidget,
)
from qfluentwidgets import (
BodyLabel,
ComboBox,
ComboBoxSettingCard,
HyperlinkButton,
HyperlinkCard,
InfoBar,
MessageBoxBase,
ProgressBar,
PushButton,
SettingCardGroup,
SingleDirectionScrollArea,
SubtitleLabel,
TableItemDelegate,
TableWidget,
)
from qfluentwidgets import FluentIcon as FIF
from app.common.config import cfg
from app.config import MODEL_PATH
from app.core.entities import (
TranscribeLanguageEnum,
WhisperModelEnum,
)
from app.core.utils.logger import setup_logger
from app.core.utils.platform_utils import open_folder
from app.thread.file_download_thread import FileDownloadThread
logger = setup_logger("whisper_download")
# Model catalogue, defined with the Alibaba Cloud (ModelScope) mirror links.
# Example mirror URL:
# https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-tiny.bin
# Alternative mirror form, not used by the entries below:
# "mirrorLink": "https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin?download=true"
# Each entry carries: label (display name), value (file name under MODEL_PATH),
# size (display text), downloadLink, mirrorLink and sha.
WHISPER_CPP_MODELS = [
{
"label": "Tiny",
"value": "ggml-tiny.bin",
"size": "77.7 MB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-tiny.bin",
"sha": "bd577a113a864445d4c299885e0cb97d4ba92b5f",
},
{
"label": "Base",
"value": "ggml-base.bin",
"size": "148 MB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-base.bin",
"sha": "465707469ff3a37a2b9b8d8f89f2f99de7299dac",
},
{
"label": "Small",
"value": "ggml-small.bin",
"size": "488 MB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-small.bin",
"sha": "55356645c2b361a969dfd0ef2c5a50d530afd8d5",
},
{
"label": "Medium",
"value": "ggml-medium.bin",
"size": "1.53 GB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-medium.bin",
"sha": "fd9727b6e1217c2f614f9b698455c4ffd82463b4",
},
{
"label": "large-v1",
"value": "ggml-large-v1.bin",
"size": "3.09 GB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v1.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-large-v1.bin",
"sha": "b1caaf735c4cc1429223d5a74f0f4d0b9b59a299",
},
{
"label": "large-v2",
"value": "ggml-large-v2.bin",
"size": "3.09 GB",
"downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2.bin",
"mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-large-v2.bin",
"sha": "0f4c8e34f21cf1a914c59d8b3ce882345ad349d6",
},
# Disabled entries, kept for reference:
# {
# "label": "Large(v3)",
# "value": "ggml-large-v3.bin",
# "size": "3.09 GB",
# "downloadLink": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
# "mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-large-v3.bin",
# "sha": "ad82bf6a9043ceed055076d0fd39f5f186ff8062"
# },
# {
# "label": "Distil Large(v3)",
# "value": "ggml-distil-large-v3.bin",
# "size": "1.52 GB",
# "downloadLink": "https://huggingface.co/distil-whisper/distil-large-v3-ggml/resolve/main/ggml-distil-large-v3.bin?download=true",
# "mirrorLink": "https://www.modelscope.cn/models/cjc1887415157/whisper.cpp/resolve/master/ggml-distil-large-v3.bin",
# "sha": "5e61e98bdcf3b9a78516c59bf7d1a10d64cae67a"
# }
]
def check_whisper_cpp_exists():
    """Report whether the WhisperCpp program is available.

    Returns:
        A ``(has_program, installed_versions)`` tuple. This implementation
        always answers ``True`` with an empty version list.
    """
    installed_versions = []
    return True, installed_versions
class DownloadDialog(MessageBoxBase):
    """Dialog that downloads a single Whisper.cpp model picked from a combo box."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setup_ui()
        self.setWindowTitle(self.tr("下载模型"))
        self.download_thread = None

    def setup_ui(self):
        """Create the model selector, progress widgets and download button."""
        self.titleLabel = BodyLabel(self.tr("下载模型"), self)

        # Model selector; already-downloaded models get a check-mark prefix.
        self.model_combo = ComboBox(self)
        self.model_combo.setFixedWidth(300)
        for entry in WHISPER_CPP_MODELS:
            local_file = os.path.join(MODEL_PATH, entry["value"])
            mark = "✓ " if os.path.exists(local_file) else " "
            self.model_combo.addItem(f"{mark}{entry['label']} ({entry['size']})")

        # Progress bar and status label stay hidden until a download starts.
        self.progress_bar = ProgressBar()
        self.progress_bar.hide()
        self.progress_label = BodyLabel()
        self.progress_label.hide()

        self.download_button = PushButton(self.tr("下载"), self)
        self.download_button.clicked.connect(self.start_download)

        for widget in (
            self.titleLabel,
            self.model_combo,
            self.progress_bar,
            self.progress_label,
            self.download_button,
        ):
            self.viewLayout.addWidget(widget)
        self.viewLayout.setSpacing(10)

        # Only the cancel button stays visible, relabelled as the close button.
        self.yesButton.hide()
        self.cancelButton.setText(self.tr("关闭"))

    def start_download(self):
        """Download the selected model unless its file already exists."""
        chosen = WHISPER_CPP_MODELS[self.model_combo.currentIndex()]
        target = os.path.join(MODEL_PATH, chosen["value"])

        if os.path.exists(target):
            InfoBar.warning(
                title=self.tr("提示"),
                content=self.tr("模型文件已存在,无需重复下载"),
                parent=self.window(),
                duration=3000,
            )
            return

        self.progress_bar.show()
        self.progress_label.show()
        self.download_button.setEnabled(False)

        # The download uses the entry's mirror link.
        worker = FileDownloadThread(chosen["mirrorLink"], target)
        self.download_thread = worker
        worker.progress.connect(self.update_progress)
        worker.finished.connect(self.download_finished)
        worker.error.connect(self.download_error)
        worker.start()

    def update_progress(self, value, status_msg):
        """Show the reported progress value and status text."""
        self.progress_bar.setValue(int(value))
        self.progress_label.setText(status_msg)

    def download_finished(self):
        """Announce success and re-enable the download button."""
        InfoBar.success(
            title=self.tr("完成"),
            content=self.tr("模型下载完成!"),
            parent=self.window(),
            duration=3000,
        )
        self.download_button.setEnabled(True)
        self.progress_label.setText(self.tr("下载完成"))

    def download_error(self, error):
        """Show the error, re-enable the button and hide the status label."""
        InfoBar.error(
            title=self.tr("下载错误"),
            content=error,
            parent=self.window(),
            duration=5000,
        )
        self.download_button.setEnabled(True)
        self.progress_label.hide()

    def reject(self):
        """Stop a running download before closing the dialog."""
        worker = self.download_thread
        if worker and worker.isRunning():
            logger.info("关闭下载对话框,终止下载")
            worker.stop()
        super().reject()
class WhisperCppDownloadDialog(MessageBoxBase):
    """WhisperCpp download dialog: program status plus a table of models.

    Each model row has its own download button. When a ``setting_widget`` is
    supplied, its model combo box is rebuilt after a successful download.
    """

    # Class-level flag shared by all dialog instances: True while a model
    # download started from this dialog class is running.
    is_downloading = False

    def __init__(self, parent=None, setting_widget=None):
        """Build the dialog.

        Args:
            parent: Parent widget.
            setting_widget: Optional settings widget whose ``model_card`` is
                refreshed after a download completes.
        """
        super().__init__(parent)
        self.widget.setMinimumWidth(600)
        self.program_download_thread = None
        self.model_download_thread = None
        self._setup_ui()
        self.setting_widget = setting_widget

    def _setup_ui(self):
        """Assemble the program, model and progress sections."""
        layout = QVBoxLayout()
        self._setup_program_section(layout)
        layout.addSpacing(20)
        self._setup_model_section(layout)
        self._setup_progress_section(layout)
        self.viewLayout.addLayout(layout)
        self.cancelButton.setText(self.tr("关闭"))
        self.yesButton.hide()

    def _setup_program_section(self, layout):
        """Add the WhisperCpp program status section to ``layout``."""
        whisper_cpp_title = SubtitleLabel(self.tr("WhisperCpp程序"), self)
        layout.addWidget(whisper_cpp_title)
        layout.addSpacing(8)

        has_program, installed_versions = check_whisper_cpp_exists()
        if has_program:
            versions_text = " + ".join(installed_versions)
            # tr() receives a literal; the dynamic part is inserted afterwards
            # so the string remains translatable.
            program_status = BodyLabel(
                self.tr("已安装版本: {}").format(versions_text), self
            )
            program_status.setStyleSheet("color: green")
            layout.addWidget(program_status)
        else:
            desc_label = BodyLabel(self.tr("未下载 WhisperCpp 程序"), self)
            layout.addWidget(desc_label)

    def _setup_model_section(self, layout):
        """Add the model section (title row, folder link, table) to ``layout``."""
        title_layout = QHBoxLayout()
        model_title = SubtitleLabel(self.tr("模型下载"), self)
        title_layout.addWidget(model_title)

        # Link button that opens the model folder.
        open_folder_btn = HyperlinkButton("", self.tr("打开模型文件夹"), parent=self)
        open_folder_btn.setIcon(FIF.FOLDER)
        open_folder_btn.clicked.connect(self._open_model_folder)
        title_layout.addStretch()
        title_layout.addWidget(open_folder_btn)

        layout.addLayout(title_layout)
        layout.addSpacing(8)

        self.model_table = self._create_model_table()
        self._populate_model_table()
        layout.addWidget(self.model_table)

    def _create_model_table(self):
        """Create the read-only four-column model table."""
        table = TableWidget(self)
        table.setEditTriggers(TableWidget.NoEditTriggers)
        table.setSelectionMode(TableWidget.NoSelection)
        table.setColumnCount(4)
        table.setHorizontalHeaderLabels(
            [self.tr("模型名称"), self.tr("大小"), self.tr("状态"), self.tr("操作")]
        )

        # Table styling
        table.setBorderVisible(True)
        table.setBorderRadius(8)
        table.setItemDelegate(TableItemDelegate(table))

        # Column sizing: the name column stretches, the others are fixed.
        header = table.horizontalHeader()
        header.setSectionResizeMode(0, QHeaderView.Stretch)
        for column, width in ((1, 100), (2, 80), (3, 150)):
            header.setSectionResizeMode(column, QHeaderView.Fixed)
        for column, width in ((1, 100), (2, 80), (3, 150)):
            table.setColumnWidth(column, width)

        row_height = 45
        table.verticalHeader().setDefaultSectionSize(row_height)

        # Fixed height: six rows plus header height and 15 extra pixels.
        header_height = 20
        max_visible_rows = 6
        table.setFixedHeight(row_height * max_visible_rows + header_height + 15)
        return table

    def _setup_progress_section(self, layout):
        """Add the (initially hidden) progress bar and label to ``layout``."""
        self.progress_bar = ProgressBar(self)
        self.progress_label = BodyLabel("", self)
        self.progress_bar.hide()
        self.progress_label.hide()
        layout.addWidget(self.progress_bar)
        layout.addWidget(self.progress_label)

    def _populate_model_table(self):
        """Fill the table with one row per entry of ``WHISPER_CPP_MODELS``."""
        self.model_table.setRowCount(len(WHISPER_CPP_MODELS))
        for i, model in enumerate(WHISPER_CPP_MODELS):
            self._add_model_row(i, model)

    def _add_model_row(self, row, model):
        """Populate one table row: name, size, download status and button."""
        name_item = QTableWidgetItem(model["label"])
        name_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 0, name_item)

        size_item = QTableWidgetItem(f"{model['size']}")
        size_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 1, size_item)

        # Check the file once and reuse the result for status and button text.
        model_bin_path = os.path.join(MODEL_PATH, model["value"])
        downloaded = os.path.exists(model_bin_path)

        status_item = QTableWidgetItem(
            self.tr("已下载") if downloaded else self.tr("未下载")
        )
        if downloaded:
            status_item.setForeground(Qt.green)  # type: ignore
        status_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
        self.model_table.setItem(row, 2, status_item)

        # Download button, centred in its cell by a container widget.
        button_container = QWidget()
        button_layout = QHBoxLayout(button_container)
        button_layout.setContentsMargins(4, 4, 4, 4)
        download_btn = HyperlinkButton(
            "",
            self.tr("重新下载") if downloaded else self.tr("下载"),
            parent=self,
        )
        download_btn.setIcon(FIF.DOWNLOAD)
        # ``r=row`` binds the row at definition time (avoids late binding).
        download_btn.clicked.connect(lambda checked, r=row: self._download_model(r))
        button_layout.addStretch()
        button_layout.addWidget(download_btn)
        button_layout.addStretch()
        self.model_table.setCellWidget(row, 3, button_container)

    def _refresh_setting_widget_models(self):
        """Rebuild the settings widget's model combo box from downloaded models.

        Best effort: any failure is logged and otherwise ignored.
        """
        if not self.setting_widget:
            return
        try:
            # Read the configured value before clearing the combo box.
            current_value = cfg.whisper_model.value
            combo = self.setting_widget.model_card.comboBox
            combo.clear()

            # Lower-cased label -> file name. The enum value is lower-cased as
            # well, matching the lookup in WhisperCppSettingWidget.setup_ui.
            model_map = {m["label"].lower(): m["value"] for m in WHISPER_CPP_MODELS}
            available = []
            for enum_val in WhisperModelEnum:
                file_name = model_map.get(enum_val.value.lower())
                if file_name is not None and (MODEL_PATH / file_name).exists():
                    available.append(enum_val)

            self.setting_widget.model_card.optionToText = {
                e: e.value for e in available
            }
            for enum_val in available:
                combo.addItem(enum_val.value, userData=enum_val)

            # Restore the previous selection, else fall back to the first item.
            if current_value in available:
                combo.setCurrentText(current_value.value)
            elif combo.count() > 0:
                combo.setCurrentIndex(0)
        except Exception as e:
            logger.error(f"更新模型选择失败: {e}")

    def _download_model(self, row):
        """Start downloading the model shown in ``row``.

        Only one download may run at a time (tracked by the class-level
        ``is_downloading`` flag); a second request only shows a warning.
        """
        if WhisperCppDownloadDialog.is_downloading:
            InfoBar.warning(
                self.tr("下载进行中"),
                self.tr("请等待当前下载任务完成"),
                duration=3000,
                parent=self,
            )
            return

        WhisperCppDownloadDialog.is_downloading = True
        self._set_all_download_buttons_enabled(False)

        model = WHISPER_CPP_MODELS[row]
        # Start from zero so a previous download's value is not shown.
        self.progress_bar.setValue(0)
        self.progress_bar.show()
        self.progress_label.show()
        self.progress_label.setText(
            self.tr("正在下载 {} 模型...").format(model["label"])
        )

        # Locate this row's button so its caption can be updated afterwards.
        button_container = self.model_table.cellWidget(row, 3)
        download_btn = (
            button_container.findChild(HyperlinkButton) if button_container else None
        )
        if download_btn:
            download_btn.setEnabled(False)

        def _on_model_download_progress(value, msg):
            self.progress_bar.setValue(int(value))
            self.progress_label.setText(msg)

        def _on_model_download_finished():
            WhisperCppDownloadDialog.is_downloading = False
            self._set_all_download_buttons_enabled(True)

            # Mark the row as downloaded.
            status_item = QTableWidgetItem(self.tr("已下载"))
            status_item.setForeground(Qt.green)  # type: ignore
            status_item.setTextAlignment(Qt.AlignCenter)  # type: ignore
            self.model_table.setItem(row, 2, status_item)

            if download_btn:
                download_btn.setText(self.tr("重新下载"))
                download_btn.setEnabled(True)

            self._refresh_setting_widget_models()

            InfoBar.success(
                self.tr("下载成功"),
                self.tr("{} 模型已下载完成").format(model["label"]),
                duration=3000,
                parent=self,
            )
            self.progress_bar.hide()
            self.progress_label.hide()

        def _on_model_download_error(error):
            WhisperCppDownloadDialog.is_downloading = False
            self._set_all_download_buttons_enabled(True)
            if download_btn:
                download_btn.setEnabled(True)
            InfoBar.error(self.tr("下载失败"), str(error), duration=3000, parent=self)
            self.progress_bar.hide()
            self.progress_label.hide()

        # The download uses the entry's mirror link.
        self.model_download_thread = FileDownloadThread(
            model["mirrorLink"], os.path.join(MODEL_PATH, model["value"])
        )
        self.model_download_thread.progress.connect(_on_model_download_progress)
        self.model_download_thread.finished.connect(_on_model_download_finished)
        self.model_download_thread.error.connect(_on_model_download_error)
        self.model_download_thread.start()

    def _set_all_download_buttons_enabled(self, enabled: bool):
        """Enable or disable every download control in the dialog."""
        # The program download widgets are optional; each one is checked on
        # its own so a missing attribute cannot raise.
        for attr_name in ("program_download_btn", "program_combo"):
            control = getattr(self, attr_name, None)
            if control is not None:
                control.setEnabled(enabled)

        for row in range(self.model_table.rowCount()):
            button_container = self.model_table.cellWidget(row, 3)
            if button_container:
                download_btn = button_container.findChild(HyperlinkButton)
                if download_btn:
                    download_btn.setEnabled(enabled)

    def _open_model_folder(self):
        """Open the model folder if it exists."""
        if os.path.exists(MODEL_PATH):
            open_folder(str(MODEL_PATH))
class WhisperCppSettingWidget(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.setup_ui()
self.setup_signals()
def setup_ui(self):
self.main_layout = QVBoxLayout(self)
# 创建单向滚动区域和容器
self.scrollArea = SingleDirectionScrollArea(orient=Qt.Vertical, parent=self) # type: ignore
self.scrollArea.setStyleSheet(
"QScrollArea{background: transparent; border: none}"
)
self.container = QWidget(self)
self.container.setStyleSheet("QWidget{background: transparent}")
self.containerLayout = QVBoxLayout(self.container)
self.setting_group = SettingCardGroup(self.tr("Whisper CPP 设置"), self)
# 模型选择
self.model_card = ComboBoxSettingCard(
cfg.whisper_model,
FIF.ROBOT,
self.tr("模型"),
self.tr("选择Whisper模型"),
[model.value for model in WhisperModelEnum],
self.setting_group,
)
# 检查未下载的模型并从下拉框中移除
for i in range(self.model_card.comboBox.count() - 1, -1, -1):
model_text = self.model_card.comboBox.itemText(i).lower()
model_configs = {
model["label"].lower(): model for model in WHISPER_CPP_MODELS
}
model_config = model_configs.get(model_text)
if model_config and (MODEL_PATH / model_config["value"]).exists():
continue
self.model_card.comboBox.removeItem(i)
# 语言选择
self.language_card = ComboBoxSettingCard(
cfg.transcribe_language,
FIF.LANGUAGE,
self.tr("源语言"),
self.tr("音视频中说话的语言,默认根据前30秒自动识别"),
[language.value for language in TranscribeLanguageEnum],
self.setting_group,
)
# 添加模型管理卡片
self.manage_model_card = HyperlinkCard(
"", # 无链接
self.tr("管理模型"),
FIF.DOWNLOAD, # 使用下载图标
self.tr("模型管理"),
self.tr("下载或更新 Whisper CPP 模型"),
self.setting_group, # 添加到设置组
)
# 添加 setMaxVisibleItems
self.language_card.comboBox.setMaxVisibleItems(6)
# 使用 addSettingCard 添加卡片到组
self.setting_group.addSettingCard(self.model_card)
self.setting_group.addSettingCard(self.language_card)
self.setting_group.addSettingCard(self.manage_model_card)
# 将设置组添加到容器布局
self.containerLayout.addWidget(self.setting_group)
self.containerLayout.addStretch(1)
# 设置组件最小宽度
self.model_card.comboBox.setMinimumWidth(200)
self.language_card.comboBox.setMinimumWidth(200)
# 设置滚动区域
self.scrollArea.setWidget(self.container)
self.scrollArea.setWidgetResizable(True)
# 将滚动区域添加到主布局
self.main_layout.addWidget(self.scrollArea)
def setup_signals(self):
    """Connect the "manage models" card's link button to the download dialog."""
    link_button = self.manage_model_card.linkButton
    link_button.clicked.connect(self.show_download_dialog)
def show_download_dialog(self):
    """Create the Whisper CPP download dialog and show it."""
    # The dialog is given this widget's window and the widget itself.
    owner_window = self.window()
    dialog = WhisperCppDownloadDialog(owner_window, self)
    dialog.show()
================================================
FILE: app/components/transcription_setting_card.py
================================================
from typing import Optional
from PyQt5.QtWidgets import (
QStackedWidget,
QVBoxLayout,
QWidget,
)
from ..core.entities import (
TranscribeModelEnum,
)
from ..core.utils.platform_utils import is_macos
from .FasterWhisperSettingWidget import FasterWhisperSettingWidget
from .WhisperAPISettingWidget import WhisperAPISettingWidget
from .WhisperCppSettingWidget import WhisperCppSettingWidget
class TranscriptionSettingCard(QWidget):
    """Stacked container that shows the settings page of the selected transcription model.

    Pages are created for Whisper CPP and Whisper API on every platform; the
    FasterWhisper page is only created when ``is_macos()`` is false. An empty
    page is registered first and is used for any model without its own page.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setup_ui()

    def setup_ui(self):
        """Build the layout and register one page per available backend."""
        self.main_layout = QVBoxLayout(self)
        self.main_layout.setContentsMargins(0, 0, 0, 0)

        # Stack holding the per-model settings pages.
        self.stacked_widget = QStackedWidget(self)

        # Blank page, added first, used when no dedicated page applies.
        self.empty_widget = QWidget(self)
        self.whisper_cpp_widget = WhisperCppSettingWidget(self)
        self.whisper_api_widget = WhisperAPISettingWidget(self)

        # FasterWhisper is not available on macOS, so the page stays None there.
        self.faster_whisper_widget: Optional[FasterWhisperSettingWidget] = None
        if not is_macos():
            self.faster_whisper_widget = FasterWhisperSettingWidget(self)

        self.stacked_widget.addWidget(self.empty_widget)
        self.stacked_widget.addWidget(self.whisper_cpp_widget)
        self.stacked_widget.addWidget(self.whisper_api_widget)
        if self.faster_whisper_widget is not None:
            self.stacked_widget.addWidget(self.faster_whisper_widget)

        self.main_layout.addWidget(self.stacked_widget)

    def on_model_changed(self, value):
        """Switch to the settings page that matches ``value``.

        Args:
            value: A ``TranscribeModelEnum`` member's ``.value``.

        Any unrecognised value shows the empty page. So does the FasterWhisper
        value when its page was never created: the widget is ``None`` in that
        case and must not be handed to ``setCurrentWidget``.
        """
        if value == TranscribeModelEnum.WHISPER_CPP.value:
            target = self.whisper_cpp_widget
        elif value == TranscribeModelEnum.WHISPER_API.value:
            target = self.whisper_api_widget
        elif (
            value == TranscribeModelEnum.FASTER_WHISPER.value
            and self.faster_whisper_widget is not None
        ):
            target = self.faster_whisper_widget
        else:
            target = self.empty_widget
        self.stacked_widget.setCurrentWidget(target)
================================================
FILE: app/config.py
================================================
import logging
import os
from pathlib import Path
# Application metadata and project links.
VERSION = "v1.4.0"
YEAR = 2025
APP_NAME = "VideoCaptioner"
AUTHOR = "Weifeng"
HELP_URL = "https://github.com/WEIFENG2333/VideoCaptioner"
GITHUB_REPO_URL = "https://github.com/WEIFENG2333/VideoCaptioner"
RELEASE_URL = "https://github.com/WEIFENG2333/VideoCaptioner/releases/latest"
FEEDBACK_URL = "https://github.com/WEIFENG2333/VideoCaptioner/issues"
# Paths. ROOT_PATH is the parent of the directory that contains this file;
# every other path below is derived from it.
ROOT_PATH = Path(__file__).parent.parent
RESOURCE_PATH = ROOT_PATH / "resource"
APPDATA_PATH = ROOT_PATH / "AppData"
WORK_PATH = ROOT_PATH / "work-dir"
BIN_PATH = RESOURCE_PATH / "bin"
ASSETS_PATH = RESOURCE_PATH / "assets"
SUBTITLE_STYLE_PATH = RESOURCE_PATH / "subtitle_style"
TRANSLATIONS_PATH = RESOURCE_PATH / "translations"
FONTS_PATH = RESOURCE_PATH / "fonts"
LOG_PATH = APPDATA_PATH / "logs"
LLM_LOG_FILE = LOG_PATH / "llm_requests.jsonl"
SETTINGS_PATH = APPDATA_PATH / "settings.json"
CACHE_PATH = APPDATA_PATH / "cache"
MODEL_PATH = APPDATA_PATH / "models"
# NOTE(review): "FASER" looks like a misspelling of "FASTER"; the name is left
# as-is because other modules may import it - confirm before renaming.
FASER_WHISPER_PATH = BIN_PATH / "Faster-Whisper-XXL"
# Logging configuration
LOG_LEVEL = logging.INFO
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
# Prepend the bin directories to PATH so they are searched first.
# After both statements the order is: BIN_PATH, FASER_WHISPER_PATH, previous PATH.
# Reading os.environ["PATH"] raises KeyError if PATH is not set.
os.environ["PATH"] = str(FASER_WHISPER_PATH) + os.pathsep + os.environ["PATH"]
os.environ["PATH"] = str(BIN_PATH) + os.pathsep + os.environ["PATH"]
# VLC location (presumably read by the python-vlc bindings - confirm).
os.environ["PYTHON_VLC_MODULE_PATH"] = str(BIN_PATH / "vlc")
# Create the writable directories at import time (no error if they already exist).
for p in [CACHE_PATH, LOG_PATH, WORK_PATH, MODEL_PATH]:
    p.mkdir(parents=True, exist_ok=True)
================================================
FILE: app/core/asr/__init__.py
================================================
from .bcut import BcutASR
from .chunked_asr import ChunkedASR
from .faster_whisper import FasterWhisperASR
from .jianying import JianYingASR
from .status import ASRStatus
from .transcribe import transcribe
from .whisper_api import WhisperAPI
from .whisper_cpp import WhisperCppASR
# Names exported by `from <this package> import *`; each one is imported above.
__all__ = [
    "BcutASR",
    "ChunkedASR",
    "FasterWhisperASR",
    "JianYingASR",
    "WhisperAPI",
    "WhisperCppASR",
    "transcribe",
    "ASRStatus",
]
================================================
FILE: app/core/asr/asr_data.py
================================================
import json
import math
import os
import platform
import re
from pathlib import Path
from typing import List, Optional, Tuple
from langdetect import LangDetectException, detect
from ..entities import SubtitleLayoutEnum
from ..utils.text_utils import is_mainly_cjk
# Multilingual tokenisation pattern. Alternatives ending in "+" match a whole
# run of characters as one token (word-level scripts); the single-character
# alternatives match one character per token (character-level scripts).
_WORD_SPLIT_PATTERN = (
    r"[a-zA-Z\u00c0-\u00ff\u0100-\u017f']+"  # Latin letters (incl. extended) and apostrophe
    r"|[\u0400-\u04ff]+"  # Cyrillic (Russian)
    r"|[\u0370-\u03ff]+"  # Greek
    r"|[\u0600-\u06ff]+"  # Arabic
    r"|[\u0590-\u05ff]+"  # Hebrew
    r"|\d+"  # Digits
    r"|[\u4e00-\u9fff]"  # Chinese
    r"|[\u3040-\u309f]"  # Japanese hiragana
    r"|[\u30a0-\u30ff]"  # Japanese katakana
    r"|[\uac00-\ud7af]"  # Korean
    r"|[\u0e00-\u0e7f][\u0e30-\u0e3a\u0e47-\u0e4e]*"  # Thai (one character plus following marks)
    r"|[\u0900-\u097f]"  # Devanagari (Hindi)
    r"|[\u0980-\u09ff]"  # Bengali
    r"|[\u0e80-\u0eff]"  # Lao
    r"|[\u1000-\u109f]"  # Burmese
)
def handle_long_path(path: str) -> str:
    r"""Work around the Windows long-path limit by adding the \\?\ prefix.

    The function is idempotent: a path that already starts with the prefix is
    returned unchanged, so it is safe to call more than once on the same path.

    Args:
        path: Original file path.

    Returns:
        The absolute path with the \\?\ prefix when running on Windows and
        ``path`` is longer than 260 characters and not yet prefixed;
        otherwise ``path`` unchanged.
    """
    # A raw string literal cannot end with a backslash, so the prefix is written
    # with escapes. The previous check compared against r"\\?\ " (with a
    # trailing space), which never matched and caused double prefixing.
    long_path_prefix = "\\\\?\\"
    if (
        platform.system() == "Windows"
        and len(path) > 260
        and not path.startswith(long_path_prefix)
    ):
        return long_path_prefix + os.path.abspath(path)
    return path
class ASRDataSeg:
    """One subtitle segment: text, optional translation, and start/end times.

    ``start_time`` and ``end_time`` are fed to the ``_ms_to_*`` helpers, which
    interpret them as milliseconds.
    """

    def __init__(
        self, text: str, start_time: int, end_time: int, translated_text: str = ""
    ):
        self.text = text
        self.translated_text = translated_text
        self.start_time = start_time
        self.end_time = end_time

    def to_srt_ts(self) -> str:
        """Return the segment's time range as ``HH:MM:SS,mmm --> HH:MM:SS,mmm``."""
        return f"{self._ms_to_srt_time(self.start_time)} --> {self._ms_to_srt_time(self.end_time)}"

    def to_lrc_ts(self) -> str:
        """Return the segment's start time as an LRC tag, ``[MM:SS.cc]``."""
        return f"[{self._ms_to_lrc_time(self.start_time)}]"

    def to_ass_ts(self) -> Tuple[str, str]:
        """Return ``(start, end)`` formatted as ASS timestamps."""
        return self._ms_to_ass_ts(self.start_time), self._ms_to_ass_ts(self.end_time)

    @staticmethod
    def _ms_to_lrc_time(ms: int) -> str:
        """Convert milliseconds to LRC time format (MM:SS.cc).

        Seconds are zero-padded to two digits, and hundredths are truncated
        (not rounded) so the seconds field can never be rendered as "60.00".
        """
        total_centiseconds = int(ms) // 10
        total_seconds, centiseconds = divmod(total_centiseconds, 100)
        minutes, seconds = divmod(total_seconds, 60)
        return f"{minutes:02}:{seconds:02}.{centiseconds:02}"

    @staticmethod
    def _ms_to_srt_time(ms: int) -> str:
        """Convert milliseconds to SRT time format (HH:MM:SS,mmm)."""
        total_seconds, milliseconds = divmod(ms, 1000)
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{int(milliseconds):03}"

    @staticmethod
    def _ms_to_ass_ts(ms: int) -> str:
        """Convert milliseconds to ASS timestamp format (H:MM:SS.cc)."""
        total_seconds, milliseconds = divmod(ms, 1000)
        minutes, seconds = divmod(total_seconds, 60)
        hours, minutes = divmod(minutes, 60)
        # Hundredths are truncated, not rounded.
        centiseconds = int(milliseconds / 10)
        return f"{int(hours):01}:{int(minutes):02}:{int(seconds):02}.{centiseconds:02}"

    @property
    def transcript(self) -> str:
        """Return the segment text."""
        return self.text

    def __str__(self) -> str:
        return f"ASRDataSeg({self.text}, {self.start_time}, {self.end_time})"
class ASRData:
def __init__(self, segments: List[ASRDataSeg]):
    """Store the segments that have non-blank text, ordered by start time."""
    non_blank = (item for item in segments if item.text and item.text.strip())
    self.segments = sorted(non_blank, key=lambda item: item.start_time)
def __iter__(self):
return iter(self.segments)
def __len__(self) -> int:
return len(self.segments)
def has_data(self) -> bool:
"""Check if there are any utterances"""
return len(self.segments) > 0
def _is_word_level_segment(self, segment: ASRDataSeg) -> bool:
    """Decide whether a single segment looks like a word-level entry.

    Args:
        segment: The subtitle segment to inspect.

    Returns:
        True when the stripped text is at most 2 characters long (text that
        ``is_mainly_cjk`` accepts), or when it is exactly one
        whitespace-separated token (any other text).
    """
    stripped = segment.text.strip()
    if not is_mainly_cjk(stripped):
        # Non-CJK text (e.g. English): word-level means a single word.
        return len(stripped.split()) == 1
    # CJK text: word-level means one or two characters.
    return len(stripped) <= 2
def is_word_timestamp(self) -> bool:
"""检查时间戳是否为词级(非句子级)
词级判定标准:
- 英文: 单个单词
- CJK/亚洲语言: 1-2个字符
- 允许20%误差容忍
Returns:
True 如果80%+的片段符合词级模式
"""
if not self.segments:
return False
# 统计符合词级模式的片段数量
word_level_count = sum(
1 for seg in self.segments if self._is_word_level_segment(seg)
)
WORD_LEVEL_THRESHOLD = 0.8
word_level_ratio = word_level_count / len(self.segments)
return word_level_ratio >= WORD_LEVEL_THRESHOLD
def split_to_word_segments(self) -> "ASRData":
    """Split sentence-level segments into word-level ones, in place.

    Each segment's text is tokenised with ``_WORD_SPLIT_PATTERN`` and its
    duration is shared between the tokens in proportion to an estimated
    phoneme count (one phoneme per started group of 4 characters). Segments
    that yield no tokens are dropped, and the new segments carry no
    translated text.

    Returns:
        This instance, with ``segments`` replaced by the word-level list.
    """
    CHARS_PER_PHONEME = 4
    word_segments = []
    for sentence in self.segments:
        span = sentence.end_time - sentence.start_time
        tokens = [
            match.group()
            for match in re.finditer(_WORD_SPLIT_PATTERN, sentence.text)
        ]
        if not tokens:
            continue

        # Estimated phonemes per token, and the time slice for one phoneme.
        phoneme_counts = [
            math.ceil(len(token) / CHARS_PER_PHONEME) for token in tokens
        ]
        per_phoneme = span / max(sum(phoneme_counts), 1)

        # Lay the tokens out back to back, never past the sentence end.
        cursor = sentence.start_time
        for token, phonemes in zip(tokens, phoneme_counts):
            token_end = min(cursor + int(per_phoneme * phonemes), sentence.end_time)
            word_segments.append(
                ASRDataSeg(text=token, start_time=cursor, end_time=token_end)
            )
            cursor = token_end

    self.segments = word_segments
    return self
def remove_punctuation(self) -> "ASRData":
"""Remove trailing Chinese punctuation (comma, period) from segments."""
punctuation = r"[,。]"
for seg in self.segments:
seg.text = re.sub(f"{punctuation}+$", "", seg.text.strip())
seg.translated_text = re.sub(
f"{punctuation}+$", "", seg.translated_text.strip()
)
return self
def save(
    self,
    save_path: str,
    ass_style: Optional[str] = None,
    layout: SubtitleLayoutEnum = SubtitleLayoutEnum.ORIGINAL_ON_TOP,
) -> None:
    """Write the subtitles to ``save_path`` in the format given by its extension.

    The parent directory is created first. Supported extensions are
    ``.srt``, ``.txt``, ``.json`` and ``.ass``.

    Args:
        save_path: Output file path.
        ass_style: ASS style string, forwarded to ``to_ass`` for ``.ass`` files.
        layout: Subtitle layout mode, forwarded to the srt/txt/ass writers.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    target = handle_long_path(save_path)
    Path(target).parent.mkdir(parents=True, exist_ok=True)

    if target.endswith(".srt"):
        self.to_srt(save_path=target, layout=layout)
        return
    if target.endswith(".txt"):
        self.to_txt(save_path=target, layout=layout)
        return
    if target.endswith(".json"):
        with open(target, "w", encoding="utf-8") as f:
            json.dump(self.to_json(), f, ensure_ascii=False)
        return
    if target.endswith(".ass"):
        self.to_ass(save_path=target, style_str=ass_style, layout=layout)
        return
    raise ValueError(f"Unsupported file extension: {target}")
def to_txt(
    self,
    save_path=None,
    layout: SubtitleLayoutEnum = SubtitleLayoutEnum.ORIGINAL_ON_TOP,
) -> str:
    """Render the segments as plain text without timestamps.

    Args:
        save_path: If truthy, the text is also written to this path (UTF-8).
        layout: Controls which of original/translation is emitted and in
            which order. A segment without a translation always yields only
            its original text.

    Returns:
        The rendered text, one entry per segment joined by newlines.
    """
    entries = []
    for item in self.segments:
        source_text, target_text = item.text, item.translated_text
        if layout == SubtitleLayoutEnum.ORIGINAL_ON_TOP:
            entry = f"{source_text}\n{target_text}" if target_text else source_text
        elif layout == SubtitleLayoutEnum.TRANSLATE_ON_TOP:
            entry = f"{target_text}\n{source_text}" if target_text else source_text
        elif layout == SubtitleLayoutEnum.ONLY_ORIGINAL:
            entry = source_text
        else:
            # ONLY_TRANSLATE: fall back to the original when untranslated.
            entry = target_text or source_text
        entries.append(entry)

    content = "\n".join(entries)
    if save_path:
        with open(handle_long_path(save_path), "w", encoding="utf-8") as f:
            f.write(content)
    return content
def to_srt(
    self,
    layout: SubtitleLayoutEnum = SubtitleLayoutEnum.ORIGINAL_ON_TOP,
    save_path=None,
) -> str:
    """Render the segments in SRT format.

    Args:
        layout: Controls which of original/translation is emitted and in
            which order. A segment without a translation always yields only
            its original text.
        save_path: If truthy, the SRT text is also written to this path (UTF-8).

    Returns:
        The SRT text; cues are numbered from 1.
    """
    cues = []
    for number, item in enumerate(self.segments, start=1):
        source_text, target_text = item.text, item.translated_text
        if layout == SubtitleLayoutEnum.ORIGINAL_ON_TOP:
            body = f"{source_text}\n{target_text}" if target_text else source_text
        elif layout == SubtitleLayoutEnum.TRANSLATE_ON_TOP:
            body = f"{target_text}\n{source_text}" if target_text else source_text
        elif layout == SubtitleLayoutEnum.ONLY_ORIGINAL:
            body = source_text
        else:
            # ONLY_TRANSLATE: fall back to the original when untranslated.
            body = target_text or source_text
        cues.append(f"{number}\n{item.to_srt_ts()}\n{body}\n")

    content = "\n".join(cues)
    if save_path:
        with open(handle_long_path(save_path), "w", encoding="utf-8") as f:
            f.write(content)
    return content
def to_lrc(self, save_path=None) -> str:
"""Convert to LRC subtitle format"""
raise NotImplementedError("LRC format is not supported")
def to_json(self) -> dict:
"""Convert to JSON format"""
result_json = {}
for i, segment in enumerate(self.segments, 1):
result_json[str(i)] = {
"start_time": segment.start_time,
"end_time": segment.end_time,
"original_subtitle": segment.text,
"translated_subtitle": segment.translated_text,
}
return result_json
def to_ass(
self,
style_str: Optional[str] = None,
layout: SubtitleLayoutEnum = SubtitleLayoutEnum.ORIGINAL_ON_TOP,
save_path: Optional[str] = None,
video_width: int = 1280,
video_height: int = 720,
) -> str:
"""Convert to ASS subtitle format
Args:
style_str: ASS style string (optional, uses default if None)
layout: Subtitle layout mode
save_path: Save path for ASS file (optional)
video_width: Video width (default 1280)
video_height: Video height (default 720)
Returns:
ASS format subtitle content
"""
if not style_str:
style_str = (
"[V4+ Styles]\n"
"Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,"
"Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,"
"Alignment,MarginL,MarginR,MarginV,Encoding\n"
"Style: Default,MicrosoftYaHei-Bold,40,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,"
"0,0,1,2,0,2,10,10,15,1\n"
"Style: Secondary,MicrosoftYaHei-Bold,30,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,"
"0,0,1,2,0,2,10,10,15,1"
)
ass_content = (
"[Script Info]\n"
"; Script generated by VideoCaptioner\n"
"; https://github.com/weifeng2333\n"
"ScriptType: v4.00+\n"
f"PlayResX: {video_width}\n"
f"PlayResY: {video_height}\n\n"
f"{style_str}\n\n"
"[Events]\n"
"Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
)
dialogue_template = "Dialogue: 0,{},{},{},,0,0,0,,{}\n"
for seg in self.segments:
start_time, end_time = seg.to_ass_ts()
original = seg.text
translated = seg.translated_text
has_translation = bool(translated and translated.strip())
if layout == SubtitleLayoutEnum.TRANSLATE_ON_TOP:
if has_translation:
# 先写译文(Default)显示在上,后写原文(Secondary)显示在下
ass_content += dialogue_template.format(
start_time, end_time, "Default", translated
)
ass_content += dialogue_template.format(
start_time, end_time, "Secondary", original
)
else:
ass_content += dialogue_template.format(
start_time, end_time, "Default", original
)
elif layout == SubtitleLayoutEnum.ORIGINAL_ON_TOP:
if has_translation:
# 先写原文(Default)显示在上,后写译文(Secondary)显示在下
ass_content += dialogue_template.format(
start_time, end_time, "Default", original
)
ass_content += dialogue_template.format(
gitextract_7v46ty1z/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 01_bug.yaml
│ │ ├── 02_request.yaml
│ │ └── 03_question.yaml
│ └── workflows/
│ ├── claude-code-review.yml
│ ├── claude.yml
│ └── deploy-docs.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── app/
│ ├── __init__.py
│ ├── common/
│ │ ├── config.py
│ │ └── signal_bus.py
│ ├── components/
│ │ ├── DonateDialog.py
│ │ ├── EditComboBoxSettingCard.py
│ │ ├── FasterWhisperSettingWidget.py
│ │ ├── LanguageSettingDialog.py
│ │ ├── LineEditSettingCard.py
│ │ ├── MySettingCard.py
│ │ ├── MyVideoWidget.py
│ │ ├── SimpleSettingCard.py
│ │ ├── SpinBoxSettingCard.py
│ │ ├── SubtitleSettingDialog.py
│ │ ├── TranscriptionOutputDialog.py
│ │ ├── TranscriptionSettingDialog.py
│ │ ├── WhisperAPISettingWidget.py
│ │ ├── WhisperCppSettingWidget.py
│ │ └── transcription_setting_card.py
│ ├── config.py
│ ├── core/
│ │ ├── asr/
│ │ │ ├── __init__.py
│ │ │ ├── asr_data.py
│ │ │ ├── base.py
│ │ │ ├── bcut.py
│ │ │ ├── chunk_merger.py
│ │ │ ├── chunked_asr.py
│ │ │ ├── faster_whisper.py
│ │ │ ├── jianying.py
│ │ │ ├── status.py
│ │ │ ├── transcribe.py
│ │ │ ├── whisper_api.py
│ │ │ └── whisper_cpp.py
│ │ ├── constant.py
│ │ ├── entities.py
│ │ ├── llm/
│ │ │ ├── __init__.py
│ │ │ ├── check_llm.py
│ │ │ ├── check_whisper.py
│ │ │ ├── client.py
│ │ │ ├── context.py
│ │ │ └── request_logger.py
│ │ ├── optimize/
│ │ │ └── optimize.py
│ │ ├── prompts/
│ │ │ ├── __init__.py
│ │ │ ├── analysis/
│ │ │ │ └── video.md
│ │ │ ├── optimize/
│ │ │ │ └── subtitle.md
│ │ │ ├── split/
│ │ │ │ ├── semantic.md
│ │ │ │ └── sentence.md
│ │ │ └── translate/
│ │ │ ├── reflect.md
│ │ │ ├── single.md
│ │ │ └── standard.md
│ │ ├── split/
│ │ │ ├── alignment.py
│ │ │ ├── split.py
│ │ │ └── split_by_llm.py
│ │ ├── subtitle/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── ass_renderer.py
│ │ │ ├── ass_utils.py
│ │ │ ├── font_utils.py
│ │ │ ├── rounded_renderer.py
│ │ │ ├── styles.py
│ │ │ └── text_utils.py
│ │ ├── task_factory.py
│ │ ├── translate/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── bing_translator.py
│ │ │ ├── deeplx_translator.py
│ │ │ ├── factory.py
│ │ │ ├── google_translator.py
│ │ │ ├── llm_translator.py
│ │ │ └── types.py
│ │ ├── tts/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── openai_fm.py
│ │ │ ├── openai_tts.py
│ │ │ ├── siliconflow.py
│ │ │ ├── status.py
│ │ │ └── tts_data.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── cache.py
│ │ ├── logger.py
│ │ ├── platform_utils.py
│ │ ├── subprocess_helper.py
│ │ ├── text_utils.py
│ │ └── video_utils.py
│ ├── thread/
│ │ ├── batch_process_thread.py
│ │ ├── file_download_thread.py
│ │ ├── modelscope_download_thread.py
│ │ ├── subtitle_pipeline_thread.py
│ │ ├── subtitle_thread.py
│ │ ├── transcript_thread.py
│ │ ├── version_checker_thread.py
│ │ ├── video_download_thread.py
│ │ ├── video_info_thread.py
│ │ └── video_synthesis_thread.py
│ └── view/
│ ├── batch_process_interface.py
│ ├── home_interface.py
│ ├── llm_logs_interface.py
│ ├── log_window.py
│ ├── main_window.py
│ ├── setting_interface.py
│ ├── subtitle_interface.py
│ ├── subtitle_style_interface.py
│ ├── task_creation_interface.py
│ ├── transcription_interface.py
│ └── video_synthesis_interface.py
├── docs/
│ ├── .vitepress/
│ │ ├── config.mts
│ │ └── theme/
│ │ ├── CustomHome.vue
│ │ ├── custom.css
│ │ └── index.ts
│ ├── README.md
│ ├── config/
│ │ ├── asr.md
│ │ ├── cookies.md
│ │ ├── llm.md
│ │ └── translator.md
│ ├── dev/
│ │ ├── api.md
│ │ ├── architecture.md
│ │ ├── asr-chunk-merger.md
│ │ ├── asr-chunked-usage.md
│ │ ├── contributing.md
│ │ ├── translate-module.md
│ │ └── view-structure.md
│ ├── en/
│ │ ├── config/
│ │ │ ├── asr.md
│ │ │ ├── cookies.md
│ │ │ ├── llm.md
│ │ │ └── translator.md
│ │ ├── dev/
│ │ │ ├── api.md
│ │ │ ├── architecture.md
│ │ │ └── contributing.md
│ │ ├── guide/
│ │ │ ├── batch-processing.md
│ │ │ ├── configuration.md
│ │ │ ├── faq.md
│ │ │ ├── getting-started.md
│ │ │ ├── manuscript.md
│ │ │ ├── subtitle-style.md
│ │ │ └── workflow.md
│ │ └── index.md
│ ├── guide/
│ │ ├── configuration.md
│ │ ├── cookies-config.md
│ │ ├── faq.md
│ │ ├── getting-started.md
│ │ ├── llm-config.md
│ │ ├── quick-example.md
│ │ └── workflow.md
│ ├── index.md
│ ├── package-lock.json
│ ├── package.json
│ └── public/
│ ├── BingSiteAuth.xml
│ └── robots.txt
├── legacy-docs/
│ ├── README_EN.md
│ ├── README_JA.md
│ ├── README_TW.md
│ ├── about_chunk_merge.md
│ ├── get_cookies.md
│ ├── llm_config.md
│ └── test.md
├── main.py
├── pyproject.toml
├── resource/
│ ├── assets/
│ │ └── qss/
│ │ ├── dark/
│ │ │ └── demo.qss
│ │ └── light/
│ │ └── demo.qss
│ ├── subtitle_style/
│ │ ├── default.json
│ │ ├── default.txt
│ │ ├── 毕导科普风.txt
│ │ ├── 番剧可爱风.txt
│ │ └── 竖屏.txt
│ └── translations/
│ ├── VideoCaptioner_en_US.qm
│ ├── VideoCaptioner_en_US.ts
│ ├── VideoCaptioner_zh_CN.qm
│ ├── VideoCaptioner_zh_CN.ts
│ ├── VideoCaptioner_zh_HK.qm
│ └── VideoCaptioner_zh_HK.ts
├── scripts/
│ ├── lint.sh
│ ├── run.bat
│ ├── run.sh
│ ├── trans-compile.sh
│ ├── trans-extract.sh
│ └── translate_llm.py
└── tests/
├── README.md
├── __init__.py
├── conftest.py
├── fixtures/
│ └── README.md
├── test_asr/
│ ├── README.md
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_asr_data.py
│ ├── test_bcut_asr.py
│ ├── test_chunk_merger.py
│ ├── test_chunked_asr.py
│ ├── test_chunking.py
│ ├── test_jianying_asr.py
│ └── test_whisper_api_asr.py
├── test_optimize/
│ └── test_optimize.py
├── test_split/
│ ├── __init__.py
│ ├── test_alignment.py
│ ├── test_split.py
│ ├── test_split_by_llm.py
│ ├── test_split_core.py
│ └── test_split_realistic.py
├── test_subtitle/
│ ├── __init__.py
│ ├── conftest.py
│ └── test_subtitle_thread.py
├── test_thread/
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_subtitle_pipeline_thread.py
│ ├── test_transcript_thread.py
│ ├── test_video_info_thread.py
│ └── test_video_synthesis_thread.py
├── test_translate/
│ ├── __init__.py
│ ├── test_bing_translator.py
│ ├── test_cache_validation.py
│ ├── test_deeplx_translator.py
│ ├── test_google_translator.py
│ └── test_llm_translator.py
└── test_tts/
├── __init__.py
├── test_tts_core.py
└── test_tts_integration.py
SYMBOL INDEX (1434 symbols across 119 files)
FILE: app/common/config.py
class Language (line 40) | class Language(Enum):
class LanguageSerializer (line 49) | class LanguageSerializer(ConfigSerializer):
method serialize (line 52) | def serialize(self, language):
method deserialize (line 55) | def deserialize(self, value: str):
class PlatformAwareTranscribeModelValidator (line 59) | class PlatformAwareTranscribeModelValidator(OptionsValidator):
method __init__ (line 62) | def __init__(self):
method options (line 67) | def options(self):
method validate (line 70) | def validate(self, value):
method correct (line 73) | def correct(self, value):
class Config (line 77) | class Config(QConfig):
FILE: app/common/signal_bus.py
class SignalBus (line 4) | class SignalBus(QObject):
method play_video (line 35) | def play_video(self):
method pause_video (line 39) | def pause_video(self):
method stop_video (line 43) | def stop_video(self):
method set_video_source (line 47) | def set_video_source(self, url: QUrl):
method play_video_segment (line 55) | def play_video_segment(self, start_time: int, end_time: int):
method add_subtitle (line 64) | def add_subtitle(self, subtitle_file: str):
FILE: app/components/DonateDialog.py
class DonateDialog (line 11) | class DonateDialog(MessageBoxBase):
method __init__ (line 12) | def __init__(self, parent=None):
method setup_ui (line 21) | def setup_ui(self):
FILE: app/components/EditComboBoxSettingCard.py
class EditComboBoxSettingCard (line 10) | class EditComboBoxSettingCard(SettingCard):
method __init__ (line 15) | def __init__(
method _setupCompleter (line 51) | def _setupCompleter(self):
method __onTextChanged (line 61) | def __onTextChanged(self, text: str):
method setValue (line 66) | def setValue(self, value: str):
method addItems (line 71) | def addItems(self, items: List[str]):
method setItems (line 78) | def setItems(self, items: List[str]):
FILE: app/components/FasterWhisperSettingWidget.py
function check_faster_whisper_exists (line 126) | def check_faster_whisper_exists() -> tuple[bool, list[str]]:
class UnzipThread (line 153) | class UnzipThread(QThread):
method __init__ (line 159) | def __init__(self, zip_file, extract_path):
method run (line 164) | def run(self):
class FasterWhisperDownloadDialog (line 180) | class FasterWhisperDownloadDialog(MessageBoxBase):
method __init__ (line 186) | def __init__(self, parent=None, setting_widget=None):
method _setup_ui (line 195) | def _setup_ui(self):
method _setup_program_section (line 207) | def _setup_program_section(self, layout):
method _setup_model_section (line 266) | def _setup_model_section(self, layout):
method _create_model_table (line 290) | def _create_model_table(self):
method _setup_progress_section (line 328) | def _setup_progress_section(self, layout):
method _populate_model_table (line 338) | def _populate_model_table(self):
method _add_model_row (line 344) | def _add_model_row(self, row, model):
method _connect_signals (line 387) | def _connect_signals(self):
method _start_download (line 391) | def _start_download(self):
method _on_program_download_progress (line 451) | def _on_program_download_progress(self, value, status_msg):
method _on_program_download_finished (line 456) | def _on_program_download_finished(self, save_path):
method _on_program_download_error (line 479) | def _on_program_download_error(self, error):
method _on_dialog_reject (line 489) | def _on_dialog_reject(self):
method closeEvent (line 498) | def closeEvent(self, event):
method _download_model (line 503) | def _download_model(self, row):
method _set_all_download_buttons_enabled (line 607) | def _set_all_download_buttons_enabled(self, enabled: bool):
method _open_model_folder (line 622) | def _open_model_folder(self):
method _open_program_folder (line 628) | def _open_program_folder(self):
method _finish_program_installation (line 634) | def _finish_program_installation(self):
method _on_unzip_error (line 645) | def _on_unzip_error(self, error_msg):
method _cleanup_installation (line 650) | def _cleanup_installation(self):
class FasterWhisperSettingWidget (line 658) | class FasterWhisperSettingWidget(QWidget):
method __init__ (line 659) | def __init__(self, parent=None):
method showEvent (line 664) | def showEvent(self, a0: QShowEvent) -> None:
method setup_ui (line 673) | def setup_ui(self):
method _connect_signals (line 853) | def _connect_signals(self):
method _on_vad_filter_changed (line 858) | def _on_vad_filter_changed(self, checked: bool):
method _show_model_manager (line 863) | def _show_model_manager(self):
method show_error_info (line 868) | def show_error_info(self, error_msg):
method check_faster_whisper_model (line 878) | def check_faster_whisper_model(self):
FILE: app/components/LanguageSettingDialog.py
class LanguageSettingDialog (line 19) | class LanguageSettingDialog(MessageBoxBase):
method __init__ (line 22) | def __init__(self, model: TranscribeModelEnum, parent=None):
method _get_available_languages (line 29) | def _get_available_languages(self) -> list[str]:
method _setup_ui (line 37) | def _setup_ui(self):
method _connect_signals (line 77) | def _connect_signals(self):
method __onYesButtonClicked (line 81) | def __onYesButtonClicked(self):
FILE: app/components/LineEditSettingCard.py
class LineEditSettingCard (line 8) | class LineEditSettingCard(SettingCard):
method __init__ (line 13) | def __init__(
method __onTextChanged (line 38) | def __onTextChanged(self, text: str):
method setValue (line 42) | def setValue(self, value: str):
FILE: app/components/MySettingCard.py
class SettingIconWidget (line 14) | class SettingIconWidget(IconWidget):
method paintEvent (line 15) | def paintEvent(self, e):
class SettingCard (line 25) | class SettingCard(QFrame):
method __init__ (line 28) | def __init__(
method setTitle (line 80) | def setTitle(self, title: str):
method setContent (line 84) | def setContent(self, content: str):
method setValue (line 89) | def setValue(self, value):
method setIconSize (line 93) | def setIconSize(self, width: int, height: int):
method paintEvent (line 97) | def paintEvent(self, e):
class DoubleSpinBoxSettingCard (line 111) | class DoubleSpinBoxSettingCard(SettingCard):
method __init__ (line 116) | def __init__(
method __onValueChanged (line 142) | def __onValueChanged(self, value: float):
method setValue (line 147) | def setValue(self, value: float):
class SpinBoxSettingCard (line 152) | class SpinBoxSettingCard(SettingCard):
method __init__ (line 157) | def __init__(
method __onValueChanged (line 182) | def __onValueChanged(self, value: int):
method setValue (line 187) | def setValue(self, value: int):
class ComboBoxSettingCard (line 192) | class ComboBoxSettingCard(SettingCard):
method __init__ (line 198) | def __init__(
method __onCurrentTextChanged (line 222) | def __onCurrentTextChanged(self, text: str):
method __onCurrentIndexChanged (line 226) | def __onCurrentIndexChanged(self, index: int):
method setCurrentText (line 230) | def setCurrentText(self, text: str):
method setCurrentIndex (line 234) | def setCurrentIndex(self, index: int):
method addItem (line 238) | def addItem(self, text: str):
method addItems (line 242) | def addItems(self, texts: List[str]):
method clear (line 246) | def clear(self):
class ColorSettingCard (line 251) | class ColorSettingCard(SettingCard):
method __init__ (line 256) | def __init__(
method __onColorChanged (line 293) | def __onColorChanged(self, color: QColor):
method setColor (line 297) | def setColor(self, color: QColor):
class ColorPickerButton (line 302) | class ColorPickerButton(QToolButton):
method __init__ (line 307) | def __init__(self, color: QColor, title: str, parent=None, enableAlpha...
method __showColorDialog (line 318) | def __showColorDialog(self):
method __onColorChanged (line 326) | def __onColorChanged(self, color):
method setColor (line 331) | def setColor(self, color):
method paintEvent (line 336) | def paintEvent(self, e):
FILE: app/components/MyVideoWidget.py
class MediaStatus (line 25) | class MediaStatus(Enum):
class PlaybackState (line 36) | class PlaybackState(Enum):
class MediaPlayerBase (line 42) | class MediaPlayerBase(QObject):
method __init__ (line 53) | def __init__(self, parent=None):
method isPlaying (line 56) | def isPlaying(self):
method mediaStatus (line 60) | def mediaStatus(self) -> MediaStatus:
method playbackState (line 64) | def playbackState(self) -> PlaybackState:
method duration (line 68) | def duration(self):
method position (line 72) | def position(self):
method volume (line 76) | def volume(self):
method source (line 80) | def source(self) -> QUrl:
method pause (line 84) | def pause(self):
method play (line 88) | def play(self):
method stop (line 92) | def stop(self):
method playbackRate (line 96) | def playbackRate(self) -> float:
method setPosition (line 100) | def setPosition(self, position: int):
method setSource (line 104) | def setSource(self, media: QUrl):
method setPlaybackRate (line 108) | def setPlaybackRate(self, rate: float):
method setVolume (line 112) | def setVolume(self, volume: int):
method setMuted (line 116) | def setMuted(self, isMuted: bool):
method videoOutput (line 119) | def videoOutput(self) -> QObject:
method setVideoOutput (line 123) | def setVideoOutput(self, output: QObject) -> None:
class MediaPlayer (line 128) | class MediaPlayer(MediaPlayerBase):
method __init__ (line 129) | def __init__(self, parent=None):
method _on_timer_update (line 161) | def _on_timer_update(self):
method isPlaying (line 182) | def isPlaying(self):
method mediaStatus (line 185) | def mediaStatus(self) -> MediaStatus:
method playbackState (line 206) | def playbackState(self) -> PlaybackState:
method duration (line 216) | def duration(self):
method position (line 219) | def position(self):
method volume (line 222) | def volume(self):
method source (line 225) | def source(self) -> QUrl:
method get_subtitle (line 228) | def get_subtitle(self):
method pause (line 258) | def pause(self):
method play (line 261) | def play(self):
method stop (line 264) | def stop(self):
method playbackRate (line 267) | def playbackRate(self) -> float:
method setPosition (line 270) | def setPosition(self, position: int):
method setSource (line 275) | def setSource(self, media: QUrl):
method setPlaybackRate (line 284) | def setPlaybackRate(self, rate: float):
method setVolume (line 290) | def setVolume(self, volume: int):
method setMuted (line 295) | def setMuted(self, isMuted: bool):
method videoOutput (line 300) | def videoOutput(self) -> Optional[QObject]:
method setVideoOutput (line 303) | def setVideoOutput(self, output: QObject) -> None:
method hasMedia (line 307) | def hasMedia(self):
method playSegment (line 311) | def playSegment(self, start_time: int, end_time: int):
method add_subtitle (line 344) | def add_subtitle(self, subtitle_file: str) -> bool:
method get_subtitle_tracks (line 373) | def get_subtitle_tracks(self) -> list:
method set_subtitle_track (line 385) | def set_subtitle_track(self, track_id: int):
class StandardMediaPlayBar (line 395) | class StandardMediaPlayBar(MediaPlayBarBase):
method __init__ (line 398) | def __init__(self, parent=None):
method __initWidgets (line 418) | def __initWidgets(self):
method skipBack (line 448) | def skipBack(self, ms: int):
method skipForward (line 452) | def skipForward(self, ms: int):
method _onPositionChanged (line 456) | def _onPositionChanged(self, position: int):
method _formatTime (line 463) | def _formatTime(self, time: int):
method closeEvent (line 470) | def closeEvent(self, event):
class MyVideoWidget (line 475) | class MyVideoWidget(QWidget):
method __init__ (line 478) | def __init__(self, parent=None):
method _connectSignals (line 542) | def _connectSignals(self):
method addSubtitle (line 552) | def addSubtitle(self, subtitle_file: str):
method setVideo (line 557) | def setVideo(self, url: QUrl):
method play (line 570) | def play(self):
method pause (line 574) | def pause(self):
method stop (line 578) | def stop(self):
method playSegment (line 582) | def playSegment(self, start_time: int, end_time: int):
method hideEvent (line 591) | def hideEvent(self, e):
method wheelEvent (line 595) | def wheelEvent(self, e):
method togglePlayState (line 598) | def togglePlayState(self):
method player (line 606) | def player(self):
method keyPressEvent (line 609) | def keyPressEvent(self, event):
method dragEnterEvent (line 620) | def dragEnterEvent(self, event):
method dropEvent (line 635) | def dropEvent(self, event):
method eventFilter (line 649) | def eventFilter(self, obj, event):
method showEvent (line 657) | def showEvent(self, event):
FILE: app/components/SimpleSettingCard.py
class SimpleSettingCard (line 13) | class SimpleSettingCard(CardWidget):
method __init__ (line 16) | def __init__(self, title, content, parent=None):
method setup_ui (line 22) | def setup_ui(self):
class ComboBoxSimpleSettingCard (line 37) | class ComboBoxSimpleSettingCard(SimpleSettingCard):
method __init__ (line 42) | def __init__(self, title, content, items=None, parent=None):
method setup_combobox (line 47) | def setup_combobox(self):
method setValue (line 54) | def setValue(self, value):
method value (line 57) | def value(self):
class SwitchButtonSimpleSettingCard (line 61) | class SwitchButtonSimpleSettingCard(SimpleSettingCard):
method __init__ (line 66) | def __init__(self, title, content, parent=None):
method setup_switch (line 70) | def setup_switch(self):
method setChecked (line 81) | def setChecked(self, checked):
method isChecked (line 84) | def isChecked(self):
FILE: app/components/SpinBoxSettingCard.py
class DoubleSpinBoxSettingCard (line 9) | class DoubleSpinBoxSettingCard(SettingCard):
method __init__ (line 14) | def __init__(
method __onValueChanged (line 46) | def __onValueChanged(self, value: float):
method setValue (line 51) | def setValue(self, value: float):
class SpinBoxSettingCard (line 57) | class SpinBoxSettingCard(SettingCard):
method __init__ (line 62) | def __init__(
method __onValueChanged (line 90) | def __onValueChanged(self, value: int):
method setValue (line 95) | def setValue(self, value: int):
FILE: app/components/SubtitleSettingDialog.py
class SubtitleSettingDialog (line 12) | class SubtitleSettingDialog(MessageBoxBase):
method __init__ (line 15) | def __init__(self, parent=None):
FILE: app/components/TranscriptionOutputDialog.py
class TranscriptionSettingDialog (line 13) | class TranscriptionSettingDialog(MessageBoxBase):
method __init__ (line 16) | def __init__(self, parent=None):
FILE: app/components/TranscriptionSettingDialog.py
class TranscriptionSettingDialog (line 13) | class TranscriptionSettingDialog(MessageBoxBase):
method __init__ (line 16) | def __init__(self, parent=None):
FILE: app/components/WhisperAPISettingWidget.py
class WhisperAPISettingWidget (line 24) | class WhisperAPISettingWidget(QWidget):
method __init__ (line 25) | def __init__(self, parent=None):
method setup_ui (line 29) | def setup_ui(self):
method on_check_connection (line 132) | def on_check_connection(self):
method on_connection_check_finished (line 160) | def on_connection_check_finished(self, success, result):
method on_connection_check_error (line 183) | def on_connection_check_error(self, message):
class WhisperConnectionThread (line 197) | class WhisperConnectionThread(QThread):
method __init__ (line 203) | def __init__(self, base_url, api_key, model):
method run (line 209) | def run(self):
FILE: app/components/WhisperCppSettingWidget.py
function check_whisper_cpp_exists (line 114) | def check_whisper_cpp_exists():
class DownloadDialog (line 119) | class DownloadDialog(MessageBoxBase):
method __init__ (line 120) | def __init__(self, parent=None):
method setup_ui (line 126) | def setup_ui(self):
method start_download (line 163) | def start_download(self):
method update_progress (line 188) | def update_progress(self, value, status_msg):
method download_finished (line 192) | def download_finished(self):
method download_error (line 202) | def download_error(self, error):
method reject (line 212) | def reject(self):
class WhisperCppDownloadDialog (line 219) | class WhisperCppDownloadDialog(MessageBoxBase):
method __init__ (line 225) | def __init__(self, parent=None, setting_widget=None):
method _setup_ui (line 233) | def _setup_ui(self):
method _setup_program_section (line 245) | def _setup_program_section(self, layout):
method _setup_model_section (line 265) | def _setup_model_section(self, layout):
method _create_model_table (line 289) | def _create_model_table(self):
method _setup_progress_section (line 327) | def _setup_progress_section(self, layout):
method _populate_model_table (line 337) | def _populate_model_table(self):
method _add_model_row (line 343) | def _add_model_row(self, row, model):
method _download_model (line 383) | def _download_model(self, row):
method _set_all_download_buttons_enabled (line 489) | def _set_all_download_buttons_enabled(self, enabled: bool):
method _open_model_folder (line 504) | def _open_model_folder(self):
class WhisperCppSettingWidget (line 511) | class WhisperCppSettingWidget(QWidget):
method __init__ (line 512) | def __init__(self, parent=None):
method setup_ui (line 517) | def setup_ui(self):
method setup_signals (line 596) | def setup_signals(self):
method show_download_dialog (line 599) | def show_download_dialog(self):
FILE: app/components/transcription_setting_card.py
class TranscriptionSettingCard (line 18) | class TranscriptionSettingCard(QWidget):
method __init__ (line 19) | def __init__(self, parent=None):
method setup_ui (line 23) | def setup_ui(self):
method on_model_changed (line 48) | def on_model_changed(self, value):
FILE: app/core/asr/asr_data.py
function handle_long_path (line 34) | def handle_long_path(path: str) -> str:
class ASRDataSeg (line 52) | class ASRDataSeg:
method __init__ (line 53) | def __init__(
method to_srt_ts (line 61) | def to_srt_ts(self) -> str:
method to_lrc_ts (line 65) | def to_lrc_ts(self) -> str:
method to_ass_ts (line 69) | def to_ass_ts(self) -> Tuple[str, str]:
method _ms_to_lrc_time (line 74) | def _ms_to_lrc_time(ms: int) -> str:
method _ms_to_srt_time (line 81) | def _ms_to_srt_time(ms: int) -> str:
method _ms_to_ass_ts (line 89) | def _ms_to_ass_ts(ms: int) -> str:
method transcript (line 98) | def transcript(self) -> str:
method __str__ (line 102) | def __str__(self) -> str:
class ASRData (line 106) | class ASRData:
method __init__ (line 107) | def __init__(self, segments: List[ASRDataSeg]):
method __iter__ (line 112) | def __iter__(self):
method __len__ (line 115) | def __len__(self) -> int:
method has_data (line 118) | def has_data(self) -> bool:
method _is_word_level_segment (line 122) | def _is_word_level_segment(self, segment: ASRDataSeg) -> bool:
method is_word_timestamp (line 141) | def is_word_timestamp(self) -> bool:
method split_to_word_segments (line 165) | def split_to_word_segments(self) -> "ASRData":
method remove_punctuation (line 210) | def remove_punctuation(self) -> "ASRData":
method save (line 220) | def save(
method to_txt (line 248) | def to_txt(
method to_srt (line 275) | def to_srt(
method to_lrc (line 304) | def to_lrc(self, save_path=None) -> str:
method to_json (line 308) | def to_json(self) -> dict:
method to_ass (line 320) | def to_ass(
method to_vtt (line 413) | def to_vtt(self, save_path=None) -> str:
method merge_segments (line 442) | def merge_segments(
method merge_with_next_segment (line 461) | def merge_with_next_segment(self, index: int) -> None:
method optimize_timing (line 472) | def optimize_timing(self, threshold_ms: int = 1000) -> "ASRData":
method __str__ (line 501) | def __str__(self):
method from_subtitle_file (line 505) | def from_subtitle_file(file_path: str) -> "ASRData":
method from_json (line 543) | def from_json(json_data: dict) -> "ASRData":
method from_srt (line 558) | def from_srt(srt_str: str) -> "ASRData":
method from_vtt (line 627) | def from_vtt(vtt_str: str) -> "ASRData":
method from_youtube_vtt (line 682) | def from_youtube_vtt(vtt_str: str) -> "ASRData":
method from_ass (line 750) | def from_ass(ass_str: str) -> "ASRData":
FILE: app/core/asr/base.py
class BaseASR (line 19) | class BaseASR:
method __init__ (line 37) | def __init__(
method _set_data (line 57) | def _set_data(self):
method _get_audio_duration (line 76) | def _get_audio_duration(self) -> float:
method run (line 87) | def run(
method _get_key (line 120) | def _get_key(self) -> str:
method _make_segments (line 131) | def _make_segments(self, resp_data: dict) -> list[ASRDataSeg]:
method _run (line 144) | def _run(
method _check_rate_limit (line 158) | def _check_rate_limit(self) -> None:
FILE: app/core/asr/bcut.py
class BcutASR (line 20) | class BcutASR(BaseASR):
method __init__ (line 31) | def __init__(
method upload (line 54) | def upload(self) -> None:
method __upload_part (line 83) | def __upload_part(self) -> None:
method __commit_upload (line 106) | def __commit_upload(self) -> None:
method create_task (line 122) | def create_task(self) -> str:
method result (line 134) | def result(self, task_id: Optional[str] = None):
method _run (line 145) | def _run(
method _make_segments (line 180) | def _make_segments(self, resp_data: dict) -> List[ASRDataSeg]:
FILE: app/core/asr/chunk_merger.py
class ChunkMerger (line 20) | class ChunkMerger:
method __init__ (line 27) | def __init__(self, min_match_count: int = 2, fuzzy_threshold: float = ...
method merge_chunks (line 37) | def merge_chunks(
method _merge_two_sequences (line 101) | def _merge_two_sequences(
method _find_best_alignment (line 175) | def _find_best_alignment(
method _adjust_timestamps (line 249) | def _adjust_timestamps(
method _extract_overlap_segments (line 271) | def _extract_overlap_segments(
method _infer_chunk_offsets (line 308) | def _infer_chunk_offsets(
FILE: app/core/asr/chunked_asr.py
class ChunkedASR (line 28) | class ChunkedASR:
method __init__ (line 58) | def __init__(
method run (line 78) | def run(self, callback: Optional[Callable[[int, str], None]] = None) -...
method _split_audio (line 107) | def _split_audio(self) -> List[Tuple[bytes, int]]:
method _transcribe_chunks (line 154) | def _transcribe_chunks(
method _merge_results (line 222) | def _merge_results(
FILE: app/core/asr/faster_whisper.py
class FasterWhisperASR (line 21) | class FasterWhisperASR(BaseASR):
method __init__ (line 28) | def __init__(
method _build_command (line 115) | def _build_command(self, audio_input: str) -> List[str]:
method _make_segments (line 199) | def _make_segments(self, resp_data: str) -> List[ASRDataSeg]:
method _run (line 224) | def _run(
method _get_key (line 323) | def _get_key(self):
function is_rtx_50_series (line 330) | def is_rtx_50_series() -> bool:
FILE: app/core/asr/jianying.py
class JianYingASR (line 19) | class JianYingASR(BaseASR):
method __init__ (line 25) | def __init__(
method submit (line 53) | def submit(self) -> str:
method upload (line 82) | def upload(self):
method query (line 91) | def query(self, query_id: str):
method _run (line 108) | def _run(
method _make_segments (line 132) | def _make_segments(self, resp_data: dict) -> List[ASRDataSeg]:
method _get_key (line 145) | def _get_key(self):
method _get_tid (line 148) | def _get_tid(self):
method _generate_sign_parameters (line 154) | def _generate_sign_parameters(
method _build_headers (line 186) | def _build_headers(self, device_time: str, sign: str) -> Dict[str, str]:
method _uplosd_headers (line 198) | def _uplosd_headers(self):
method _upload_sign (line 206) | def _upload_sign(self):
method _upload_auth (line 222) | def _upload_auth(self):
method _upload_file (line 260) | def _upload_file(self):
method _upload_check (line 269) | def _upload_check(self):
method _upload_commit (line 278) | def _upload_commit(self):
function sign (line 286) | def sign(key: bytes, msg: str) -> bytes:
function get_signature_key (line 291) | def get_signature_key(
function aws_signature (line 302) | def aws_signature(
FILE: app/core/asr/status.py
class ASRStatus (line 5) | class ASRStatus(Enum):
method message (line 38) | def message(self) -> str:
method progress (line 43) | def progress(self) -> int:
method with_progress (line 47) | def with_progress(self, progress: int) -> Tuple[int, str]:
method callback_tuple (line 58) | def callback_tuple(self) -> Tuple[int, str]:
FILE: app/core/asr/transcribe.py
function transcribe (line 11) | def transcribe(audio_path: str, config: TranscribeConfig, callback=None)...
function _create_asr_instance (line 45) | def _create_asr_instance(audio_path: str, config: TranscribeConfig) -> C...
function _create_jianying_asr (line 76) | def _create_jianying_asr(audio_path: str, config: TranscribeConfig) -> C...
function _create_bijian_asr (line 87) | def _create_bijian_asr(audio_path: str, config: TranscribeConfig) -> Chu...
function _create_whisper_cpp_asr (line 96) | def _create_whisper_cpp_asr(audio_path: str, config: TranscribeConfig) -...
function _create_whisper_api_asr (line 113) | def _create_whisper_api_asr(audio_path: str, config: TranscribeConfig) -...
function _create_faster_whisper_asr (line 129) | def _create_faster_whisper_asr(audio_path: str, config: TranscribeConfig...
function progress_callback (line 175) | def progress_callback(progress: int, message: str):
FILE: app/core/asr/whisper_api.py
class WhisperAPI (line 14) | class WhisperAPI(BaseASR):
method __init__ (line 20) | def __init__(
method _run (line 58) | def _run(
method _make_segments (line 64) | def _make_segments(self, resp_data: dict) -> List[ASRDataSeg]:
method _get_key (line 85) | def _get_key(self) -> str:
method _submit (line 89) | def _submit(self) -> dict:
FILE: app/core/asr/whisper_cpp.py
class WhisperCppASR (line 21) | class WhisperCppASR(BaseASR):
method __init__ (line 27) | def __init__(
method _make_segments (line 68) | def _make_segments(self, resp_data: str) -> List[ASRDataSeg]:
method _build_command (line 84) | def _build_command(
method _run (line 114) | def _run(
method _get_key (line 237) | def _get_key(self):
method get_audio_duration (line 240) | def get_audio_duration(self, filepath: str) -> int:
function detect_whisper_executable (line 263) | def detect_whisper_executable() -> str:
FILE: app/core/entities.py
function _generate_task_id (line 11) | def _generate_task_id() -> str:
class SubtitleProcessData (line 17) | class SubtitleProcessData:
class SupportedAudioFormats (line 26) | class SupportedAudioFormats(Enum):
class SupportedVideoFormats (line 48) | class SupportedVideoFormats(Enum):
class SupportedSubtitleFormats (line 76) | class SupportedSubtitleFormats(Enum):
class OutputSubtitleFormatEnum (line 84) | class OutputSubtitleFormatEnum(Enum):
class TranscribeOutputFormatEnum (line 94) | class TranscribeOutputFormatEnum(Enum):
class LLMServiceEnum (line 104) | class LLMServiceEnum(Enum):
class TranscribeModelEnum (line 116) | class TranscribeModelEnum(Enum):
class TranslatorServiceEnum (line 126) | class TranslatorServiceEnum(Enum):
class VadMethodEnum (line 135) | class VadMethodEnum(Enum):
class SubtitleLayoutEnum (line 155) | class SubtitleLayoutEnum(Enum):
class SubtitleRenderModeEnum (line 164) | class SubtitleRenderModeEnum(Enum):
class VideoQualityEnum (line 171) | class VideoQualityEnum(Enum):
method get_crf (line 179) | def get_crf(self) -> int:
method get_preset (line 189) | def get_preset(
class TranscribeLanguageEnum (line 225) | class TranscribeLanguageEnum(Enum):
class WhisperModelEnum (line 332) | class WhisperModelEnum(Enum):
class FasterWhisperModelEnum (line 341) | class FasterWhisperModelEnum(Enum):
class ASRLanguageCapability (line 469) | class ASRLanguageCapability:
function _get_all_languages_except_auto (line 476) | def _get_all_languages_except_auto() -> list[TranscribeLanguageEnum]:
function get_asr_language_capability (line 511) | def get_asr_language_capability(model: TranscribeModelEnum) -> ASRLangua...
class AudioStreamInfo (line 523) | class AudioStreamInfo:
class VideoInfo (line 533) | class VideoInfo:
class TranscribeConfig (line 551) | class TranscribeConfig:
method _mask_key (line 577) | def _mask_key(self, key: Optional[str]) -> str:
method print_config (line 583) | def print_config(self) -> str:
class SubtitleConfig (line 625) | class SubtitleConfig:
method _mask_key (line 649) | def _mask_key(self, key: Optional[str]) -> str:
method print_config (line 655) | def print_config(self) -> str:
class SynthesisConfig (line 694) | class SynthesisConfig:
method print_config (line 706) | def print_config(self) -> str:
class TranscribeTask (line 722) | class TranscribeTask:
class SubtitleTask (line 748) | class SubtitleTask:
class SynthesisTask (line 773) | class SynthesisTask:
class TranscriptAndSubtitleTask (line 797) | class TranscriptAndSubtitleTask:
class FullProcessTask (line 818) | class FullProcessTask:
class BatchTaskType (line 838) | class BatchTaskType(Enum):
method __str__ (line 846) | def __str__(self):
class BatchTaskStatus (line 850) | class BatchTaskStatus(Enum):
method __str__ (line 858) | def __str__(self):
FILE: app/core/llm/check_llm.py
function check_llm_connection (line 10) | def check_llm_connection(
function get_available_models (line 54) | def get_available_models(base_url: str, api_key: str) -> list[str]:
FILE: app/core/llm/check_whisper.py
function check_whisper_connection (line 14) | def check_whisper_connection(
FILE: app/core/llm/client.py
function normalize_base_url (line 29) | def normalize_base_url(base_url: str) -> str:
function get_llm_client (line 52) | def get_llm_client() -> OpenAI:
function before_sleep_log (line 77) | def before_sleep_log(retry_state: RetryCallState) -> None:
function _call_llm_api (line 89) | def _call_llm_api(
function call_llm (line 112) | def call_llm(
FILE: app/core/llm/context.py
class TaskContext (line 13) | class TaskContext:
function generate_task_id (line 25) | def generate_task_id() -> str:
function set_task_context (line 30) | def set_task_context(task_id: str, file_name: str, stage: str) -> None:
function get_task_context (line 37) | def get_task_context() -> Optional[TaskContext]:
function update_stage (line 43) | def update_stage(stage: str) -> None:
function clear_task_context (line 55) | def clear_task_context() -> None:
FILE: app/core/llm/request_logger.py
function _rotate_if_needed (line 23) | def _rotate_if_needed() -> None:
function _write_log (line 36) | def _write_log(entry: Dict[str, Any]) -> None:
function _on_request (line 51) | def _on_request(request: httpx.Request) -> None:
function _on_response (line 68) | def _on_response(response: httpx.Response) -> None:
function create_logging_http_client (line 83) | def create_logging_http_client() -> httpx.Client:
function log_llm_response (line 93) | def log_llm_response(response: Any) -> None:
FILE: app/core/optimize/optimize.py
class SubtitleOptimizer (line 27) | class SubtitleOptimizer:
method __init__ (line 36) | def __init__(
method _init_thread_pool (line 64) | def _init_thread_pool(self) -> None:
method optimize_subtitle (line 69) | def optimize_subtitle(self, subtitle_data: Union[str, ASRData]) -> ASR...
method _split_chunks (line 105) | def _split_chunks(self, subtitle_dict: Dict[str, str]) -> List[Dict[st...
method _parallel_optimize (line 120) | def _parallel_optimize(self, chunks: List[Dict[str, str]]) -> Dict[str...
method _optimize_chunk (line 154) | def _optimize_chunk(self, subtitle_chunk: Dict[str, str]) -> Dict[str,...
method agent_loop (line 187) | def agent_loop(self, subtitle_chunk: Dict[str, str]) -> Dict[str, str]:
method _validate_optimization_result (line 273) | def _validate_optimization_result(
method _repair_subtitle (line 344) | def _repair_subtitle(
method _create_segments (line 383) | def _create_segments(
method stop (line 405) | def stop(self) -> None:
FILE: app/core/prompts/__init__.py
function _load_prompt_file (line 25) | def _load_prompt_file(prompt_path: str) -> str:
function get_prompt (line 48) | def get_prompt(prompt_path: str, **kwargs) -> str:
function list_prompts (line 75) | def list_prompts() -> list[str]:
function reload_cache (line 92) | def reload_cache():
FILE: app/core/split/alignment.py
class SubtitleAligner (line 4) | class SubtitleAligner:
method __init__ (line 22) | def __init__(self):
method align_texts (line 25) | def align_texts(self, source_text, target_text):
method _pair_lines (line 39) | def _pair_lines(self, diff_iterator):
method _line_iterator (line 74) | def _line_iterator(self, diff_iterator):
method _format_line (line 159) | def _format_line(self, lines, format_key, side):
FILE: app/core/split/split.py
function preprocess_segments (line 57) | def preprocess_segments(
class SubtitleSplitter (line 85) | class SubtitleSplitter:
method __init__ (line 91) | def __init__(
method _init_thread_pool (line 113) | def _init_thread_pool(self):
method split_subtitle (line 118) | def split_subtitle(self, subtitle_data: Union[str, ASRData]) -> ASRData:
method _determine_num_segments (line 169) | def _determine_num_segments(
method _split_asr_data (line 186) | def _split_asr_data(self, asr_data: ASRData, num_segments: int) -> Lis...
method _process_segments (line 248) | def _process_segments(self, asr_data_list: List[ASRData]) -> List[List...
method _process_single_segment (line 269) | def _process_single_segment(self, asr_data_part: ASRData) -> List[ASRD...
method _process_by_llm (line 279) | def _process_by_llm(self, segments: List[ASRDataSeg]) -> List[ASRDataS...
method _process_by_rules (line 300) | def _process_by_rules(self, segments: List[ASRDataSeg]) -> List[ASRDat...
method _group_by_time_gaps (line 343) | def _group_by_time_gaps(
method _split_by_common_words (line 397) | def _split_by_common_words(
method _split_long_segment (line 521) | def _split_long_segment(self, segments: List[ASRDataSeg]) -> List[ASRD...
method _merge_processed_segments (line 591) | def _merge_processed_segments(
method merge_short_segment (line 602) | def merge_short_segment(self, segments: List[ASRDataSeg]) -> None:
method _merge_segments_based_on_sentences (line 661) | def _merge_segments_based_on_sentences(
method stop (line 773) | def stop(self):
FILE: app/core/split/split_by_llm.py
function split_by_llm (line 16) | def split_by_llm(
function _split_with_agent_loop (line 42) | def _split_with_agent_loop(
function _validate_split_result (line 111) | def _validate_split_result(
FILE: app/core/subtitle/__init__.py
function get_subtitle_style (line 27) | def get_subtitle_style(style_name: str) -> Optional[str]:
FILE: app/core/subtitle/ass_renderer.py
function _check_cuda_available (line 36) | def _check_cuda_available() -> bool:
function _scale_ass_style (line 75) | def _scale_ass_style(style_str: str, scale_factor: float) -> str:
function render_ass_preview (line 110) | def render_ass_preview(
function _get_video_resolution (line 258) | def _get_video_resolution(video_path: str) -> Tuple[int, int]:
function render_ass_video (line 277) | def render_ass_video(
FILE: app/core/subtitle/ass_utils.py
class AssStyle (line 12) | class AssStyle:
class AssInfo (line 35) | class AssInfo:
method get_style (line 42) | def get_style(self, style_name: str) -> AssStyle:
function parse_ass_info (line 52) | def parse_ass_info(ass_content: str) -> AssInfo:
function wrap_ass_text (line 179) | def wrap_ass_text(
function auto_wrap_ass_file (line 222) | def auto_wrap_ass_file(
FILE: app/core/subtitle/font_utils.py
function _get_font_family_name (line 18) | def _get_font_family_name(font_path: Path, font_index: int = 0) -> Optio...
function get_builtin_fonts (line 53) | def get_builtin_fonts() -> tuple[Dict[str, str], ...]:
function get_font (line 74) | def get_font(size: int, font_name: str = "") -> FontType:
function get_ass_to_pil_ratio (line 121) | def get_ass_to_pil_ratio(font_name: str) -> float:
function clear_font_cache (line 165) | def clear_font_cache():
FILE: app/core/subtitle/rounded_renderer.py
function _get_video_info (line 26) | def _get_video_info(video_path: str) -> Tuple[int, int, float]:
function render_text_block (line 53) | def render_text_block(
function render_subtitle_image (line 133) | def render_subtitle_image(
function render_preview (line 200) | def render_preview(
function render_rounded_video (line 268) | def render_rounded_video(
FILE: app/core/subtitle/styles.py
class RoundedBgStyle (line 9) | class RoundedBgStyle:
FILE: app/core/subtitle/text_utils.py
function is_mainly_cjk (line 12) | def is_mainly_cjk(text: str, threshold: float = 0.5) -> bool:
function hex_to_rgba (line 23) | def hex_to_rgba(hex_color: str) -> Tuple[int, int, int, int]:
function _calculate_text_width (line 44) | def _calculate_text_width(text: str, font: FontType, spacing: float) -> ...
function wrap_text (line 65) | def wrap_text(
function _wrap_cjk_balanced (line 98) | def _wrap_cjk_balanced(
function _wrap_english_balanced (line 172) | def _wrap_english_balanced(
FILE: app/core/task_factory.py
class TaskFactory (line 21) | class TaskFactory:
method get_ass_style (line 25) | def get_ass_style(style_name: str) -> str:
method get_rounded_style (line 33) | def get_rounded_style() -> dict:
method create_transcribe_task (line 49) | def create_transcribe_task(
method create_subtitle_task (line 108) | def create_subtitle_task(
method create_synthesis_task (line 207) | def create_synthesis_task(
method create_transcript_and_subtitle_task (line 248) | def create_transcript_and_subtitle_task(
method create_full_process_task (line 267) | def create_full_process_task(
FILE: app/core/translate/base.py
class BaseTranslator (line 17) | class BaseTranslator(ABC):
method __init__ (line 20) | def __init__(
method _init_thread_pool (line 37) | def _init_thread_pool(self):
method translate_subtitle (line 42) | def translate_subtitle(self, subtitle_data: ASRData) -> ASRData:
method _split_chunks (line 69) | def _split_chunks(
method _parallel_translate (line 78) | def _parallel_translate(
method _get_cache_key (line 101) | def _get_cache_key(self, chunk: List[SubtitleProcessData]) -> str:
method _safe_translate_chunk (line 108) | def _safe_translate_chunk(
method _set_segments_translated_text (line 131) | def _set_segments_translated_text(
method _translate_chunk (line 147) | def _translate_chunk(
method stop (line 153) | def stop(self):
FILE: app/core/translate/bing_translator.py
class BingTranslator (line 13) | class BingTranslator(BaseTranslator):
method __init__ (line 16) | def __init__(
method _init_session (line 41) | def _init_session(self):
method _translate_chunk (line 52) | def _translate_chunk(
method _get_cache_key (line 100) | def _get_cache_key(self, chunk: List[SubtitleProcessData]) -> str:
FILE: app/core/translate/deeplx_translator.py
class DeepLXTranslator (line 13) | class DeepLXTranslator(BaseTranslator):
method __init__ (line 16) | def __init__(
method _translate_chunk (line 34) | def _translate_chunk(
method _get_cache_key (line 58) | def _get_cache_key(self, chunk: List[SubtitleProcessData]) -> str:
FILE: app/core/translate/factory.py
class TranslatorFactory (line 16) | class TranslatorFactory:
method create_translator (line 20) | def create_translator(
FILE: app/core/translate/google_translator.py
class GoogleTranslator (line 15) | class GoogleTranslator(BaseTranslator):
method __init__ (line 18) | def __init__(
method _translate_chunk (line 39) | def _translate_chunk(
method _get_cache_key (line 73) | def _get_cache_key(self, chunk: List[SubtitleProcessData]) -> str:
FILE: app/core/translate/llm_translator.py
class LLMTranslator (line 16) | class LLMTranslator(BaseTranslator):
method __init__ (line 21) | def __init__(
method _translate_chunk (line 42) | def _translate_chunk(
method _agent_loop (line 96) | def _agent_loop(
method _validate_llm_response (line 133) | def _validate_llm_response(
method _translate_chunk_single (line 187) | def _translate_chunk_single(
method _get_cache_key (line 212) | def _get_cache_key(self, chunk: List[SubtitleProcessData]) -> str:
FILE: app/core/translate/types.py
class TranslatorType (line 6) | class TranslatorType(Enum):
class TargetLanguage (line 15) | class TargetLanguage(Enum):
function get_language_code (line 201) | def get_language_code(target_language: TargetLanguage, translator_type: ...
FILE: app/core/tts/base.py
class BaseTTS (line 16) | class BaseTTS(ABC):
method __init__ (line 25) | def __init__(self, config: TTSConfig):
method synthesize (line 34) | def synthesize(
method _synthesize_segment (line 91) | def _synthesize_segment(self, segment: TTSDataSeg, output_path: str) -...
method _synthesize (line 130) | def _synthesize(self, segment: TTSDataSeg, output_path: str) -> None:
method _generate_cache_key_for_segment (line 139) | def _generate_cache_key_for_segment(self, segment: TTSDataSeg) -> str:
method _generate_filename (line 167) | def _generate_filename(self, text: str, index: int) -> str:
FILE: app/core/tts/openai_fm.py
class OpenAIFmTTS (line 18) | class OpenAIFmTTS(BaseTTS):
method __init__ (line 47) | def __init__(self, config: TTSConfig):
method _synthesize (line 61) | def _synthesize(self, segment: TTSDataSeg, output_path: str) -> None:
method _build_prompt (line 103) | def _build_prompt(self) -> str:
method get_available_voices (line 117) | def get_available_voices():
method get_prompt_templates (line 126) | def get_prompt_templates():
FILE: app/core/tts/openai_tts.py
class OpenAITTS (line 12) | class OpenAITTS(BaseTTS):
method __init__ (line 18) | def __init__(self, config: TTSConfig):
method _synthesize (line 34) | def _synthesize(self, segment: TTSDataSeg, output_path: str) -> None:
FILE: app/core/tts/siliconflow.py
class VoiceCloneManager (line 16) | class VoiceCloneManager:
method __init__ (line 19) | def __init__(self, api_key: str, base_url: str):
method upload_voice (line 30) | def upload_voice(
method _generate_cache_key (line 97) | def _generate_cache_key(self, audio_path: str, text: str, model: str) ...
class SiliconFlowTTS (line 106) | class SiliconFlowTTS(BaseTTS):
method __init__ (line 112) | def __init__(self, config: TTSConfig):
method _synthesize (line 125) | def _synthesize(self, segment: TTSDataSeg, output_path: str) -> None:
FILE: app/core/tts/status.py
class TTSStatus (line 5) | class TTSStatus(Enum):
method message (line 26) | def message(self) -> str:
method progress (line 31) | def progress(self) -> int:
method with_progress (line 35) | def with_progress(self, progress: int) -> Tuple[int, str]:
method callback_tuple (line 46) | def callback_tuple(self) -> Tuple[int, str]:
FILE: app/core/tts/tts_data.py
class TTSConfig (line 8) | class TTSConfig:
class TTSDataSeg (line 32) | class TTSDataSeg:
method __str__ (line 47) | def __str__(self) -> str:
class TTSData (line 51) | class TTSData:
method __init__ (line 54) | def __init__(self, segments: Optional[List[TTSDataSeg]] = None):
method __iter__ (line 67) | def __iter__(self):
method __len__ (line 71) | def __len__(self) -> int:
method from_texts (line 76) | def from_texts(
FILE: app/core/utils/cache.py
function enable_cache (line 21) | def enable_cache() -> None:
function disable_cache (line 27) | def disable_cache() -> None:
function is_cache_enabled (line 33) | def is_cache_enabled() -> bool:
function get_llm_cache (line 46) | def get_llm_cache() -> Cache:
function get_asr_cache (line 51) | def get_asr_cache() -> Cache:
function get_translate_cache (line 56) | def get_translate_cache() -> Cache:
function get_tts_cache (line 61) | def get_tts_cache() -> Cache:
function get_version_state_cache (line 66) | def get_version_state_cache() -> Cache:
function memoize (line 71) | def memoize(cache_instance: Cache, **kwargs):
function generate_cache_key (line 107) | def generate_cache_key(data: Any) -> str:
FILE: app/core/utils/logger.py
function setup_logger (line 8) | def setup_logger(
FILE: app/core/utils/platform_utils.py
function open_folder (line 15) | def open_folder(path):
function open_file (line 41) | def open_file(path):
function get_subprocess_kwargs (line 67) | def get_subprocess_kwargs():
function is_macos (line 84) | def is_macos() -> bool:
function is_windows (line 94) | def is_windows() -> bool:
function is_linux (line 104) | def is_linux() -> bool:
function get_available_transcribe_models (line 114) | def get_available_transcribe_models() -> list[TranscribeModelEnum]:
function is_model_available (line 134) | def is_model_available(model: TranscribeModelEnum) -> bool:
FILE: app/core/utils/subprocess_helper.py
class StreamReader (line 13) | class StreamReader:
method __init__ (line 16) | def __init__(self, process: subprocess.Popen):
method start_reading (line 27) | def start_reading(self) -> None:
method _read_stream (line 49) | def _read_stream(self, stream, stream_name: str) -> None:
method get_output (line 60) | def get_output(self, timeout: float = 0.1) -> Optional[Tuple[str, str]]:
method get_remaining_output (line 75) | def get_remaining_output(self) -> list:
method is_empty (line 85) | def is_empty(self) -> bool:
function run_process_with_stream_reader (line 90) | def run_process_with_stream_reader(
FILE: app/core/utils/text_utils.py
function is_pure_punctuation (line 21) | def is_pure_punctuation(text: str) -> bool:
function is_mainly_cjk (line 26) | def is_mainly_cjk(text: str, threshold: float = 0.5) -> bool:
function is_space_separated_language (line 47) | def is_space_separated_language(text: str) -> bool:
function count_words (line 72) | def count_words(text: str) -> int:
FILE: app/core/utils/video_utils.py
function temporary_subtitle_file (line 41) | def temporary_subtitle_file(subtitle_path: str):
function video2audio (line 67) | def video2audio(input_file: str, output: str = "", audio_track_index: in...
function check_cuda_available (line 131) | def check_cuda_available() -> bool:
function add_subtitles (line 174) | def add_subtitles(
function get_video_info (line 373) | def get_video_info(
function _extract_thumbnail (line 481) | def _extract_thumbnail(video_path: str, seek_time: float, thumbnail_path...
function add_subtitles_with_style (line 529) | def add_subtitles_with_style(
FILE: app/thread/batch_process_thread.py
class BatchTask (line 22) | class BatchTask:
method __init__ (line 23) | def __init__(self, file_path: str, task_type: BatchTaskType):
class BatchProcessThread (line 32) | class BatchProcessThread(QThread):
method __init__ (line 38) | def __init__(self):
method add_task (line 47) | def add_task(self, task: BatchTask):
method run (line 54) | def run(self):
method _process_task (line 73) | def _process_task(self, batch_task: BatchTask):
method _on_progress_wrapper (line 95) | def _on_progress_wrapper(self, batch_task: BatchTask, progress: int, m...
method _on_error_wrapper (line 99) | def _on_error_wrapper(self, batch_task: BatchTask, error: str):
method _on_finished_wrapper (line 105) | def _on_finished_wrapper(self, batch_task: BatchTask, task=None):
method _handle_transcribe_task (line 113) | def _handle_transcribe_task(self, batch_task: BatchTask):
method _handle_subtitle_task (line 134) | def _handle_subtitle_task(self, batch_task: BatchTask):
method _handle_trans_sub_task (line 156) | def _handle_trans_sub_task(self, batch_task: BatchTask):
method _on_trans_sub_progress_wrapper (line 177) | def _on_trans_sub_progress_wrapper(
method _on_trans_sub_finished_wrapper (line 184) | def _on_trans_sub_finished_wrapper(
method _on_trans_sub_subtitle_progress_wrapper (line 212) | def _on_trans_sub_subtitle_progress_wrapper(
method _handle_full_process_task (line 219) | def _handle_full_process_task(self, batch_task: BatchTask):
method on_full_process_progress (line 236) | def on_full_process_progress(
method on_full_process_finished (line 244) | def on_full_process_finished(self, batch_task: BatchTask, task: Transc...
method on_full_process_subtitle_progress (line 273) | def on_full_process_subtitle_progress(
method on_full_process_subtitle_finished (line 281) | def on_full_process_subtitle_finished(
method on_full_process_synthesis_progress (line 304) | def on_full_process_synthesis_progress(
method stop_task (line 312) | def stop_task(self, file_path: str):
method stop_all (line 323) | def stop_all(self):
FILE: app/thread/file_download_thread.py
class BaseDownloader (line 16) | class BaseDownloader(ABC):
method __init__ (line 19) | def __init__(self, url: str, save_path: Path, progress_callback):
method download (line 26) | def download(self) -> bool:
method cancel (line 30) | def cancel(self):
class Aria2Downloader (line 35) | class Aria2Downloader(BaseDownloader):
method __init__ (line 38) | def __init__(self, url: str, save_path: Path, progress_callback):
method is_available (line 43) | def is_available() -> bool:
method download (line 47) | def download(self) -> bool:
method _parse_progress (line 103) | def _parse_progress(self, line: str):
method cancel (line 124) | def cancel(self):
class RequestsDownloader (line 131) | class RequestsDownloader(BaseDownloader):
method download (line 136) | def download(self) -> bool:
method _format_size (line 174) | def _format_size(bytes_size: int) -> str:
class FileDownloadThread (line 184) | class FileDownloadThread(QThread):
method __init__ (line 191) | def __init__(self, url: str, save_path: str):
method run (line 197) | def run(self):
method _on_progress (line 223) | def _on_progress(self, percent: float, status: str):
method stop (line 227) | def stop(self):
FILE: app/thread/modelscope_download_thread.py
class SuppressOutput (line 11) | class SuppressOutput:
method __enter__ (line 14) | def __enter__(self):
method __exit__ (line 26) | def __exit__(self, *args):
function create_progress_callback_class (line 33) | def create_progress_callback_class(
class ModelscopeDownloadThread (line 55) | class ModelscopeDownloadThread(QThread):
method __init__ (line 59) | def __init__(self, model_id: str, save_path: str):
method run (line 64) | def run(self):
function on_progress (line 93) | def on_progress(percentage, message):
function on_error (line 96) | def on_error(error_msg):
function on_finished (line 100) | def on_finished():
FILE: app/thread/subtitle_pipeline_thread.py
class SubtitlePipelineThread (line 20) | class SubtitlePipelineThread(QThread):
method __init__ (line 31) | def __init__(self, task: FullProcessTask):
method run (line 36) | def run(self):
FILE: app/thread/subtitle_thread.py
class SubtitleThread (line 31) | class SubtitleThread(QThread):
method __init__ (line 38) | def __init__(self, task: SubtitleTask):
method set_custom_prompt_text (line 46) | def set_custom_prompt_text(self, text: str):
method _setup_llm_config (line 49) | def _setup_llm_config(self) -> Optional[SubtitleConfig]:
method run (line 72) | def run(self):
method need_llm (line 250) | def need_llm(self, subtitle_config: SubtitleConfig, asr_data: ASRData):
method callback (line 265) | def callback(self, result: List[SubtitleProcessData]):
method stop (line 277) | def stop(self):
FILE: app/thread/transcript_thread.py
class TranscriptThread (line 15) | class TranscriptThread(QThread):
method __init__ (line 20) | def __init__(self, task: TranscribeTask):
method run (line 24) | def run(self):
method _validate_task (line 42) | def _validate_task(self):
method _check_downloaded_subtitle (line 58) | def _check_downloaded_subtitle(self) -> bool:
method _perform_transcription (line 78) | def _perform_transcription(self):
method progress_callback (line 147) | def progress_callback(self, value, message):
FILE: app/thread/version_checker_thread.py
class VersionChecker (line 15) | class VersionChecker(QObject):
method __init__ (line 22) | def __init__(self):
method get_latest_version_info (line 33) | def get_latest_version_info(self) -> dict:
method has_new_version (line 67) | def has_new_version(self) -> bool:
method check_announcement (line 95) | def check_announcement(self) -> None:
method check_new_version_announcement (line 132) | def check_new_version_announcement(self) -> None:
method perform_check (line 148) | def perform_check(self) -> None:
FILE: app/thread/video_download_thread.py
class VideoDownloadThread (line 15) | class VideoDownloadThread(QThread):
method __init__ (line 24) | def __init__(self, url: str, work_dir: str):
method run (line 29) | def run(self):
method progress_hook (line 39) | def progress_hook(self, d):
method sanitize_filename (line 59) | def sanitize_filename(self, name: str, replacement: str = "_") -> str:
method download (line 115) | def download(self, need_subtitle: bool = True, need_thumbnail: bool = ...
FILE: app/thread/video_info_thread.py
class VideoInfoThread (line 13) | class VideoInfoThread(QThread):
method __init__ (line 17) | def __init__(self, file_path):
method run (line 21) | def run(self):
FILE: app/thread/video_synthesis_thread.py
class VideoSynthesisThread (line 15) | class VideoSynthesisThread(QThread):
method __init__ (line 20) | def __init__(self, task: SynthesisTask):
method run (line 25) | def run(self):
method progress_callback (line 108) | def progress_callback(self, value, message):
FILE: app/view/batch_process_interface.py
class BatchProcessInterface (line 47) | class BatchProcessInterface(QWidget):
method __init__ (line 48) | def __init__(self, parent=None):
method init_ui (line 58) | def init_ui(self):
method setup_connections (line 133) | def setup_connections(self):
method on_add_file_clicked (line 143) | def on_add_file_clicked(self):
method dragEnterEvent (line 165) | def dragEnterEvent(self, event):
method dropEvent (line 171) | def dropEvent(self, event):
method add_files (line 175) | def add_files(self, file_paths):
method filter_files (line 248) | def filter_files(self, file_paths, task_type: BatchTaskType):
method add_task_to_table (line 269) | def add_task_to_table(self, file_path):
method show_context_menu (line 294) | def show_context_menu(self, pos):
method open_output_folder (line 321) | def open_output_folder(self, file_path: str):
method update_task_progress (line 336) | def update_task_progress(self, file_path: str, progress: int, status: ...
method on_task_error (line 346) | def on_task_error(self, file_path: str, error: str):
method on_task_completed (line 354) | def on_task_completed(self, file_path: str):
method start_all_tasks (line 361) | def start_all_tasks(self):
method start_task (line 406) | def start_task(self, file_path: str):
method cancel_task (line 422) | def cancel_task(self, file_path: str):
method clear_tasks (line 430) | def clear_tasks(self):
method on_task_type_changed (line 434) | def on_task_type_changed(self, task_type: str):
method closeEvent (line 448) | def closeEvent(self, event):
method on_table_double_clicked (line 452) | def on_table_double_clicked(self, index):
FILE: app/view/home_interface.py
class HomeInterface (line 14) | class HomeInterface(QWidget):
method __init__ (line 15) | def __init__(self, parent=None):
method switch_to_transcription (line 73) | def switch_to_transcription(self, file_path):
method switch_to_subtitle_optimization (line 85) | def switch_to_subtitle_optimization(self, file_path, video_path):
method switch_to_video_synthesis (line 95) | def switch_to_video_synthesis(self, video_path, subtitle_path):
method addSubInterface (line 106) | def addSubInterface(self, widget, objectName, text):
method onCurrentIndexChanged (line 116) | def onCurrentIndexChanged(self, index):
method closeEvent (line 122) | def closeEvent(self, event):
FILE: app/view/llm_logs_interface.py
class LogDetailDialog (line 38) | class LogDetailDialog(MessageBoxBase):
method __init__ (line 41) | def __init__(self, log_entry: Dict[str, Any], parent=None):
method _setup_ui (line 46) | def _setup_ui(self):
method _copy_request (line 121) | def _copy_request(self):
method _copy_response (line 136) | def _copy_response(self):
class LLMLogsInterface (line 152) | class LLMLogsInterface(QWidget):
method __init__ (line 155) | def __init__(self, parent=None):
method _setup_ui (line 169) | def _setup_ui(self):
method _setup_toolbar (line 178) | def _setup_toolbar(self):
method _setup_table (line 197) | def _setup_table(self):
method _setup_footer (line 244) | def _setup_footer(self):
method _connect_signals (line 274) | def _connect_signals(self):
method _setup_file_watcher (line 282) | def _setup_file_watcher(self):
method _on_file_changed (line 292) | def _on_file_changed(self, path: str):
method _on_dir_changed (line 299) | def _on_dir_changed(self, path: str):
method _on_refresh_clicked (line 305) | def _on_refresh_clicked(self):
method _load_logs (line 316) | def _load_logs(self):
method _filter_logs (line 346) | def _filter_logs(self):
method _update_table (line 375) | def _update_table(self):
method _create_item (line 428) | def _create_item(self, text: str, align_left: bool = False) -> QTableW...
method _show_detail (line 437) | def _show_detail(self, index):
method _prev_page (line 444) | def _prev_page(self):
method _next_page (line 449) | def _next_page(self):
method _clear_logs (line 455) | def _clear_logs(self):
FILE: app/view/log_window.py
class LogWindow (line 13) | class LogWindow(QWidget):
method __init__ (line 14) | def __init__(self, parent=None):
method load_last_lines (line 79) | def load_last_lines(self, read_size):
method on_scroll_changed (line 123) | def on_scroll_changed(self, value):
method update_log (line 129) | def update_log(self):
method open_log_folder (line 158) | def open_log_folder(self):
FILE: app/view/main_window.py
class MainWindow (line 33) | class MainWindow(FluentWindow):
method __init__ (line 34) | def __init__(self):
method initNavigation (line 65) | def initNavigation(self):
method switchTo (line 93) | def switchTo(self, interface):
method initWindow (line 100) | def initWindow(self):
method onGithubDialog (line 122) | def onGithubDialog(self):
method onNewVersion (line 140) | def onNewVersion(self, version, update_required, update_info, download...
method onAnnouncement (line 168) | def onAnnouncement(self, content):
method resizeEvent (line 175) | def resizeEvent(self, e):
method closeEvent (line 180) | def closeEvent(self, event):
method stop (line 195) | def stop(self):
method _check_ffmpeg (line 201) | def _check_ffmpeg(self):
FILE: app/view/setting_interface.py
class SettingInterface (line 40) | class SettingInterface(ScrollArea):
method __init__ (line 43) | def __init__(self, parent=None):
method __initGroups (line 61) | def __initGroups(self):
method __initCards (line 84) | def __initCards(self):
method __createLLMServiceCards (line 261) | def __createLLMServiceCards(self):
method __createASRServiceCards (line 429) | def __createASRServiceCards(self):
method __createTranslateServiceCards (line 490) | def __createTranslateServiceCards(self):
method __initWidget (line 557) | def __initWidget(self):
method __initLayout (line 594) | def __initLayout(self):
method __connectSignalToSlot (line 627) | def __connectSignalToSlot(self):
method __showRestartTooltip (line 695) | def __showRestartTooltip(self):
method __onsavePathCardClicked (line 704) | def __onsavePathCardClicked(self):
method __onCacheEnabledChanged (line 712) | def __onCacheEnabledChanged(self, is_enabled: bool):
method checkLLMConnection (line 731) | def checkLLMConnection(self):
method onConnectionCheckError (line 773) | def onConnectionCheckError(self, message):
method onConnectionCheckFinished (line 784) | def onConnectionCheckFinished(self, is_success, message, models):
method checkUpdate (line 821) | def checkUpdate(self):
method __onLLMServiceChanged (line 824) | def __onLLMServiceChanged(self, service):
method __onTranslatorServiceChanged (line 863) | def __onTranslatorServiceChanged(self, service):
method __onTranscribeModelChanged (line 886) | def __onTranscribeModelChanged(self, model_name):
method checkWhisperConnection (line 905) | def checkWhisperConnection(self):
method onWhisperConnectionCheckFinished (line 960) | def onWhisperConnectionCheckFinished(self, success, result):
method onWhisperConnectionCheckError (line 981) | def onWhisperConnectionCheckError(self, message):
class WhisperConnectionThread (line 995) | class WhisperConnectionThread(QThread):
method __init__ (line 1001) | def __init__(self, base_url, api_key, model):
method run (line 1007) | def run(self):
class LLMConnectionThread (line 1018) | class LLMConnectionThread(QThread):
method __init__ (line 1022) | def __init__(self, api_base, api_key, model):
method run (line 1028) | def run(self):
FILE: app/view/subtitle_interface.py
class SubtitleTableModel (line 61) | class SubtitleTableModel(QAbstractTableModel):
method __init__ (line 62) | def __init__(self, data: Union[str, Dict[str, Any]] = ""):
method load_data (line 70) | def load_data(self, data: str):
method data (line 78) | def data(self, index: QModelIndex, role: int = Qt.DisplayRole) -> Any:...
method setData (line 111) | def setData(self, index: QModelIndex, value: Any, role: int = Qt.EditR...
method headerData (line 134) | def headerData(
method rowCount (line 158) | def rowCount(self, parent: Optional[QModelIndex] = None) -> int:
method columnCount (line 161) | def columnCount(self, parent: Optional[QModelIndex] = None) -> int:
method flags (line 164) | def flags(self, index: QModelIndex) -> Qt.ItemFlags:
method update_data (line 171) | def update_data(self, new_data: Dict[str, str]) -> None:
method update_all (line 190) | def update_all(self, data: Dict[str, Any]) -> None:
class SubtitleInterface (line 196) | class SubtitleInterface(QWidget):
method __init__ (line 199) | def __init__(self, parent: Optional[QWidget] = None):
method _init_ui (line 211) | def _init_ui(self):
method set_values (line 220) | def set_values(self):
method _setup_top_layout (line 229) | def _setup_top_layout(self):
method _setup_subtitle_table (line 352) | def _setup_subtitle_table(self):
method _setup_bottom_layout (line 386) | def _setup_bottom_layout(self):
method _setup_signals (line 403) | def _setup_signals(self) -> None:
method show_prompt_dialog (line 415) | def show_prompt_dialog(self) -> None:
method _update_prompt_button_style (line 421) | def _update_prompt_button_style(self) -> None:
method set_task (line 430) | def set_task(self, task: SubtitleTask) -> None:
method update_info (line 439) | def update_info(self, task: SubtitleTask) -> None:
method start_subtitle_optimization (line 449) | def start_subtitle_optimization(self, need_create_task: bool = True) -...
method process (line 490) | def process(self) -> None:
method on_subtitle_optimization_finished (line 495) | def on_subtitle_optimization_finished(
method on_subtitle_optimization_error (line 511) | def on_subtitle_optimization_error(self, error: str) -> None:
method on_subtitle_optimization_progress (line 522) | def on_subtitle_optimization_progress(self, value: int, status: str) -...
method update_data (line 526) | def update_data(self, data):
method update_all (line 529) | def update_all(self, data):
method remove_widget (line 532) | def remove_widget(self) -> None:
method on_file_select (line 542) | def on_file_select(self) -> None:
method on_save_format_clicked (line 556) | def on_save_format_clicked(self, format: str) -> None:
method on_open_folder_clicked (line 602) | def on_open_folder_clicked(self) -> None:
method load_subtitle_file (line 625) | def load_subtitle_file(self, file_path: str) -> None:
method dragEnterEvent (line 632) | def dragEnterEvent(self, event: QDragEnterEvent) -> None:
method dropEvent (line 635) | def dropEvent(self, event: QDropEvent) -> None:
method closeEvent (line 666) | def closeEvent(self, event: QCloseEvent) -> None:
method show_subtitle_settings (line 671) | def show_subtitle_settings(self) -> None:
method show_video_player (line 676) | def show_video_player(self) -> None:
method on_subtitle_clicked (line 717) | def on_subtitle_clicked(self, index: QModelIndex) -> None:
method show_context_menu (line 728) | def show_context_menu(self, pos) -> None:
method merge_selected_rows (line 760) | def merge_selected_rows(self, rows: List[int]) -> None:
method keyPressEvent (line 823) | def keyPressEvent(self, event: QKeyEvent) -> None:
method cancel_optimization (line 836) | def cancel_optimization(self) -> None:
method on_target_language_changed (line 852) | def on_target_language_changed(self, language: str) -> None:
method on_subtitle_optimization_changed (line 860) | def on_subtitle_optimization_changed(self, checked: bool) -> None:
method on_subtitle_translation_changed (line 865) | def on_subtitle_translation_changed(self, checked: bool) -> None:
method on_subtitle_layout_changed (line 872) | def on_subtitle_layout_changed(self, layout: str) -> None:
class PromptDialog (line 879) | class PromptDialog(MessageBoxBase):
method __init__ (line 880) | def __init__(self, parent: Optional[QWidget] = None):
method setup_ui (line 887) | def setup_ui(self) -> None:
method get_prompt (line 919) | def get_prompt(self) -> str:
method save_prompt (line 922) | def save_prompt(self) -> None:
FILE: app/view/subtitle_style_interface.py
class AssPreviewThread (line 62) | class AssPreviewThread(QThread):
method __init__ (line 67) | def __init__(
method run (line 82) | def run(self):
class RoundedBgPreviewThread (line 93) | class RoundedBgPreviewThread(QThread):
method __init__ (line 98) | def __init__(
method run (line 114) | def run(self):
class SubtitleStyleInterface (line 126) | class SubtitleStyleInterface(QWidget):
method __init__ (line 127) | def __init__(self, parent=None):
method _initSettingsArea (line 152) | def _initSettingsArea(self):
method _initPreviewArea (line 180) | def _initPreviewArea(self):
method _initSettingCards (line 230) | def _initSettingCards(self):
method _initLayout (line 455) | def _initLayout(self):
method _initStyle (line 506) | def _initStyle(self):
method __setValues (line 521) | def __setValues(self):
method connectSignals (line 599) | def connectSignals(self):
method on_open_style_folder_clicked (line 686) | def on_open_style_folder_clicked(self):
method on_subtitle_layout_changed (line 690) | def on_subtitle_layout_changed(self, layout: str):
method on_render_mode_changed_external (line 695) | def on_render_mode_changed_external(self, mode_text: str):
method onRenderModeChanged (line 706) | def onRenderModeChanged(self):
method onRoundedBgSettingChanged (line 719) | def onRoundedBgSettingChanged(self):
method _updateVisibleGroups (line 756) | def _updateVisibleGroups(self):
method _getStyleFileExtension (line 769) | def _getStyleFileExtension(self) -> str:
method _refreshStyleList (line 774) | def _refreshStyleList(self):
method _getCurrentRenderMode (line 811) | def _getCurrentRenderMode(self) -> SubtitleRenderModeEnum:
method _parseRgbaHex (line 816) | def _parseRgbaHex(self, hex_color: str) -> QColor:
method onOrientationChanged (line 829) | def onOrientationChanged(self):
method onAssSettingChanged (line 838) | def onAssSettingChanged(self):
method selectPreviewImage (line 850) | def selectPreviewImage(self):
method generateAssStyles (line 862) | def generateAssStyles(self) -> str:
method updatePreview (line 900) | def updatePreview(self):
method onPreviewReady (line 967) | def onPreviewReady(self, preview_path):
method updatePreviewImage (line 972) | def updatePreviewImage(self):
method resizeEvent (line 981) | def resizeEvent(self, event):
method showEvent (line 985) | def showEvent(self, event):
method loadStyle (line 990) | def loadStyle(self, style_name):
method _loadAssStyle (line 1020) | def _loadAssStyle(self, style_path: Path):
method _loadRoundedBgStyle (line 1088) | def _loadRoundedBgStyle(self, style_path: Path):
method createNewStyle (line 1114) | def createNewStyle(self):
method saveStyle (line 1153) | def saveStyle(self, style_name):
method _saveAssStyle (line 1166) | def _saveAssStyle(self, style_path: Path):
method _saveRoundedBgStyle (line 1172) | def _saveRoundedBgStyle(self, style_path: Path):
method dragEnterEvent (line 1192) | def dragEnterEvent(self, event):
method dropEvent (line 1203) | def dropEvent(self, event):
class StyleNameDialog (line 1226) | class StyleNameDialog(MessageBoxBase):
method __init__ (line 1229) | def __init__(self, parent=None):
method _validateInput (line 1249) | def _validateInput(self, text):
FILE: app/view/task_creation_interface.py
class TaskCreationInterface (line 46) | class TaskCreationInterface(QWidget):
method __init__ (line 53) | def __init__(self, parent=None):
method setup_ui (line 66) | def setup_ui(self):
method setup_logo (line 76) | def setup_logo(self):
method setup_search_layout (line 91) | def setup_search_layout(self):
method setup_status_layout (line 142) | def setup_status_layout(self):
method setup_info_label (line 158) | def setup_info_label(self):
method setup_signals (line 207) | def setup_signals(self):
method setup_values (line 213) | def setup_values(self):
method on_start_clicked (line 216) | def on_start_clicked(self):
method on_search_input_changed (line 237) | def on_search_input_changed(self):
method dragEnterEvent (line 243) | def dragEnterEvent(self, event):
method dropEvent (line 246) | def dropEvent(self, event):
method create_task (line 278) | def create_task(self):
method _is_valid_url (line 292) | def _is_valid_url(self, url):
method _process_file (line 299) | def _process_file(self, file_path):
method _process_url (line 302) | def _process_url(self, url):
method on_video_download_finished (line 327) | def on_video_download_finished(self, video_file_path):
method on_create_task_progress (line 346) | def on_create_task_progress(self, value, status):
method on_create_task_error (line 352) | def on_create_task_error(self, error):
method set_task (line 360) | def set_task(self, task):
method update_info (line 364) | def update_info(self):
method process (line 368) | def process(self):
method show_log_window (line 383) | def show_log_window(self):
method show_donate_dialog (line 392) | def show_donate_dialog(self):
FILE: app/view/transcription_interface.py
class VideoInfoCard (line 61) | class VideoInfoCard(CardWidget):
method __init__ (line 64) | def __init__(self, parent: Optional[QWidget] = None):
method setup_ui (line 73) | def setup_ui(self) -> None:
method setup_thumbnail (line 83) | def setup_thumbnail(self) -> None:
method setup_info_layout (line 98) | def setup_info_layout(self) -> None:
method create_pill_button (line 131) | def create_pill_button(self, text: str, width: int) -> PillPushButton:
method setup_button_layout (line 139) | def setup_button_layout(self) -> None:
method update_info (line 153) | def update_info(self, video_info: VideoInfo) -> None:
method update_audio_tracks (line 176) | def update_audio_tracks(self, video_info: VideoInfo) -> None:
method update_audio_track_button_text (line 216) | def update_audio_track_button_text(
method on_audio_track_selected (line 233) | def on_audio_track_selected(self, array_index: int, audio_streams: lis...
method update_thumbnail (line 243) | def update_thumbnail(self, thumbnail_path):
method setup_signals (line 255) | def setup_signals(self) -> None:
method on_start_button_clicked (line 259) | def on_start_button_clicked(self):
method on_open_folder_clicked (line 266) | def on_open_folder_clicked(self):
method start_transcription (line 284) | def start_transcription(self, need_create_task=True):
method on_transcript_progress (line 304) | def on_transcript_progress(self, value, message):
method on_transcript_error (line 309) | def on_transcript_error(self, error):
method on_transcript_finished (line 322) | def on_transcript_finished(self, task):
method reset_ui (line 329) | def reset_ui(self):
method set_task (line 336) | def set_task(self, task):
method stop (line 341) | def stop(self):
class TranscriptionInterface (line 346) | class TranscriptionInterface(QWidget):
method __init__ (line 351) | def __init__(self, parent: Optional[QWidget] = None):
method _init_ui (line 362) | def _init_ui(self) -> None:
method _setup_command_bar (line 378) | def _setup_command_bar(self):
method _setup_signals (line 430) | def _setup_signals(self) -> None:
method _show_output_settings (line 447) | def _show_output_settings(self):
method _set_value (line 452) | def _set_value(self) -> None:
method on_transcription_model_changed (line 458) | def on_transcription_model_changed(self, model_name: str):
method _on_transcript_finished (line 467) | def _on_transcript_finished(self, task: TranscribeTask):
method _on_file_select (line 481) | def _on_file_select(self):
method update_info (line 496) | def update_info(self, file_path):
method _on_video_info_error (line 503) | def _on_video_info_error(self, error_msg):
method set_task (line 513) | def set_task(self, task: TranscribeTask) -> None:
method process (line 519) | def process(self):
method dragEnterEvent (line 524) | def dragEnterEvent(self, event):
method dropEvent (line 528) | def dropEvent(self, event):
method closeEvent (line 569) | def closeEvent(self, event):
FILE: app/view/video_synthesis_interface.py
class VideoSynthesisInterface (line 47) | class VideoSynthesisInterface(QWidget):
method __init__ (line 50) | def __init__(self, parent=None):
method setup_ui (line 63) | def setup_ui(self):
method _setup_command_bar (line 133) | def _setup_command_bar(self):
method setup_style (line 214) | def setup_style(self):
method setup_signals (line 257) | def setup_signals(self):
method set_value (line 274) | def set_value(self):
method on_soft_subtitle_action_triggered (line 285) | def on_soft_subtitle_action_triggered(self, checked: bool):
method on_soft_subtitle_changed (line 312) | def on_soft_subtitle_changed(self, checked: bool):
method on_need_video_action_triggered (line 316) | def on_need_video_action_triggered(self, checked: bool):
method on_need_video_changed (line 339) | def on_need_video_changed(self, checked: bool):
method on_video_quality_action_changed (line 344) | def on_video_quality_action_changed(self, quality_text: str):
method on_video_quality_changed (line 359) | def on_video_quality_changed(self, quality_text: str):
method on_use_style_action_triggered (line 363) | def on_use_style_action_triggered(self, checked: bool):
method on_use_style_changed (line 389) | def on_use_style_changed(self, checked: bool):
method on_render_mode_changed (line 394) | def on_render_mode_changed(self, mode_text: str):
method on_render_mode_changed_external (line 406) | def on_render_mode_changed_external(self, mode_text: str):
method _update_synthesis_controls_state (line 410) | def _update_synthesis_controls_state(self):
method _update_style_controls_state (line 422) | def _update_style_controls_state(self):
method choose_subtitle_file (line 429) | def choose_subtitle_file(self):
method choose_video_file (line 442) | def choose_video_file(self):
method create_task (line 453) | def create_task(self):
method set_task (line 467) | def set_task(self, task: SynthesisTask):
method update_info (line 471) | def update_info(self):
method start_video_synthesis (line 476) | def start_video_synthesis(self, need_create_task=True):
method process (line 496) | def process(self):
method on_video_synthesis_finished (line 499) | def on_video_synthesis_finished(self, task):
method on_video_synthesis_progress (line 511) | def on_video_synthesis_progress(self, progress, message):
method on_video_synthesis_error (line 515) | def on_video_synthesis_error(self, error):
method open_video_folder (line 526) | def open_video_folder(self):
method dragEnterEvent (line 549) | def dragEnterEvent(self, event):
method dropEvent (line 553) | def dropEvent(self, event: QDropEvent):
FILE: docs/.vitepress/theme/index.ts
method enhanceApp (line 7) | enhanceApp({ app }) {
FILE: main.py
function exception_hook (line 45) | def exception_hook(exctype, value, tb):
function main (line 83) | def main():
FILE: scripts/translate_llm.py
class Translation (line 77) | class Translation(BaseModel):
class TranslationBatch (line 85) | class TranslationBatch(BaseModel):
function detect_target_language (line 111) | def detect_target_language(filename: str) -> str:
function translate_batch (line 133) | def translate_batch(
function translate_file (line 188) | def translate_file(ts_file: Path, target_lang: str) -> None:
function main (line 277) | def main():
FILE: tests/conftest.py
function sample_asr_data (line 50) | def sample_asr_data():
function sample_translate_data (line 77) | def sample_translate_data():
function target_language (line 95) | def target_language():
function check_env_vars (line 110) | def check_env_vars():
function expected_translations (line 136) | def expected_translations() -> Dict[str, Dict[str, List[str]]]:
function mock_llm_client (line 185) | def mock_llm_client(monkeypatch):
function assert_translation_quality (line 390) | def assert_translation_quality(
FILE: tests/test_asr/conftest.py
function test_audio_path (line 17) | def test_audio_path() -> Path:
function test_audio_path_zh (line 41) | def test_audio_path_zh() -> Path:
function test_audio_path_en (line 65) | def test_audio_path_en() -> Path:
function assert_asr_result_valid (line 88) | def assert_asr_result_valid(result, min_segments: int = 0) -> None:
FILE: tests/test_asr/test_asr_data.py
class TestASRDataSegEdgeCases (line 11) | class TestASRDataSegEdgeCases:
method test_zero_duration_segment (line 14) | def test_zero_duration_segment(self):
method test_negative_duration (line 21) | def test_negative_duration(self):
method test_very_long_timestamp (line 26) | def test_very_long_timestamp(self):
method test_unicode_text_extreme (line 32) | def test_unicode_text_extreme(self):
method test_empty_translation (line 39) | def test_empty_translation(self):
method test_multiline_text (line 45) | def test_multiline_text(self):
class TestASRDataEdgeCases (line 53) | class TestASRDataEdgeCases:
method test_mixed_empty_and_whitespace (line 56) | def test_mixed_empty_and_whitespace(self):
method test_overlapping_timestamps (line 69) | def test_overlapping_timestamps(self):
method test_unsorted_large_dataset (line 81) | def test_unsorted_large_dataset(self):
method test_duplicate_timestamps (line 93) | def test_duplicate_timestamps(self):
method test_single_segment (line 103) | def test_single_segment(self):
class TestWordTimestampEdgeCases (line 112) | class TestWordTimestampEdgeCases:
method test_exactly_80_percent_threshold (line 115) | def test_exactly_80_percent_threshold(self):
method test_79_percent_below_threshold (line 128) | def test_79_percent_below_threshold(self):
method test_mixed_cjk_latin_single_chars (line 142) | def test_mixed_cjk_latin_single_chars(self):
method test_three_char_cjk (line 153) | def test_three_char_cjk(self):
class TestSplitToWordsEdgeCases (line 160) | class TestSplitToWordsEdgeCases:
method test_split_empty_text (line 163) | def test_split_empty_text(self):
method test_split_only_punctuation (line 170) | def test_split_only_punctuation(self):
method test_split_very_long_word (line 177) | def test_split_very_long_word(self):
method test_split_mixed_scripts (line 186) | def test_split_mixed_scripts(self):
method test_split_numbers_and_words (line 199) | def test_split_numbers_and_words(self):
method test_split_thai_with_combining_chars (line 210) | def test_split_thai_with_combining_chars(self):
method test_split_zero_duration_distribution (line 218) | def test_split_zero_duration_distribution(self):
class TestMergeEdgeCases (line 228) | class TestMergeEdgeCases:
method test_merge_single_segment (line 231) | def test_merge_single_segment(self):
method test_merge_all_segments (line 239) | def test_merge_all_segments(self):
method test_merge_invalid_indices (line 248) | def test_merge_invalid_indices(self):
method test_merge_with_next_at_boundary (line 260) | def test_merge_with_next_at_boundary(self):
method test_merge_with_unicode (line 268) | def test_merge_with_unicode(self):
class TestOptimizeTimingEdgeCases (line 280) | class TestOptimizeTimingEdgeCases:
method test_optimize_negative_gap (line 283) | def test_optimize_negative_gap(self):
method test_optimize_exact_threshold (line 294) | def test_optimize_exact_threshold(self):
method test_optimize_word_level_no_change (line 306) | def test_optimize_word_level_no_change(self):
class TestRemovePunctuationEdgeCases (line 320) | class TestRemovePunctuationEdgeCases:
method test_remove_multiple_punctuation (line 323) | def test_remove_multiple_punctuation(self):
method test_remove_punctuation_only (line 330) | def test_remove_punctuation_only(self):
method test_remove_punctuation_middle (line 337) | def test_remove_punctuation_middle(self):
method test_remove_non_chinese_punctuation (line 344) | def test_remove_non_chinese_punctuation(self):
class TestFormatConversionEdgeCases (line 352) | class TestFormatConversionEdgeCases:
method test_srt_layout_modes_all (line 355) | def test_srt_layout_modes_all(self):
method test_srt_no_translation_all_layouts (line 375) | def test_srt_no_translation_all_layouts(self):
method test_json_large_dataset (line 384) | def test_json_large_dataset(self):
method test_txt_multiline_segments (line 395) | def test_txt_multiline_segments(self):
class TestFileIOEdgeCases (line 406) | class TestFileIOEdgeCases:
method test_save_unsupported_format (line 409) | def test_save_unsupported_format(self):
method test_load_nonexistent_file (line 423) | def test_load_nonexistent_file(self):
method test_save_load_unicode_path (line 428) | def test_save_load_unicode_path(self):
class TestParseEdgeCases (line 440) | class TestParseEdgeCases:
method test_parse_malformed_srt (line 443) | def test_parse_malformed_srt(self):
method test_parse_srt_missing_text (line 456) | def test_parse_srt_missing_text(self):
method test_parse_srt_97_percent_translation (line 469) | def test_parse_srt_97_percent_translation(self):
method test_parse_json_non_numeric_keys (line 487) | def test_parse_json_non_numeric_keys(self):
method test_parse_vtt_empty_blocks (line 500) | def test_parse_vtt_empty_blocks(self):
FILE: tests/test_asr/test_bcut_asr.py
class TestBcutASR (line 14) | class TestBcutASR:
method bcut_asr_sentence (line 22) | def bcut_asr_sentence(self, test_audio_path: Path) -> BcutASR:
method bcut_asr_word (line 37) | def bcut_asr_word(self, test_audio_path: Path) -> BcutASR:
method test_transcribe_parametrized (line 104) | def test_transcribe_parametrized(
FILE: tests/test_asr/test_chunk_merger.py
function create_sentence_segments (line 16) | def create_sentence_segments(sentences, start_time=0):
function create_word_level_segments (line 31) | def create_word_level_segments(words, start_time=0, is_chinese=True):
class TestSentenceLevelMerging (line 66) | class TestSentenceLevelMerging:
method merger (line 70) | def merger(self):
method test_chinese_podcast_perfect_overlap (line 73) | def test_chinese_podcast_perfect_overlap(self, merger):
method test_english_lecture_perfect_overlap (line 112) | def test_english_lecture_perfect_overlap(self, merger):
method test_no_overlap_sequential_chunks (line 142) | def test_no_overlap_sequential_chunks(self, merger):
method test_three_chunks_continuous_merge (line 159) | def test_three_chunks_continuous_merge(self, merger):
class TestASRErrorCases (line 198) | class TestASRErrorCases:
method merger (line 202) | def merger(self):
method test_homophone_error_chinese (line 205) | def test_homophone_error_chinese(self, merger):
method test_punctuation_difference_english (line 232) | def test_punctuation_difference_english(self, merger):
method test_partial_match_only_one_sentence (line 266) | def test_partial_match_only_one_sentence(self, merger):
method test_complete_mismatch_noise_in_overlap (line 292) | def test_complete_mismatch_noise_in_overlap(self, merger):
method test_filler_words_different_recognition (line 318) | def test_filler_words_different_recognition(self, merger):
class TestWordLevelMerging (line 358) | class TestWordLevelMerging:
method merger (line 362) | def merger(self):
method test_chinese_word_level_perfect_overlap (line 365) | def test_chinese_word_level_perfect_overlap(self, merger):
method test_english_word_level_perfect_overlap (line 391) | def test_english_word_level_perfect_overlap(self, merger):
method test_chinese_word_level_partial_match (line 415) | def test_chinese_word_level_partial_match(self, merger):
method test_english_word_level_capitalization_difference (line 442) | def test_english_word_level_capitalization_difference(self, merger):
class TestMixedLanguage (line 473) | class TestMixedLanguage:
method merger (line 477) | def merger(self):
method test_tech_talk_chinese_english_mixed (line 480) | def test_tech_talk_chinese_english_mixed(self, merger):
method test_product_name_mixed_word_level (line 508) | def test_product_name_mixed_word_level(self, merger):
class TestEdgeCases (line 541) | class TestEdgeCases:
method merger (line 545) | def merger(self):
method test_empty_chunk (line 548) | def test_empty_chunk(self, merger):
method test_single_word_segments (line 562) | def test_single_word_segments(self, merger):
method test_identical_chunks_100_percent_overlap (line 576) | def test_identical_chunks_100_percent_overlap(self, merger):
method test_very_long_overlap_90_percent (line 594) | def test_very_long_overlap_90_percent(self, merger):
class TestLongSequences (line 620) | class TestLongSequences:
method merger (line 624) | def merger(self):
method test_10_chunks_continuous_chinese (line 627) | def test_10_chunks_continuous_chinese(self, merger):
method test_very_long_text_word_level_english (line 666) | def test_very_long_text_word_level_english(self, merger):
class TestOutputFormat (line 696) | class TestOutputFormat:
method merger (line 700) | def merger(self):
method test_output_has_valid_timestamps (line 703) | def test_output_has_valid_timestamps(self, merger):
method test_can_save_to_srt (line 720) | def test_can_save_to_srt(self, merger, tmp_path):
class TestStrictMode (line 751) | class TestStrictMode:
method strict_merger (line 755) | def strict_merger(self):
method test_insufficient_overlap_fallback_to_time (line 758) | def test_insufficient_overlap_fallback_to_time(self, strict_merger):
method test_sufficient_overlap_merge_normally (line 779) | def test_sufficient_overlap_merge_normally(self, strict_merger):
FILE: tests/test_asr/test_chunked_asr.py
class MockASR (line 33) | class MockASR(BaseASR):
method __init__ (line 42) | def __init__(
method _run (line 55) | def _run(
method _make_segments (line 87) | def _make_segments(self, resp_data: dict) -> List[ASRDataSeg]:
function create_test_audio_file (line 99) | def create_test_audio_file(duration_sec: int = 60) -> str:
class TestChunkedASRBasics (line 124) | class TestChunkedASRBasics:
method test_init_default_params (line 127) | def test_init_default_params(self):
method test_init_custom_params (line 143) | def test_init_custom_params(self):
method test_short_audio_no_chunking (line 163) | def test_short_audio_no_chunking(self):
method test_long_audio_with_chunking (line 185) | def test_long_audio_with_chunking(self):
class TestAudioSplitting (line 215) | class TestAudioSplitting:
method test_split_exact_chunks (line 218) | def test_split_exact_chunks(self):
method test_split_with_overlap (line 238) | def test_split_with_overlap(self):
method test_split_remainder_chunk (line 262) | def test_split_remainder_chunk(self):
class TestConcurrentTranscription (line 289) | class TestConcurrentTranscription:
method test_concurrency_3_workers (line 292) | def test_concurrency_3_workers(self):
method test_independent_asr_instances (line 314) | def test_independent_asr_instances(self):
class TestChunkMerging (line 345) | class TestChunkMerging:
method test_merge_preserves_order (line 348) | def test_merge_preserves_order(self):
class TestEdgeCases (line 371) | class TestEdgeCases:
method test_very_short_audio (line 374) | def test_very_short_audio(self):
method test_zero_overlap (line 386) | def test_zero_overlap(self):
class TestErrorHandling (line 411) | class TestErrorHandling:
method test_asr_failure_propagates (line 414) | def test_asr_failure_propagates(self):
class TestProgressCallback (line 436) | class TestProgressCallback:
method test_callback_invoked (line 439) | def test_callback_invoked(self):
class TestIntegration (line 468) | class TestIntegration:
method test_full_pipeline_short_audio (line 471) | def test_full_pipeline_short_audio(self):
method test_full_pipeline_long_audio (line 491) | def test_full_pipeline_long_audio(self):
FILE: tests/test_asr/test_chunking.py
class MockASR (line 27) | class MockASR(BaseASR):
method __init__ (line 33) | def __init__(
method _run (line 56) | def _run(
method _make_segments (line 98) | def _make_segments(self, resp_data: dict) -> List[ASRDataSeg]:
method _get_subclass_params (line 109) | def _get_subclass_params(self) -> dict:
function create_test_audio (line 122) | def create_test_audio(duration_ms: int, frequency: int = 440) -> bytes:
function create_test_audio_file (line 141) | def create_test_audio_file(duration_sec: int) -> str:
class TestAudioSplitting (line 168) | class TestAudioSplitting:
method test_split_long_audio_into_chunks (line 171) | def test_split_long_audio_into_chunks(self):
method test_split_short_audio_no_chunks (line 205) | def test_split_short_audio_no_chunks(self):
method test_split_exact_chunk_length (line 230) | def test_split_exact_chunk_length(self):
method test_split_with_zero_overlap (line 251) | def test_split_with_zero_overlap(self):
FILE: tests/test_asr/test_jianying_asr.py
class TestJianYingASR (line 14) | class TestJianYingASR:
method test_transcribe_parametrized (line 106) | def test_transcribe_parametrized(
FILE: tests/test_asr/test_whisper_api_asr.py
class TestWhisperAPI (line 14) | class TestWhisperAPI:
method skip_if_no_env (line 18) | def skip_if_no_env(self, check_env_vars) -> None:
method test_chinese_word_timestamp (line 26) | def test_chinese_word_timestamp(self, test_audio_path_zh: Path) -> None:
method test_transcribe_parametrized (line 65) | def test_transcribe_parametrized(
FILE: tests/test_optimize/test_optimize.py
class TestSubtitleOptimizer (line 19) | class TestSubtitleOptimizer:
method optimizer (line 23) | def optimizer(self, mock_llm_client) -> SubtitleOptimizer:
method sample_asr_data (line 34) | def sample_asr_data(self) -> ASRData:
method test_optimize_basic (line 55) | def test_optimize_basic(
method test_agent_loop_validation (line 84) | def test_agent_loop_validation(
method test_optimize_empty_handling (line 107) | def test_optimize_empty_handling(self, optimizer: SubtitleOptimizer):
FILE: tests/test_split/test_alignment.py
class TestSubtitleAligner (line 11) | class TestSubtitleAligner:
method aligner (line 15) | def aligner(self) -> SubtitleAligner:
method test_align_identical_texts (line 19) | def test_align_identical_texts(self, aligner):
method test_align_with_missing_items (line 30) | def test_align_with_missing_items(self, aligner):
method test_align_with_extra_items (line 43) | def test_align_with_extra_items(self, aligner):
method test_align_empty_texts (line 56) | def test_align_empty_texts(self, aligner):
method test_align_single_item (line 66) | def test_align_single_item(self, aligner):
method test_align_completely_different_texts (line 76) | def test_align_completely_different_texts(self, aligner):
method test_align_chinese_text (line 87) | def test_align_chinese_text(self, aligner):
FILE: tests/test_split/test_split.py
class TestPreprocessEdgeCases (line 12) | class TestPreprocessEdgeCases:
method test_unicode_extremes (line 15) | def test_unicode_extremes(self):
method test_mixed_punctuation_types (line 29) | def test_mixed_punctuation_types(self):
method test_zero_duration_segments (line 40) | def test_zero_duration_segments(self):
method test_overlapping_timestamps (line 49) | def test_overlapping_timestamps(self):
method test_reversed_timestamps (line 59) | def test_reversed_timestamps(self):
method test_very_long_text (line 67) | def test_very_long_text(self):
method test_whitespace_only_segments (line 75) | def test_whitespace_only_segments(self):
method test_mixed_case_with_numbers (line 87) | def test_mixed_case_with_numbers(self):
method test_special_characters (line 96) | def test_special_characters(self):
method test_newlines_and_tabs_in_text (line 106) | def test_newlines_and_tabs_in_text(self):
class TestSubtitleSplitterEdgeCases (line 115) | class TestSubtitleSplitterEdgeCases:
method test_extremely_short_segments (line 118) | def test_extremely_short_segments(self):
method test_extremely_long_single_segment (line 133) | def test_extremely_long_single_segment(self):
method test_alternating_long_short_segments (line 147) | def test_alternating_long_short_segments(self):
method test_all_same_timestamp (line 170) | def test_all_same_timestamp(self):
method test_large_time_gaps (line 183) | def test_large_time_gaps(self):
method test_1000_segments_stress (line 197) | def test_1000_segments_stress(self):
method test_mixed_language_segments (line 215) | def test_mixed_language_segments(self):
method test_numbers_only_segments (line 230) | def test_numbers_only_segments(self):
method test_repeated_text_segments (line 245) | def test_repeated_text_segments(self):
class TestSplitterParameters (line 260) | class TestSplitterParameters:
method test_max_word_count_zero (line 263) | def test_max_word_count_zero(self):
method test_max_word_count_very_large (line 278) | def test_max_word_count_very_large(self):
method test_max_word_count_exactly_matches (line 290) | def test_max_word_count_exactly_matches(self):
class TestMergeShortSegments (line 303) | class TestMergeShortSegments:
method test_all_segments_very_short (line 306) | def test_all_segments_very_short(self):
method test_mixed_short_and_long (line 319) | def test_mixed_short_and_long(self):
method test_alternating_short_long_pattern (line 336) | def test_alternating_short_long_pattern(self):
class TestStopAndThreading (line 359) | class TestStopAndThreading:
method test_stop_before_start (line 362) | def test_stop_before_start(self):
method test_stop_during_processing (line 370) | def test_stop_during_processing(self):
method test_multiple_stop_calls (line 393) | def test_multiple_stop_calls(self):
class TestTimestampIntegrity (line 404) | class TestTimestampIntegrity:
method test_no_negative_durations (line 407) | def test_no_negative_durations(self):
method test_no_gaps_in_timeline (line 422) | def test_no_gaps_in_timeline(self):
method test_preserves_total_duration (line 440) | def test_preserves_total_duration(self):
FILE: tests/test_split/test_split_by_llm.py
class TestSplitByLLM (line 18) | class TestSplitByLLM:
method test_count_words_chinese (line 21) | def test_count_words_chinese(self):
method test_count_words_english (line 26) | def test_count_words_english(self):
method test_count_words_mixed (line 31) | def test_count_words_mixed(self):
method test_split_chinese_text (line 37) | def test_split_chinese_text(self, mock_llm_client):
method test_split_english_text (line 64) | def test_split_english_text(self, mock_llm_client):
method test_split_mixed_text (line 88) | def test_split_mixed_text(self, mock_llm_client):
method test_split_preserves_content (line 108) | def test_split_preserves_content(self, mock_llm_client):
method test_split_short_text (line 119) | def test_split_short_text(self, mock_llm_client):
method test_agent_loop_correction (line 132) | def test_agent_loop_correction(self, mock_llm_client):
FILE: tests/test_split/test_split_core.py
class TestPreprocessSegments (line 15) | class TestPreprocessSegments:
method test_remove_pure_punctuation (line 18) | def test_remove_pure_punctuation(self):
method test_english_word_lowercase (line 31) | def test_english_word_lowercase(self):
method test_need_lower_false (line 44) | def test_need_lower_false(self):
method test_mixed_language (line 50) | def test_mixed_language(self):
method test_empty_segments (line 63) | def test_empty_segments(self):
method test_chinese_punctuation (line 68) | def test_chinese_punctuation(self):
method test_apostrophe_in_word (line 80) | def test_apostrophe_in_word(self):
class TestSubtitleSplitterInit (line 92) | class TestSubtitleSplitterInit:
method test_default_initialization (line 95) | def test_default_initialization(self):
method test_custom_parameters (line 105) | def test_custom_parameters(self):
method test_thread_pool_created (line 118) | def test_thread_pool_created(self):
class TestDetermineNumSegments (line 125) | class TestDetermineNumSegments:
method test_small_word_count (line 128) | def test_small_word_count(self):
method test_exact_threshold (line 134) | def test_exact_threshold(self):
method test_just_above_threshold (line 140) | def test_just_above_threshold(self):
method test_multiple_segments (line 146) | def test_multiple_segments(self):
method test_zero_word_count (line 152) | def test_zero_word_count(self):
class TestGroupByTimeGaps (line 159) | class TestGroupByTimeGaps:
method test_no_gaps (line 162) | def test_no_gaps(self):
method test_large_gap (line 174) | def test_large_gap(self):
method test_multiple_gaps (line 187) | def test_multiple_gaps(self):
method test_empty_segments (line 199) | def test_empty_segments(self):
method test_single_segment (line 205) | def test_single_segment(self):
method test_check_large_gaps_enabled (line 213) | def test_check_large_gaps_enabled(self):
class TestSplitByCommonWords (line 230) | class TestSplitByCommonWords:
method test_split_on_prefix_word (line 233) | def test_split_on_prefix_word(self):
method test_split_on_suffix_word (line 250) | def test_split_on_suffix_word(self):
method test_english_common_words (line 266) | def test_english_common_words(self):
method test_no_common_words (line 283) | def test_no_common_words(self):
method test_empty_segments (line 293) | def test_empty_segments(self):
class TestSplitLongSegment (line 300) | class TestSplitLongSegment:
method test_short_segment (line 303) | def test_short_segment(self):
method test_long_segment_with_gaps (line 317) | def test_long_segment_with_gaps(self):
method test_very_short_segments (line 337) | def test_very_short_segments(self):
method test_equal_time_gaps (line 347) | def test_equal_time_gaps(self):
method test_preserves_timestamps (line 360) | def test_preserves_timestamps(self):
class TestMergeShortSegment (line 375) | class TestMergeShortSegment:
method test_merge_very_short_segments (line 378) | def test_merge_very_short_segments(self):
method test_merge_with_short_gap (line 390) | def test_merge_with_short_gap(self):
method test_no_merge_long_segments (line 402) | def test_no_merge_long_segments(self):
method test_no_merge_large_gap (line 414) | def test_no_merge_large_gap(self):
method test_merge_respects_max_word_count (line 426) | def test_merge_respects_max_word_count(self):
method test_english_text_merge (line 440) | def test_english_text_merge(self):
method test_empty_segments (line 452) | def test_empty_segments(self):
method test_single_segment (line 459) | def test_single_segment(self):
class TestStopMethod (line 467) | class TestStopMethod:
method test_stop_sets_running_false (line 470) | def test_stop_sets_running_false(self):
method test_stop_shuts_down_executor (line 477) | def test_stop_shuts_down_executor(self):
method test_multiple_stops (line 484) | def test_multiple_stops(self):
method test_stop_idempotent (line 491) | def test_stop_idempotent(self):
class TestEdgeCases (line 501) | class TestEdgeCases:
method test_zero_thread_num (line 504) | def test_zero_thread_num(self):
method test_negative_max_word_count (line 515) | def test_negative_max_word_count(self):
method test_very_large_thread_num (line 523) | def test_very_large_thread_num(self):
FILE: tests/test_split/test_split_realistic.py
function create_whisper_style_segments (line 14) | def create_whisper_style_segments(
class TestRealWorldScenarios (line 58) | class TestRealWorldScenarios:
method test_podcast_long_monologue (line 61) | def test_podcast_long_monologue(self):
method test_interview_qa_with_pauses (line 79) | def test_interview_qa_with_pauses(self):
method test_news_broadcast_style (line 114) | def test_news_broadcast_style(self):
method test_casual_conversation_with_hesitation (line 126) | def test_casual_conversation_with_hesitation(self):
method test_technical_presentation_bilingual (line 163) | def test_technical_presentation_bilingual(self):
method test_subtitle_with_background_noise_gaps (line 203) | def test_subtitle_with_background_noise_gaps(self):
class TestEdgeCasesRealistic (line 233) | class TestEdgeCasesRealistic:
method test_very_fast_speech (line 236) | def test_very_fast_speech(self):
method test_very_slow_speech (line 246) | def test_very_slow_speech(self):
method test_subtitle_with_numbers_and_punctuation (line 256) | def test_subtitle_with_numbers_and_punctuation(self):
method test_empty_or_whitespace_segments (line 294) | def test_empty_or_whitespace_segments(self):
method test_subtitle_crossing_one_hour (line 307) | def test_subtitle_crossing_one_hour(self):
class TestGroupByTimeGapsRealistic (line 330) | class TestGroupByTimeGapsRealistic:
method test_scene_change_detection (line 333) | def test_scene_change_detection(self):
method test_natural_sentence_pauses (line 360) | def test_natural_sentence_pauses(self):
class TestSplitByCommonWordsRealistic (line 387) | class TestSplitByCommonWordsRealistic:
method test_long_compound_sentence_chinese (line 390) | def test_long_compound_sentence_chinese(self):
method test_english_compound_sentence (line 401) | def test_english_compound_sentence(self):
class TestMergeShortSegmentRealistic (line 413) | class TestMergeShortSegmentRealistic:
method test_merge_single_character_words (line 416) | def test_merge_single_character_words(self):
method test_dont_merge_across_large_pause (line 437) | def test_dont_merge_across_large_pause(self):
method test_merge_interjections (line 454) | def test_merge_interjections(self):
FILE: tests/test_subtitle/conftest.py
function qapp (line 10) | def qapp():
function use_qapp (line 20) | def use_qapp(qapp):
FILE: tests/test_subtitle/test_subtitle_thread.py
function get_test_model (line 30) | def get_test_model():
function run_thread_with_timeout (line 57) | def run_thread_with_timeout(thread, timeout_ms=60000):
function subtitle_file (line 104) | def subtitle_file():
function output_dir (line 114) | def output_dir():
function base_config (line 121) | def base_config():
class TestSubtitleThreadSplit (line 132) | class TestSubtitleThreadSplit:
method test_split_sentence (line 135) | def test_split_sentence(
method test_split_semantic (line 161) | def test_split_semantic(
class TestSubtitleThreadOptimize (line 184) | class TestSubtitleThreadOptimize:
method test_optimize_with_llm (line 187) | def test_optimize_with_llm(
class TestSubtitleThreadTranslate (line 211) | class TestSubtitleThreadTranslate:
method test_translate_google (line 215) | def test_translate_google(self, subtitle_file, output_dir, base_config):
method test_translate_bing (line 238) | def test_translate_bing(self, subtitle_file, output_dir, base_config):
method test_translate_llm (line 257) | def test_translate_llm(
class TestSubtitleThreadFullPipeline (line 282) | class TestSubtitleThreadFullPipeline:
method test_split_and_translate (line 285) | def test_split_and_translate(
method test_optimize_and_translate (line 310) | def test_optimize_and_translate(
class TestSubtitleThreadError (line 336) | class TestSubtitleThreadError:
method test_missing_file (line 339) | def test_missing_file(self, output_dir, base_config):
method test_no_translator_service (line 350) | def test_no_translator_service(self, subtitle_file, output_dir, base_c...
FILE: tests/test_thread/conftest.py
function qapp (line 16) | def qapp():
function sample_audio_path (line 25) | def sample_audio_path() -> str:
function sample_video_path (line 35) | def sample_video_path(tmp_path: Path, sample_audio_path: str) -> str:
function sample_subtitle_path (line 66) | def sample_subtitle_path(tmp_path: Path) -> str:
function output_dir (line 76) | def output_dir(tmp_path: Path) -> Generator[str, None, None]:
function run_thread_with_timeout (line 83) | def run_thread_with_timeout(thread, timeout_ms: int = 30000) -> dict:
FILE: tests/test_thread/test_subtitle_pipeline_thread.py
class TestSubtitlePipelineThread (line 9) | class TestSubtitlePipelineThread:
method test_pipeline_placeholder (line 12) | def test_pipeline_placeholder(self, qapp):
FILE: tests/test_thread/test_transcript_thread.py
class TestTranscriptThread (line 17) | class TestTranscriptThread:
method base_config (line 21) | def base_config(self) -> TranscribeConfig:
method test_transcribe_audio_with_faster_whisper (line 34) | def test_transcribe_audio_with_faster_whisper(
method test_transcribe_video_with_faster_whisper (line 61) | def test_transcribe_video_with_faster_whisper(
method test_transcribe_missing_video (line 83) | def test_transcribe_missing_video(
method test_transcribe_empty_path (line 100) | def test_transcribe_empty_path(
FILE: tests/test_thread/test_video_info_thread.py
class TestVideoInfoThread (line 10) | class TestVideoInfoThread:
method test_get_video_info_missing_file (line 13) | def test_get_video_info_missing_file(self, qapp):
method test_get_video_info_invalid_file (line 20) | def test_get_video_info_invalid_file(self, tmp_path, qapp):
FILE: tests/test_thread/test_video_synthesis_thread.py
class TestVideoSynthesisThread (line 14) | class TestVideoSynthesisThread:
method base_config (line 18) | def base_config(self) -> SynthesisConfig:
method test_synthesize_skip_video (line 25) | def test_synthesize_skip_video(
method test_synthesize_missing_video (line 50) | def test_synthesize_missing_video(
method test_synthesize_empty_paths (line 71) | def test_synthesize_empty_paths(
FILE: tests/test_translate/test_bing_translator.py
class TestBingTranslator (line 14) | class TestBingTranslator:
method bing_translator (line 18) | def bing_translator(self, target_language: TargetLanguage) -> BingTran...
method test_translate_simple_text (line 31) | def test_translate_simple_text(
method test_translate_chunk (line 61) | def test_translate_chunk(
FILE: tests/test_translate/test_cache_validation.py
function ensure_cache_enabled (line 16) | def ensure_cache_enabled():
function test_cache (line 24) | def test_cache(tmp_path) -> Cache:
class TestCacheValidation (line 31) | class TestCacheValidation:
method test_exception_not_cached (line 34) | def test_exception_not_cached(self, test_cache: Cache) -> None:
method test_validate_none_not_cached (line 55) | def test_validate_none_not_cached(self, test_cache: Cache) -> None:
method test_validate_empty_not_cached (line 78) | def test_validate_empty_not_cached(self, test_cache: Cache) -> None:
method test_custom_validator (line 101) | def test_custom_validator(self, test_cache: Cache) -> None:
method test_valid_result_cached (line 128) | def test_valid_result_cached(self, test_cache: Cache) -> None:
method test_no_validator_caches_all (line 149) | def test_no_validator_caches_all(self, test_cache: Cache) -> None:
method test_cache_disabled_bypasses_cache (line 172) | def test_cache_disabled_bypasses_cache(self, test_cache: Cache) -> None:
FILE: tests/test_translate/test_deeplx_translator.py
class TestDeepLXTranslator (line 23) | class TestDeepLXTranslator:
method deeplx_translator (line 27) | def deeplx_translator(
method test_translate_simple_text (line 45) | def test_translate_simple_text(
method test_translate_chunk (line 79) | def test_translate_chunk(
FILE: tests/test_translate/test_google_translator.py
class TestGoogleTranslator (line 14) | class TestGoogleTranslator:
method google_translator (line 18) | def google_translator(self, target_language: TargetLanguage) -> Google...
method test_translate_simple_text (line 32) | def test_translate_simple_text(
method test_translate_chunk (line 62) | def test_translate_chunk(
FILE: tests/test_translate/test_llm_translator.py
class TestLLMTranslator (line 22) | class TestLLMTranslator:
method llm_translator (line 26) | def llm_translator(
method test_translate_simple_text (line 46) | def test_translate_simple_text(
method test_translate_chunk (line 69) | def test_translate_chunk(
method test_cache_works (line 97) | def test_cache_works(
method test_reflect_translation (line 124) | def test_reflect_translation(
FILE: tests/test_tts/test_tts_core.py
class TestTTSConfig (line 23) | class TestTTSConfig:
method test_default_config (line 26) | def test_default_config(self):
method test_custom_config (line 42) | def test_custom_config(self):
class TestTTSData (line 60) | class TestTTSData:
method test_create_tts_data_seg (line 63) | def test_create_tts_data_seg(self):
method test_create_tts_data_from_segments (line 80) | def test_create_tts_data_from_segments(self):
method test_from_texts (line 91) | def test_from_texts(self):
method test_filter_empty_segments (line 100) | def test_filter_empty_segments(self):
class TestTTSStatus (line 114) | class TestTTSStatus:
method test_status_properties (line 117) | def test_status_properties(self):
method test_callback_tuple (line 123) | def test_callback_tuple(self):
method test_with_progress (line 128) | def test_with_progress(self):
method test_all_statuses (line 133) | def test_all_statuses(self):
class MockTTS (line 144) | class MockTTS(BaseTTS):
method __init__ (line 147) | def __init__(self, config: TTSConfig):
method _synthesize (line 151) | def _synthesize(self, segment: TTSDataSeg, output_path: str) -> None:
class TestBaseTTS (line 161) | class TestBaseTTS:
method test_generate_cache_key (line 164) | def test_generate_cache_key(self):
method test_generate_filename (line 187) | def test_generate_filename(self):
method test_synthesize_single (line 202) | def test_synthesize_single(self):
method test_synthesize_batch (line 220) | def test_synthesize_batch(self):
method test_batch_with_callback (line 243) | def test_batch_with_callback(self):
method test_cache_parameter (line 265) | def test_cache_parameter(self):
class TestSiliconFlowTTS (line 302) | class TestSiliconFlowTTS:
method test_init_without_api_key (line 305) | def test_init_without_api_key(self):
method test_synthesize_success (line 312) | def test_synthesize_success(self, mock_post):
method test_synthesize_with_optional_params (line 347) | def test_synthesize_with_optional_params(self, mock_post):
class TestOpenAITTS (line 374) | class TestOpenAITTS:
method test_init_without_api_key (line 377) | def test_init_without_api_key(self):
method test_synthesize_success (line 384) | def test_synthesize_success(self, mock_openai_class):
method test_synthesize_with_custom_voice (line 436) | def test_synthesize_with_custom_voice(self, mock_openai_class):
method test_default_voice (line 473) | def test_default_voice(self, mock_openai_class):
FILE: tests/test_tts/test_tts_integration.py
function siliconflow_config (line 47) | def siliconflow_config():
function openai_config (line 72) | def openai_config():
class TestSiliconFlowIntegration (line 84) | class TestSiliconFlowIntegration:
method test_siliconflow_single_synthesis (line 87) | def test_siliconflow_single_synthesis(self, siliconflow_config):
method test_siliconflow_batch_synthesis (line 103) | def test_siliconflow_batch_synthesis(self, siliconflow_config):
class TestOpenAITTSIntegration (line 141) | class TestOpenAITTSIntegration:
method test_openai_single_synthesis (line 144) | def test_openai_single_synthesis(self, openai_config):
method test_openai_batch_synthesis (line 160) | def test_openai_batch_synthesis(self, openai_config):
Condensed preview — 223 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,707K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/01_bug.yaml",
"chars": 953,
"preview": "name: 错误 | Bug\ndescription: 反馈程序出现的错误 | Report bugs\nlabels: [\"bug\"]\nbody:\n - type: markdown\n attributes:\n value"
},
{
"path": ".github/ISSUE_TEMPLATE/02_request.yaml",
"chars": 656,
"preview": "name: 功能请求 | Feature Request\ndescription: 提出增加新功能的请求 | Create the request for a new feature\nlabels: [\"enhancement\"]\nbody"
},
{
"path": ".github/ISSUE_TEMPLATE/03_question.yaml",
"chars": 257,
"preview": "name: 问题咨询 Question\ndescription: 向作者咨询软件使用或配置相关的问题 | Consult about software usage or configuration\n\nlabels: [\"question\"]"
},
{
"path": ".github/workflows/claude-code-review.yml",
"chars": 3033,
"preview": "name: Claude Code Review\n\non:\n pull_request:\n types: [opened, synchronize]\n # Optional: Only run on specific file"
},
{
"path": ".github/workflows/claude.yml",
"chars": 2313,
"preview": "name: Claude Code\n\non:\n issue_comment:\n types: [created]\n pull_request_review_comment:\n types: [created]\n issue"
},
{
"path": ".github/workflows/deploy-docs.yml",
"chars": 1216,
"preview": "name: Deploy Documentation\n\non:\n push:\n branches:\n - master\n - main\n - dev\n paths:\n - \"docs/*"
},
{
"path": ".gitignore",
"chars": 632,
"preview": "# win 二进制文件资源目录\n/resource/bin/\n!/resource/bin/bin_environment.txt\n\n# 开发环境\n.idea/\n*.pyc\n*/__pycache__/\n*.env\n*.env.local\n"
},
{
"path": "CHANGELOG.md",
"chars": 1236,
"preview": "# 更新日志\n\n## 2025.02.07\n\n### Bug 修复与其他改进\n\n- 修复谷歌翻译语言不正确的问题。\n- 修部微软翻译不准确的问题。\n- 修复运行设备不选择cuda时显示报 winError的错误\n- 修复合成失败的问题\n- "
},
{
"path": "LICENSE",
"chars": 35143,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "README.md",
"chars": 10812,
"preview": "<div align=\"center\">\n <img src=\"./legacy-docs/images/logo.png\"alt=\"VideoCaptioner Logo\" width=\"100\">\n <p>卡卡字幕助手</p>\n "
},
{
"path": "app/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "app/common/config.py",
"chars": 11462,
"preview": "# coding:utf-8\nfrom enum import Enum\n\nfrom PyQt5.QtCore import QLocale\nfrom PyQt5.QtGui import QColor\nfrom qfluentwidget"
},
{
"path": "app/common/signal_bus.py",
"chars": 1860,
"preview": "from PyQt5.QtCore import QObject, QUrl, pyqtSignal\n\n\nclass SignalBus(QObject):\n # 字幕排布信号\n subtitle_layout_changed "
},
{
"path": "app/components/DonateDialog.py",
"chars": 2828,
"preview": "import os\n\nfrom PyQt5.QtCore import Qt\nfrom PyQt5.QtGui import QPixmap\nfrom PyQt5.QtWidgets import QHBoxLayout, QLabel, "
},
{
"path": "app/components/EditComboBoxSettingCard.py",
"chars": 2368,
"preview": "from typing import List, Optional, Union\n\nfrom PyQt5.QtCore import Qt, pyqtSignal\nfrom PyQt5.QtGui import QIcon\nfrom PyQ"
},
{
"path": "app/components/FasterWhisperSettingWidget.py",
"chars": 30479,
"preview": "import os\nimport subprocess\nfrom pathlib import Path\n\nfrom PyQt5.QtCore import Qt, QThread, pyqtSignal\nfrom PyQt5.QtGui "
},
{
"path": "app/components/LanguageSettingDialog.py",
"chars": 3216,
"preview": "from PyQt5.QtWidgets import QVBoxLayout\nfrom qfluentwidgets import (\n ComboBox,\n InfoBar,\n InfoBarPosition,\n "
},
{
"path": "app/components/LineEditSettingCard.py",
"chars": 1215,
"preview": "from typing import Optional\n\nfrom PyQt5.QtCore import Qt, pyqtSignal\nfrom qfluentwidgets import LineEdit, SettingCard\nfr"
},
{
"path": "app/components/MySettingCard.py",
"chars": 10089,
"preview": "# coding:utf-8\nfrom typing import List, Optional, Union\n\nfrom PyQt5.QtCore import Qt, pyqtSignal\nfrom PyQt5.QtGui import"
},
{
"path": "app/components/MyVideoWidget.py",
"chars": 20796,
"preview": "# coding:utf-8\nimport sys\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import Optional\n\nimport vlc # type"
},
{
"path": "app/components/SimpleSettingCard.py",
"chars": 2463,
"preview": "from PyQt5.QtCore import pyqtSignal\nfrom PyQt5.QtWidgets import QHBoxLayout\nfrom qfluentwidgets import (\n CaptionLabe"
},
{
"path": "app/components/SpinBoxSettingCard.py",
"chars": 2795,
"preview": "from typing import Optional, Union\n\nfrom PyQt5.QtCore import Qt, pyqtSignal\nfrom PyQt5.QtGui import QIcon\nfrom qfluentwi"
},
{
"path": "app/components/SubtitleSettingDialog.py",
"chars": 1730,
"preview": "from qfluentwidgets import (\n BodyLabel,\n MessageBoxBase,\n SwitchSettingCard,\n)\nfrom qfluentwidgets import Flue"
},
{
"path": "app/components/TranscriptionOutputDialog.py",
"chars": 1158,
"preview": "# -*- coding: utf-8 -*-\nfrom qfluentwidgets import (\n BodyLabel,\n ComboBoxSettingCard,\n MessageBoxBase,\n)\nfrom "
},
{
"path": "app/components/TranscriptionSettingDialog.py",
"chars": 1149,
"preview": "# -*- coding: utf-8 -*-\nfrom qfluentwidgets import (\n BodyLabel,\n ComboBoxSettingCard,\n MessageBoxBase,\n)\nfrom "
},
{
"path": "app/components/WhisperAPISettingWidget.py",
"chars": 7093,
"preview": "from PyQt5.QtCore import Qt, QThread, pyqtSignal\nfrom PyQt5.QtWidgets import (\n QVBoxLayout,\n QWidget,\n)\nfrom qflu"
},
{
"path": "app/components/WhisperCppSettingWidget.py",
"chars": 21171,
"preview": "import os\n\nfrom PyQt5.QtCore import Qt\nfrom PyQt5.QtWidgets import (\n QHBoxLayout,\n QHeaderView,\n QTableWidgetI"
},
{
"path": "app/components/transcription_setting_card.py",
"chars": 2087,
"preview": "from typing import Optional\n\nfrom PyQt5.QtWidgets import (\n QStackedWidget,\n QVBoxLayout,\n QWidget,\n)\n\nfrom ..c"
},
{
"path": "app/config.py",
"chars": 1494,
"preview": "import logging\nimport os\nfrom pathlib import Path\n\nVERSION = \"v1.4.0\"\nYEAR = 2025\nAPP_NAME = \"VideoCaptioner\"\nAUTHOR = \""
},
{
"path": "app/core/asr/__init__.py",
"chars": 446,
"preview": "from .bcut import BcutASR\nfrom .chunked_asr import ChunkedASR\nfrom .faster_whisper import FasterWhisperASR\nfrom .jianyin"
},
{
"path": "app/core/asr/asr_data.py",
"chars": 28924,
"preview": "import json\nimport math\nimport os\nimport platform\nimport re\nfrom pathlib import Path\nfrom typing import List, Optional, "
},
{
"path": "app/core/asr/base.py",
"chars": 6744,
"preview": "import os\nimport threading\nimport time\nimport uuid\nimport zlib\nfrom io import BytesIO\nfrom typing import Callable, Optio"
},
{
"path": "app/core/asr/bcut.py",
"chars": 6342,
"preview": "import json\nimport time\nfrom typing import Any, Callable, List, Optional, Union\n\nimport requests\n\nfrom .asr_data import "
},
{
"path": "app/core/asr/chunk_merger.py",
"chars": 9726,
"preview": "\"\"\"ASR 音频分块结果合并模块\n\n基于精确/模糊文本匹配的音频分块合并算法(参考 Groq API Cookbook)。\n使用滑动窗口找到最佳对齐位置,在重叠区域中点切分。\n\n匹配策略:\n- 词级时间戳(字级): 精确文本匹配\n- 句子"
},
{
"path": "app/core/asr/chunked_asr.py",
"chars": 7420,
"preview": "\"\"\"音频分块 ASR 装饰器\n\n为任何 BaseASR 实现添加音频分块转录能力,适用于长音频处理。\n使用装饰器模式实现关注点分离。\n\"\"\"\n\nimport io\nimport threading\nfrom concurrent.futu"
},
{
"path": "app/core/asr/faster_whisper.py",
"chars": 10936,
"preview": "import hashlib\nimport os\nimport re\nimport shutil\nimport subprocess\nimport tempfile\nfrom pathlib import Path\nfrom typing "
},
{
"path": "app/core/asr/jianying.py",
"chars": 12334,
"preview": "import datetime\nimport hashlib\nimport hmac\nimport json\nimport os\nimport time\nimport uuid\nfrom typing import Any, Callabl"
},
{
"path": "app/core/asr/status.py",
"chars": 1731,
"preview": "from enum import Enum\nfrom typing import Tuple\n\n\nclass ASRStatus(Enum):\n \"\"\"ASR processing status with progress perce"
},
{
"path": "app/core/asr/transcribe.py",
"chars": 6056,
"preview": "from app.core.asr.asr_data import ASRData\nfrom app.core.asr.bcut import BcutASR\nfrom app.core.asr.chunked_asr import Chu"
},
{
"path": "app/core/asr/whisper_api.py",
"chars": 3941,
"preview": "from typing import Any, Callable, List, Optional, Union\n\nfrom openai import OpenAI\n\nfrom app.core.llm.client import norm"
},
{
"path": "app/core/asr/whisper_cpp.py",
"chars": 10232,
"preview": "import os\nimport re\nimport shutil\nimport subprocess\nimport sys\nimport tempfile\nimport time\nfrom pathlib import Path\nfrom"
},
{
"path": "app/core/constant.py",
"chars": 298,
"preview": "\"\"\"\n常量配置模块\n\n定义应用程序中使用的常量,包括 InfoBar 显示时长等\n\"\"\"\n\n# InfoBar 显示时长配置(单位:毫秒)\nINFOBAR_DURATION_FOREVER = 24 * 60 * 60 * 1000 #"
},
{
"path": "app/core/entities.py",
"chars": 21983,
"preview": "import datetime\nimport uuid\nfrom dataclasses import dataclass, field\nfrom enum import Enum\nfrom typing import TYPE_CHECK"
},
{
"path": "app/core/llm/__init__.py",
"chars": 338,
"preview": "\"\"\"LLM unified client module.\"\"\"\n\nfrom .check_llm import check_llm_connection, get_available_models\nfrom .check_whisper "
},
{
"path": "app/core/llm/check_llm.py",
"chars": 3105,
"preview": "\"\"\"LLM 连接测试工具\"\"\"\n\nfrom typing import Literal, Optional\n\nimport openai\n\nfrom app.core.llm.client import normalize_base_ur"
},
{
"path": "app/core/llm/check_whisper.py",
"chars": 2302,
"preview": "\"\"\"Whisper API 连接测试工具\"\"\"\n\nfrom typing import Literal, Optional\n\nimport openai\n\nfrom app.config import ASSETS_PATH\nfrom a"
},
{
"path": "app/core/llm/client.py",
"chars": 3416,
"preview": "\"\"\"Unified LLM client for the application.\"\"\"\n\nimport os\nimport threading\nfrom typing import Any, List, Optional\nfrom ur"
},
{
"path": "app/core/llm/context.py",
"chars": 1341,
"preview": "\"\"\"任务上下文管理\n\n使用模块级变量存储任务上下文,确保跨线程池传递(ThreadPoolExecutor 不会自动复制 contextvars)。\n\"\"\"\n\nimport threading\nimport uuid\nfrom datac"
},
{
"path": "app/core/llm/request_logger.py",
"chars": 3406,
"preview": "import json\nimport threading\nimport time\nfrom datetime import datetime\nfrom typing import Any, Dict\n\nimport httpx\n\nfrom "
},
{
"path": "app/core/optimize/optimize.py",
"chars": 12034,
"preview": "\"\"\"字幕优化模块\n\n使用LLM优化字幕内容,支持agent loop自动验证和修正。\n\"\"\"\n\nimport atexit\nimport difflib\nimport re\nfrom concurrent.futures import T"
},
{
"path": "app/core/prompts/__init__.py",
"chars": 2303,
"preview": "\"\"\"提示词管理模块\n\n所有提示词以 Markdown 文件形式存储,支持模板变量替换。\n\n使用示例:\n from app.core.prompts import get_prompt\n\n # 加载提示词\n prompt "
},
{
"path": "app/core/prompts/analysis/video.md",
"chars": 565,
"preview": "你是一位专业视频分析师,擅长从视频字幕中提取关键信息并识别重要术语。\n\n<context>\n在视频翻译前,需要先理解视频内容和提取专业术语,以确保翻译准确性和一致性。这对于包含专业术语、人名、组织名的视频尤为重要。\n</context>\n\n"
},
{
"path": "app/core/prompts/optimize/subtitle.md",
"chars": 2615,
"preview": "You are a professional subtitle correction expert. Your task is to fix errors in video subtitles while preserving the or"
},
{
"path": "app/core/prompts/split/semantic.md",
"chars": 1282,
"preview": "你是一位专业的字幕分段专家。你的任务是将未分段的连续文本按语义断点拆分,使字幕便于阅读和理解。\n\n<instructions>\n1. 在语义自然断点处插入 <br>(可在句内、句间灵活分段)\n2. 字数限制:\n - CJK语言(中文、日"
},
{
"path": "app/core/prompts/split/sentence.md",
"chars": 1330,
"preview": "你是一位专业的字幕分句专家。你的任务是将未分段的连续文本按句子结构拆分,在句子的自然停顿点或者语义断点插入分隔符。\n\n<instructions>\n1. 在句子边界处插入 <br> (句号、逗号、分号等标点符号应出现的位置)\n2. 分割段的"
},
{
"path": "app/core/prompts/translate/reflect.md",
"chars": 4684,
"preview": "You are a professional subtitle translator specializing in ${target_language}. Your goal is to produce translations that"
},
{
"path": "app/core/prompts/translate/single.md",
"chars": 196,
"preview": "You are a professional ${target_language} translator.\nPlease translate the following text into ${target_language}.\nRetur"
},
{
"path": "app/core/prompts/translate/standard.md",
"chars": 887,
"preview": "You are a professional subtitle translator specializing in ${target_language}. Your goal is to produce translations that"
},
{
"path": "app/core/split/alignment.py",
"chars": 7154,
"preview": "import difflib\n\n\nclass SubtitleAligner:\n \"\"\"\n 字幕文本对齐器,用于对齐两个文本序列,支持基于相似度的匹配。当目标文本缺少某项时,会使用其上一项进行填充。\n\n 使用示例:\n "
},
{
"path": "app/core/split/split.py",
"chars": 22806,
"preview": "import atexit\nimport difflib\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom typing import List, Un"
},
{
"path": "app/core/split/split_by_llm.py",
"chars": 6793,
"preview": "import difflib\nimport re\nfrom typing import List, Tuple\n\nfrom ..llm import call_llm\nfrom ..prompts import get_prompt\nfro"
},
{
"path": "app/core/subtitle/README.md",
"chars": 3047,
"preview": "# 字幕渲染模块\n\n提供两种字幕渲染方式:\n- **ASS 样式**:FFmpeg + libass 渲染(支持 CUDA 加速)\n- **圆角背景**:PIL 绘制现代风格字幕(带圆角矩形背景)\n\n## 模块结构\n\n```\napp/cor"
},
{
"path": "app/core/subtitle/__init__.py",
"chars": 1297,
"preview": "\"\"\"Subtitle rendering module (ASS and rounded background styles)\"\"\"\n\nfrom typing import Optional\n\nfrom app.config import"
},
{
"path": "app/core/subtitle/ass_renderer.py",
"chars": 13283,
"preview": "\"\"\"ASS subtitle renderer\"\"\"\n\nimport os\nimport re\nimport subprocess\nimport tempfile\nfrom pathlib import Path\nfrom typing "
},
{
"path": "app/core/subtitle/ass_utils.py",
"chars": 9984,
"preview": "\"\"\"ASS subtitle utilities with accurate text width calculation\"\"\"\n\nimport re\nfrom dataclasses import dataclass\nfrom typi"
},
{
"path": "app/core/subtitle/font_utils.py",
"chars": 5656,
"preview": "\"\"\"Font discovery and loading utilities\"\"\"\n\nfrom functools import lru_cache\nfrom pathlib import Path\nfrom typing import "
},
{
"path": "app/core/subtitle/rounded_renderer.py",
"chars": 14601,
"preview": "\"\"\"Rounded background subtitle renderer\"\"\"\n\nimport os\nimport re\nimport subprocess\nimport tempfile\nfrom dataclasses impor"
},
{
"path": "app/core/subtitle/styles.py",
"chars": 674,
"preview": "\"\"\"Subtitle style configurations\"\"\"\n\nfrom dataclasses import dataclass\n\nfrom app.core.entities import SubtitleLayoutEnum"
},
{
"path": "app/core/subtitle/text_utils.py",
"chars": 7862,
"preview": "\"\"\"Text processing utilities\"\"\"\n\nimport re\nfrom typing import List, Tuple\n\nfrom .font_utils import FontType\n\n# CJK and A"
},
{
"path": "app/core/task_factory.py",
"chars": 10468,
"preview": "import datetime\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom app.common.config import cfg\nfrom app.config "
},
{
"path": "app/core/translate/__init__.py",
"chars": 758,
"preview": "\"\"\"\n翻译模块\n\n提供多种翻译服务:OpenAI LLM、Google、Bing、DeepLX\n\"\"\"\n\nfrom app.core.entities import SubtitleProcessData\nfrom app.core.tr"
},
{
"path": "app/core/translate/base.py",
"chars": 5154,
"preview": "\"\"\"翻译器基类\"\"\"\n\nimport atexit\nfrom abc import ABC, abstractmethod\nfrom concurrent.futures import ThreadPoolExecutor, as_com"
},
{
"path": "app/core/translate/bing_translator.py",
"chars": 3588,
"preview": "\"\"\"Bing 翻译器\"\"\"\n\nfrom typing import Callable, List, Optional\n\nimport requests\n\nfrom app.core.entities import SubtitleProc"
},
{
"path": "app/core/translate/deeplx_translator.py",
"chars": 2030,
"preview": "\"\"\"DeepLX 翻译器\"\"\"\n\nimport os\nfrom typing import Callable, List, Optional\n\nimport requests\n\nfrom app.core.translate.base i"
},
{
"path": "app/core/translate/factory.py",
"chars": 2717,
"preview": "\"\"\"翻译器工厂\"\"\"\n\nfrom typing import Callable, Optional\n\nfrom app.core.translate.base import BaseTranslator\nfrom app.core.tra"
},
{
"path": "app/core/translate/google_translator.py",
"chars": 2686,
"preview": "\"\"\"Google 翻译器\"\"\"\n\nimport html\nimport re\nfrom typing import Callable, List, Optional\n\nimport requests\n\nfrom app.core.enti"
},
{
"path": "app/core/translate/llm_translator.py",
"chars": 7604,
"preview": "\"\"\"LLM 翻译器(使用 OpenAI)\"\"\"\n\nimport json\nfrom typing import Any, Callable, Dict, List, Optional, Tuple\n\nimport json_repair\n"
},
{
"path": "app/core/translate/types.py",
"chars": 5814,
"preview": "\"\"\"翻译器类型枚举\"\"\"\n\nfrom enum import Enum\n\n\nclass TranslatorType(Enum):\n \"\"\"翻译器类型\"\"\"\n\n OPENAI = \"openai\"\n GOOGLE = \""
},
{
"path": "app/core/tts/__init__.py",
"chars": 467,
"preview": "\"\"\"TTS (Text-To-Speech) 模块\n\n提供多种 TTS 服务的统一接口\n\"\"\"\n\nfrom .base import BaseTTS\nfrom .openai_fm import OpenAIFmTTS\nfrom .ope"
},
{
"path": "app/core/tts/base.py",
"chars": 5457,
"preview": "\"\"\"TTS 基类 - 提供缓存、批量处理等通用功能\"\"\"\n\nimport hashlib\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nfrom typing i"
},
{
"path": "app/core/tts/openai_fm.py",
"chars": 3184,
"preview": "\"\"\"OpenAI.fm TTS 实现\n\nOpenAI.fm 是一个免费的 TTS 服务,提供多种音色和语音风格。\nAPI 文档: https://www.openai.fm/\n\"\"\"\n\nfrom urllib.parse import q"
},
{
"path": "app/core/tts/openai_tts.py",
"chars": 1578,
"preview": "\"\"\"OpenAI TTS 实现(支持 OpenAI 兼容接口)\"\"\"\n\nfrom openai import OpenAI\n\nfrom app.core.tts.base import BaseTTS\nfrom app.core.tts."
},
{
"path": "app/core/tts/siliconflow.py",
"chars": 5745,
"preview": "\"\"\"SiliconFlow TTS 实现\"\"\"\n\nimport hashlib\nfrom pathlib import Path\n\nimport requests\n\nfrom app.core.tts.base import BaseTT"
},
{
"path": "app/core/tts/status.py",
"chars": 1295,
"preview": "from enum import Enum\nfrom typing import Tuple\n\n\nclass TTSStatus(Enum):\n \"\"\"TTS processing status with progress perce"
},
{
"path": "app/core/tts/tts_data.py",
"chars": 2567,
"preview": "\"\"\"TTS 数据结构定义\"\"\"\n\nfrom dataclasses import dataclass\nfrom typing import List, Literal, Optional\n\n\n@dataclass\nclass TTSCon"
},
{
"path": "app/core/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "app/core/utils/cache.py",
"chars": 3565,
"preview": "\"\"\"Disk cache utility for API responses and computation results.\n\nThis module provides a simple interface for caching us"
},
{
"path": "app/core/utils/logger.py",
"chars": 2142,
"preview": "import logging\nimport logging.handlers\nfrom pathlib import Path\n\nfrom ...config import LOG_LEVEL, LOG_PATH\n\n\ndef setup_l"
},
{
"path": "app/core/utils/platform_utils.py",
"chars": 3071,
"preview": "\"\"\"\n跨平台工具函数\n\"\"\"\n\nimport logging\nimport os\nimport platform\nimport subprocess\n\nfrom app.core.entities import TranscribeMod"
},
{
"path": "app/core/utils/subprocess_helper.py",
"chars": 4502,
"preview": "\"\"\"子进程输出流处理工具模块\"\"\"\n\nimport queue\nimport subprocess\nimport threading\nfrom typing import Callable, Optional, Tuple\n\nfrom ."
},
{
"path": "app/core/utils/text_utils.py",
"chars": 2227,
"preview": "\"\"\"多语言文本处理工具\n\n统一的文本分析工具,支持CJK和世界多语言字符统计。\n\"\"\"\n\nimport re\n\n# ==================== Unicode 字符范围定义 ====================\n\n# 按"
},
{
"path": "app/core/utils/video_utils.py",
"chars": 17393,
"preview": "import os\nimport re\nimport shutil\nimport subprocess\nimport tempfile\nfrom contextlib import contextmanager\nfrom pathlib i"
},
{
"path": "app/thread/batch_process_thread.py",
"chars": 12002,
"preview": "import queue\nimport time\nfrom functools import partial\nfrom typing import Dict, Optional\n\nfrom PyQt5.QtCore import QThre"
},
{
"path": "app/thread/file_download_thread.py",
"chars": 6817,
"preview": "import shutil\nimport subprocess\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\n\nimport requests\nfrom PyQt5"
},
{
"path": "app/thread/modelscope_download_thread.py",
"chars": 3118,
"preview": "import io\nimport logging\nimport sys\nfrom typing import Callable\n\nfrom modelscope.hub.callback import ProgressCallback\nfr"
},
{
"path": "app/thread/subtitle_pipeline_thread.py",
"chars": 4610,
"preview": "import datetime\n\nfrom PyQt5.QtCore import QThread, pyqtSignal\n\nfrom app.core.entities import (\n FullProcessTask,\n "
},
{
"path": "app/thread/subtitle_thread.py",
"chars": 11981,
"preview": "import os\nfrom pathlib import Path\nfrom typing import List, Optional\n\nfrom PyQt5.QtCore import QThread, pyqtSignal\n\nfrom"
},
{
"path": "app/thread/transcript_thread.py",
"chars": 4996,
"preview": "import datetime\nimport tempfile\nfrom pathlib import Path\n\nfrom PyQt5.QtCore import QThread, pyqtSignal\n\nfrom app.core.as"
},
{
"path": "app/thread/version_checker_thread.py",
"chars": 5433,
"preview": "# coding: utf-8\nimport hashlib\nfrom datetime import datetime\n\nimport requests\nfrom PyQt5.QtCore import QObject, QVersion"
},
{
"path": "app/thread/video_download_thread.py",
"chars": 7333,
"preview": "import os\nimport re\nfrom pathlib import Path\n\nimport requests\nimport yt_dlp\nfrom PyQt5.QtCore import QThread, pyqtSignal"
},
{
"path": "app/thread/video_info_thread.py",
"chars": 1077,
"preview": "import tempfile\nfrom pathlib import Path\n\nfrom PyQt5.QtCore import QThread, pyqtSignal\n\nfrom app.core.entities import Vi"
},
{
"path": "app/thread/video_synthesis_thread.py",
"chars": 3885,
"preview": "import datetime\nimport tempfile\nfrom pathlib import Path\n\nfrom PyQt5.QtCore import QThread, pyqtSignal\n\nfrom app.core.as"
},
{
"path": "app/view/batch_process_interface.py",
"chars": 15667,
"preview": "import os\n\nfrom PyQt5.QtCore import Qt, QUrl\nfrom PyQt5.QtGui import QColor, QDesktopServices, QFont\nfrom PyQt5.QtWidget"
},
{
"path": "app/view/home_interface.py",
"chars": 5044,
"preview": "from typing import Optional\n\nfrom PyQt5.QtWidgets import QSizePolicy, QStackedWidget, QVBoxLayout, QWidget\nfrom qfluentw"
},
{
"path": "app/view/llm_logs_interface.py",
"chars": 15947,
"preview": "\"\"\"LLM 请求日志查看界面\"\"\"\n\nimport json\nfrom typing import Any, Dict, List\n\nfrom PyQt5.QtCore import QFileSystemWatcher, Qt\nfrom"
},
{
"path": "app/view/log_window.py",
"chars": 5310,
"preview": "import os\nimport platform\nimport subprocess\n\nfrom PyQt5.QtCore import Qt, QTimer\nfrom PyQt5.QtGui import QTextCursor\nfro"
},
{
"path": "app/view/main_window.py",
"chars": 6820,
"preview": "import atexit\nimport os\nimport shutil\n\nimport psutil\nfrom PyQt5.QtCore import QSize, QThread, QUrl\nfrom PyQt5.QtGui impo"
},
{
"path": "app/view/setting_interface.py",
"chars": 35233,
"preview": "import webbrowser\n\nfrom PyQt5.QtCore import Qt, QThread, QUrl, pyqtSignal\nfrom PyQt5.QtGui import QDesktopServices\nfrom "
},
{
"path": "app/view/subtitle_interface.py",
"chars": 32841,
"preview": "# -*- coding: utf-8 -*-\nimport json\nimport os\nimport sys\nimport tempfile\nfrom pathlib import Path\nfrom typing import Any"
},
{
"path": "app/view/subtitle_style_interface.py",
"chars": 45515,
"preview": "import json\nfrom pathlib import Path\nfrom typing import Optional, Tuple\n\nfrom PIL import ImageFont\nfrom PyQt5.QtCore imp"
},
{
"path": "app/view/task_creation_interface.py",
"chars": 13394,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport sys\nfrom urllib.parse import urlparse\n\nfrom PyQt5.QtCore import QStandardPaths,"
},
{
"path": "app/view/transcription_interface.py",
"chars": 20331,
"preview": "# -*- coding: utf-8 -*-\n\nimport datetime\nimport os\nimport sys\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom"
},
{
"path": "app/view/video_synthesis_interface.py",
"chars": 20834,
"preview": "# -*- coding: utf-8 -*-\n\nimport os\nimport sys\nfrom pathlib import Path\n\nfrom PyQt5.QtCore import Qt, pyqtSignal\nfrom PyQ"
},
{
"path": "docs/.vitepress/config.mts",
"chars": 14277,
"preview": "import { defineConfig } from 'vitepress'\n\nexport default defineConfig({\n title: 'VideoCaptioner',\n description: '基于大语言"
},
{
"path": "docs/.vitepress/theme/CustomHome.vue",
"chars": 14131,
"preview": "<template>\n <div class=\"custom-home\">\n <!-- Hero Section -->\n <section class=\"hero-section\">\n <div class=\"he"
},
{
"path": "docs/.vitepress/theme/custom.css",
"chars": 8816,
"preview": "/**\n * VideoCaptioner Custom Theme\n * Inspired by Anthropic's modern, elegant design\n */\n\n/* ===== Color Variables ====="
},
{
"path": "docs/.vitepress/theme/index.ts",
"chars": 230,
"preview": "import DefaultTheme from \"vitepress/theme\";\nimport CustomHome from \"./CustomHome.vue\";\nimport \"./custom.css\";\n\nexport de"
},
{
"path": "docs/README.md",
"chars": 4119,
"preview": "# VideoCaptioner 文档\n\n这是 VideoCaptioner 项目的文档源文件,使用 [VitePress](https://vitepress.dev/) 构建。\n\n## 📚 在线查看\n\n文档已自动部署到 GitHub P"
},
{
"path": "docs/config/asr.md",
"chars": 332,
"preview": "# ASR 配置指南\n\n语音识别(ASR)配置详解。\n\n## 支持的 ASR 引擎\n\n| 引擎 | 特点 | 推荐场景 |\n|------|------|---------|\n| **FasterWhisper** | 准确度高,支持GPU"
},
{
"path": "docs/config/cookies.md",
"chars": 250,
"preview": "# Cookie 配置指南\n\n配置 Cookie 以下载高清视频。\n\n## 何时需要配置 Cookie?\n\n在以下情况下需要配置 Cookie:\n\n1. 下载视频网站需要登录信息\n2. 只能下载较低分辨率的视频\n3. 网络条件较差时需要验证"
},
{
"path": "docs/config/llm.md",
"chars": 6615,
"preview": "---\ntitle: LLM 配置指南 - VideoCaptioner\ndescription: 详细的 LLM API 配置教程,支持 OpenAI、DeepSeek、SiliconCloud、Gemini、Ollama 等多种服务商。"
},
{
"path": "docs/config/translator.md",
"chars": 305,
"preview": "# 翻译配置指南\n\n字幕翻译配置详解。\n\n## 支持的翻译服务\n\n| 服务 | 特点 | 推荐场景 |\n|------|------|---------|\n| **LLM 翻译** | 质量最好 | 追求质量 |\n| **Bing 翻译**"
},
{
"path": "docs/dev/api.md",
"chars": 333,
"preview": "# API 文档\n\n核心 API 接口文档。\n\n## ASR API\n\n### `transcribe()`\n\n```python\nfrom app.core.asr import transcribe\n\nresult = transcri"
},
{
"path": "docs/dev/architecture.md",
"chars": 591,
"preview": "# 架构设计\n\nVideoCaptioner 的系统架构设计。\n\n## 技术栈\n\n- **UI 框架**: PyQt5 + QFluentWidgets\n- **ASR 引擎**: Whisper (FasterWhisper/Whispe"
},
{
"path": "docs/dev/asr-chunk-merger.md",
"chars": 4826,
"preview": "# ChunkMerger 使用指南\n\n## 概述\n\n`ChunkMerger` 用于合并多个音频分块的 ASR(语音识别)结果。当处理长音频时,通常需要将音频分割成多个片段分别识别,然后合并结果。本模块使用精确文本匹配算法(基于 Groq"
},
{
"path": "docs/dev/asr-chunked-usage.md",
"chars": 5093,
"preview": "# ChunkedASR 使用指南\n\n## 概述\n\n`ChunkedASR` 是一个装饰器类,为任何 `BaseASR` 实现添加音频分块转录能力。适用于长音频(>20分钟)的分块转录,避免 API 超时或内存溢出。\n\n## 核心特性\n\n-"
},
{
"path": "docs/dev/contributing.md",
"chars": 581,
"preview": "# 贡献指南\n\n感谢你对 VideoCaptioner 的贡献!\n\n## 开发环境设置\n\n1. Fork 本仓库\n2. 克隆你的 Fork\n3. 安装开发依赖\n\n```bash\ngit clone https://github.com/YO"
},
{
"path": "docs/dev/translate-module.md",
"chars": 3676,
"preview": "# 翻译模块 (Translate Module)\n\n多语言字幕翻译模块,支持多种翻译服务。\n\n## 模块结构\n\n```\napp/core/translate/\n├── __init__.py # 模块导出\n├──"
},
{
"path": "docs/dev/view-structure.md",
"chars": 704,
"preview": "view/ 目录结构:用户界面 (UI) 模块 \n\n下面是本软件的一个主要页面结构,方便开发者查看和修改。\n\n\n```\n├── main_window.py ------------------ 主窗口 (应用程序框架)\n│ │\n"
},
{
"path": "docs/en/config/asr.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/config/cookies.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/config/llm.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/config/translator.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/dev/api.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/dev/architecture.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/dev/contributing.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/guide/batch-processing.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/guide/configuration.md",
"chars": 54,
"preview": "# Configuration\n\nEnglish documentation coming soon...\n"
},
{
"path": "docs/en/guide/faq.md",
"chars": 44,
"preview": "# FAQ\n\nEnglish documentation coming soon...\n"
},
{
"path": "docs/en/guide/getting-started.md",
"chars": 102,
"preview": "# Getting Started\n\nEnglish documentation coming soon...\n\nPlease refer to the Chinese version for now.\n"
},
{
"path": "docs/en/guide/manuscript.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/guide/subtitle-style.md",
"chars": 0,
"preview": ""
},
{
"path": "docs/en/guide/workflow.md",
"chars": 49,
"preview": "# Workflow\n\nEnglish documentation coming soon...\n"
},
{
"path": "docs/en/index.md",
"chars": 6227,
"preview": "---\nlayout: home\ntitle: VideoCaptioner - AI Video Subtitle Tool | Free & Open Source\ntitleTemplate: false\ndescription: F"
},
{
"path": "docs/guide/configuration.md",
"chars": 148,
"preview": "# 配置指南\n\n详细的配置选项说明。\n\n## 全局配置\n\n待补充...\n\n## 高级配置\n\n待补充...\n\n---\n\n更多配置细节,请参考:\n- [LLM 配置](/config/llm)\n- [ASR 配置](/config/asr)\n-"
},
{
"path": "docs/guide/cookies-config.md",
"chars": 3203,
"preview": "# Cookie 配置指南\n\n本指南将帮助你配置浏览器 Cookie,以便下载需要登录才能访问的视频。\n\n## 为什么需要配置 Cookie?\n\n在使用 VideoCaptioner 下载视频时,你可能会遇到以下错误:\n\n中的安装步骤。\n\n## 使用问题\n\n### Q: 转录时出现幻觉或重复怎么办?\n\nA:"
},
{
"path": "docs/guide/getting-started.md",
"chars": 5907,
"preview": "---\ntitle: 快速开始 - VideoCaptioner\ndescription: 快速安装和配置 VideoCaptioner,5分钟开始处理你的第一个视频字幕。支持 Windows、macOS、Linux 多平台。\nhead:\n"
},
{
"path": "docs/guide/llm-config.md",
"chars": 3601,
"preview": "# LLM API 配置指南\n\n本指南将帮助你配置大语言模型(LLM)API,用于字幕的智能断句、优化和翻译。\n\n## 为什么需要配置 LLM?\n\nVideoCaptioner 使用 LLM 提供以下核心功能:\n\n- **智能断句** - "
},
{
"path": "docs/guide/quick-example.md",
"chars": 5687,
"preview": "# 快速示例教程\n\n通过一个 TED 演讲视频的完整处理流程,快速了解 VideoCaptioner 的强大功能。\n\n:::tip 示例视频信息\n\n- 视频时长:14 分钟\n- 原始语言:英语\n- 目标语言:简体中文\n- 总处理时间:约 4"
},
{
"path": "docs/guide/workflow.md",
"chars": 187,
"preview": "# 工作流程\n\n了解 VideoCaptioner 的完整工作流程。\n\n## 处理流程图\n\n```\n视频输入 → 语音识别 → 字幕分割 → 字幕优化 → 字幕翻译 → 视频合成\n```\n\n## 详细说明\n\n待补充...\n\n---\n\n相关文"
},
{
"path": "docs/index.md",
"chars": 1256,
"preview": "---\nlayout: page\ntitle: VideoCaptioner - 基于LLM的智能视频字幕处理工具\ntitleTemplate: false\ndescription: 免费开源的AI视频字幕处理助手,支持Whisper语音识"
},
{
"path": "docs/package-lock.json",
"chars": 88024,
"preview": "{\n \"name\": \"videocaptioner-docs\",\n \"version\": \"1.4.0\",\n \"lockfileVersion\": 3,\n \"requires\": true,\n \"packages\": {\n "
},
{
"path": "docs/package.json",
"chars": 388,
"preview": "{\n \"name\": \"videocaptioner-docs\",\n \"version\": \"1.4.0\",\n \"description\": \"Documentation site for VideoCaptioner\",\n \"pr"
},
{
"path": "docs/public/BingSiteAuth.xml",
"chars": 96,
"preview": "<?xml version=\"1.0\"?>\n<users>\n <user><!-- 需要时在 Bing Webmaster Tools 中获取验证码 --></user>\n</users>\n"
},
{
"path": "docs/public/robots.txt",
"chars": 144,
"preview": "# https://www.robotstxt.org/robotstxt.html\nUser-agent: *\nAllow: /\n\n# Sitemaps\nSitemap: https://weifeng2333.github.io/Vid"
},
{
"path": "legacy-docs/README_EN.md",
"chars": 15366,
"preview": "<div align=\"center\">\n <img src=\"./images/logo.png\"alt=\"VideoCaptioner Logo\" width=\"100\">\n <p>Kaka Subtitle Assistant</"
},
{
"path": "legacy-docs/README_JA.md",
"chars": 9408,
"preview": "<div align=\"center\">\n <img src=\"./images/logo.png\" alt=\"VideoCaptioner ロゴ\" width=\"100\">\n <p>Kaka カカ字幕アシスタント</p>\n <h1>"
},
{
"path": "legacy-docs/README_TW.md",
"chars": 10067,
"preview": "<div align=\"center\">\n <img src=\"./images/logo.png\" alt=\"VideoCaptioner Logo\" width=\"100\">\n <p>卡卡字幕助手</p>\n <h1>VideoCa"
},
{
"path": "legacy-docs/about_chunk_merge.md",
"chars": 107,
"preview": "\nhttps://github.com/groq/groq-api-cookbook/blob/main/tutorials/audio-chunking/audio_chunking_tutorial.ipynb"
},
{
"path": "legacy-docs/get_cookies.md",
"chars": 764,
"preview": "# Cookie 配置说明\n\n## 问题说明\n在使用软件下载视频时,可能会遇到以下错误提示:\n\n\n\n这是因为:\n1. 某些视频平台(如B站)需要用户登录信息才能获取高"
},
{
"path": "legacy-docs/llm_config.md",
"chars": 1434,
"preview": "\n目前国内多家大模型厂商都提供了API接口,可以自行申请。也可以使用中转站,使用 OpenAI 或 Claude的API。\n\n本教程以两种配置方式为例进行说明:\n\n[SiliconFlow-API 配置](./llm_config.md#S"
},
{
"path": "legacy-docs/test.md",
"chars": 3888,
"preview": "### 使用 Whisper 转录\n\n\n### 转录成功以后的字幕\n```\n1\n00:00:02,080 --> 00:00:08,600\nSo in college, I wa"
},
{
"path": "main.py",
"chars": 2589,
"preview": "\"\"\"\nCopyright (c) 2024 [VideoCaptioner]\nAll rights reserved.\n\nAuthor: Weifeng\n\"\"\"\n\nimport os\nimport platform\nimport sys\n"
},
{
"path": "pyproject.toml",
"chars": 3378,
"preview": "[project]\nname = \"videocaptioner\"\nversion = \"1.3.3\"\ndescription = \"AI-powered video captioning tool based on LLM\"\nreadme"
},
{
"path": "resource/assets/qss/dark/demo.qss",
"chars": 223,
"preview": "QWidget {\n border: 1px solid rgb(29, 29, 29);\n border-right: none;\n border-bottom: none;\n border-top-left-ra"
},
{
"path": "resource/assets/qss/light/demo.qss",
"chars": 300,
"preview": "Widget > QLabel {\n font: 24px 'Segoe UI', 'Microsoft YaHei';\n}\n\nWidget {\n border: 1px solid rgb(229, 229, 229);\n "
},
{
"path": "resource/subtitle_style/default.json",
"chars": 234,
"preview": "{\n \"font_name\": \"LXGW WenKai\",\n \"font_size\": 32,\n \"text_color\": \"#000000\",\n \"bg_color\": \"#0de3ffe5\",\n \"corner_radiu"
},
{
"path": "resource/subtitle_style/default.txt",
"chars": 462,
"preview": "[V4+ Styles]\nFormat: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline"
},
{
"path": "resource/subtitle_style/毕导科普风.txt",
"chars": 460,
"preview": "[V4+ Styles]\nFormat: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline"
},
{
"path": "resource/subtitle_style/番剧可爱风.txt",
"chars": 460,
"preview": "[V4+ Styles]\nFormat: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline"
},
{
"path": "resource/subtitle_style/竖屏.txt",
"chars": 462,
"preview": "[V4+ Styles]\nFormat: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline"
},
{
"path": "resource/translations/VideoCaptioner_en_US.ts",
"chars": 103406,
"preview": "<?xml version='1.0' encoding='utf-8'?>\n<TS version=\"2.1\">\n<context>\n <name>BatchProcessInterface</name>\n <message>"
},
{
"path": "resource/translations/VideoCaptioner_zh_CN.qm",
"chars": 7,
"preview": "<d\u0018!\u001c`"
},
{
"path": "resource/translations/VideoCaptioner_zh_CN.ts",
"chars": 99089,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE TS>\n<TS version=\"2.1\">\n<context>\n <name>BatchProcessInterface</name>"
},
{
"path": "resource/translations/VideoCaptioner_zh_HK.ts",
"chars": 94786,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE TS>\n<TS version=\"2.1\">\n<context>\n <name>BatchProcessInterface</name>"
},
{
"path": "scripts/lint.sh",
"chars": 485,
"preview": "#!/bin/bash\n# Clean unused imports and sort import order in Python project\n\necho \"🧹 Cleaning unused imports...\"\n\n# Remov"
},
{
"path": "scripts/run.bat",
"chars": 3563,
"preview": "@echo off\nchcp 65001 >nul\nsetlocal EnableDelayedExpansion\n\n:: VideoCaptioner Installer & Launcher for Windows\n:: Usage: "
},
{
"path": "scripts/run.sh",
"chars": 4750,
"preview": "#!/bin/bash\n# VideoCaptioner Installer & Launcher for macOS/Linux\n# Usage: curl -fsSL https://raw.githubusercontent.com/"
},
{
"path": "scripts/trans-compile.sh",
"chars": 1791,
"preview": "#!/bin/bash\n# Compile .ts translation files to .qm binary files\n# Usage: ./scripts/trans-compile.sh [language_code]\n# "
},
{
"path": "scripts/trans-extract.sh",
"chars": 2285,
"preview": "#!/bin/bash\n# Extract translation strings from Python code to .ts files\n# Auto-removes obsolete entries\n# Usage: ./scrip"
},
{
"path": "scripts/translate_llm.py",
"chars": 9274,
"preview": "#!/usr/bin/env python3\n\"\"\"\nTranslate .ts files using OpenAI Structured Outputs\n\nEnsures 1:1 mapping between source and t"
},
{
"path": "tests/README.md",
"chars": 2612,
"preview": "# 测试套件\n\nVideoCaptioner 翻译模块的集成测试。\n\n## 📁 测试文件\n\n```\ntests/test_translate/\n├── test_google_translator.py # Google 翻译器(免费 "
},
{
"path": "tests/__init__.py",
"chars": 39,
"preview": "\"\"\"\n测试套件\n\n用于测试 VideoCaptioner 核心功能\n\"\"\"\n"
},
{
"path": "tests/conftest.py",
"chars": 14060,
"preview": "\"\"\"Root-level test configuration and shared fixtures.\n\nThis conftest.py provides shared fixtures and utilities for all t"
},
{
"path": "tests/fixtures/README.md",
"chars": 1563,
"preview": "# Test Fixtures\n\nThis directory contains shared test resources used across multiple test modules.\n\n## Structure\n\n```\ntes"
},
{
"path": "tests/test_asr/README.md",
"chars": 4162,
"preview": "# ASR Integration Tests\n\nThis directory contains integration tests for various ASR (Automatic Speech Recognition) servic"
},
{
"path": "tests/test_asr/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/test_asr/conftest.py",
"chars": 3517,
"preview": "\"\"\"ASR-specific fixtures and utilities for integration tests.\n\nThis conftest.py provides ASR-specific fixtures that are "
},
{
"path": "tests/test_asr/test_asr_data.py",
"chars": 16194,
"preview": "\"\"\"ASRData 核心功能测试 - 严格边缘用例\"\"\"\n\nimport tempfile\nfrom pathlib import Path\n\nimport pytest\n\nfrom app.core.asr.asr_data impor"
},
{
"path": "tests/test_asr/test_bcut_asr.py",
"chars": 4807,
"preview": "\"\"\"BcutASR integration tests.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\n\nfrom app.core.asr import BcutASR\nfrom app.cor"
},
{
"path": "tests/test_asr/test_chunk_merger.py",
"chars": 24740,
"preview": "\"\"\"ChunkMerger 真实场景测试套件\n\n测试策略:\n1. 使用真实的 ASR 输出场景(句子级 + 字/词级)\n2. 覆盖中文、英文、中英混合场景\n3. 测试 ASR 识别错误的真实 bad cases\n4. 直接验证合并后的完整"
},
{
"path": "tests/test_asr/test_chunked_asr.py",
"chars": 15249,
"preview": "\"\"\"ChunkedASR 全面测试\n\n测试策略:\n1. 使用 Mock ASR 避免实际 API 调用\n2. 覆盖所有核心功能(分块、并发、合并)\n3. 测试边界情况(短音频、单块、错误等)\n4. 验证进度回调机制\n5. 确保线程安全和并"
},
{
"path": "tests/test_asr/test_chunking.py",
"chars": 11850,
"preview": "\"\"\"音频分块 ASR 功能的真实场景测试\n\n测试覆盖:\n1. 音频切割功能(pydub)\n2. 并发转录功能(ThreadPoolExecutor)\n3. 结果合并功能(ChunkMerger)\n4. 边界情况(短音频、单块、空音频等)\n"
},
{
"path": "tests/test_asr/test_jianying_asr.py",
"chars": 4994,
"preview": "\"\"\"JianYingASR integration tests.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\n\nfrom app.core.asr import JianYingASR\nfrom"
},
{
"path": "tests/test_asr/test_whisper_api_asr.py",
"chars": 3583,
"preview": "\"\"\"WhisperAPI integration tests.\"\"\"\n\nimport os\nfrom pathlib import Path\n\nimport pytest\n\nfrom app.core.asr import Whisper"
},
{
"path": "tests/test_optimize/test_optimize.py",
"chars": 3513,
"preview": "\"\"\"Subtitle optimizer tests.\n\nRequires environment variables:\n OPENAI_BASE_URL: OpenAI-compatible API endpoint\n OP"
},
{
"path": "tests/test_split/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/test_split/test_alignment.py",
"chars": 2847,
"preview": "\"\"\"字幕对齐模块测试\n\n测试 app/core/split/alignment.py 中的核心功能\n\"\"\"\n\nimport pytest\n\nfrom app.core.split.alignment import SubtitleAlig"
},
{
"path": "tests/test_split/test_split.py",
"chars": 14899,
"preview": "\"\"\"字幕分割模块测试 - 严格边缘用例\n\n测试 app/core/split/split.py 中的核心功能\n\"\"\"\n\nimport pytest\n\nfrom app.core.asr.asr_data import ASRData, A"
}
]
// ... and 23 more files (download for full content)
About this extraction
This page contains the full source code of the WEIFENG2333/VideoCaptioner GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 223 files (1.5 MB), approximately 430.6k tokens, and a symbol index with 1434 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.