Repository: HFrost0/Lighting-bilibili-download Branch: master Commit: bb5b234cdfe3 Files: 95 Total size: 235.6 KB Directory structure: gitextract_wman3yls/ ├── .github/ │ └── workflows/ │ ├── doc-deploy.yml │ ├── python-app.yml │ └── python-publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── CONTRIBUTING_EN.md ├── LICENSE ├── README.md ├── bilix/ │ ├── __init__.py │ ├── __main__.py │ ├── _process.py │ ├── cli/ │ │ ├── assign.py │ │ └── main.py │ ├── download/ │ │ ├── base_downloader.py │ │ ├── base_downloader_m3u8.py │ │ ├── base_downloader_part.py │ │ └── utils.py │ ├── exception.py │ ├── ffmpeg.py │ ├── log.py │ ├── progress/ │ │ ├── abc.py │ │ ├── cli_progress.py │ │ └── ws_progress.py │ ├── sites/ │ │ ├── bilibili/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ ├── downloader.py │ │ │ ├── downloader_test.py │ │ │ ├── informer.py │ │ │ ├── informer_test.py │ │ │ ├── utils.py │ │ │ └── utils_test.py │ │ ├── cctv/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ └── downloader.py │ │ ├── douyin/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ ├── downloader.py │ │ │ └── downloader_test.py │ │ ├── hanime1/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ └── downloader.py │ │ ├── jable/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ └── downloader.py │ │ ├── tiktok/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ ├── downloader.py │ │ │ └── downloader_test.py │ │ ├── yhdmp/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ ├── downloader.py │ │ │ └── yhdmp.js │ │ ├── yinghuacd/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── api_test.py │ │ │ └── downloader.py │ │ └── youtube/ │ │ ├── __init__.py │ │ ├── api.py │ │ ├── api_test.py │ │ └── downloader.py │ └── utils.py ├── docs/ │ ├── .vitepress/ │ │ ├── config.ts │ │ └── theme/ │ │ ├── index.ts │ │ └── style/ │ │ └── var.css │ ├── advance_guide.md │ ├── api_examples.md │ ├── async.md │ 
├── download_examples.md │ ├── en/ │ │ ├── advance_guide.md │ │ ├── api_examples.md │ │ ├── async.md │ │ ├── download_examples.md │ │ ├── index.md │ │ ├── install.md │ │ ├── more.md │ │ └── quickstart.md │ ├── index.md │ ├── install.md │ ├── more.md │ ├── package.json │ └── quickstart.md ├── examples/ │ ├── a_very_simple_example.py │ ├── download_by_timerange.py │ ├── limit_download_rate.py │ ├── multi_site_download_same_time.py │ ├── multi_type_tasks.py │ └── use_of_api.py └── pyproject.toml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/doc-deploy.yml ================================================ name: Document Deploy on: workflow_dispatch: { } push: paths: - 'docs/**' branches: - master jobs: deploy: runs-on: ubuntu-latest permissions: pages: write id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - uses: actions/setup-node@v3 with: node-version: 16 cache: 'npm' cache-dependency-path: docs/package-lock.json - name: Install dependencies and build run: | npm ci npm run docs:build working-directory: docs - uses: actions/configure-pages@v2 - uses: actions/upload-pages-artifact@v1 with: path: docs/.vitepress/dist - name: Deploy id: deployment uses: actions/deploy-pages@v1 ================================================ FILE: .github/workflows/python-app.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Python application on: push: paths: - '.github/workflows/python-app.yml' - 'bilix/**' - 'pyproject.toml' branches: [ "master" ] pull_request: paths: - '.github/workflows/python-app.yml' - 'bilix/**' - 
'pyproject.toml' branches: [ "master" ] permissions: contents: read jobs: build: runs-on: ubuntu-latest strategy: # You can use PyPy versions in python-version. # For example, pypy-2.7 and pypy-3.8 matrix: python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi pip install -e . ================================================ FILE: .github/workflows/python-publish.yml ================================================ # This workflow will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. 
name: Upload Python Package on: release: types: [published] permissions: contents: read jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v3 with: python-version: '3.x' - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build - name: Publish package uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} ================================================ FILE: .gitignore ================================================ .idea .vscode .fleet .pytest_cache videos __pycache__/ *.egg-info/ *.pyc venv*/ build/ dist/ docs/.vitepress/dist docs/.vitepress/cache node_modules .venv ================================================ FILE: CONTRIBUTING.md ================================================ # bilix 开发指南 感谢你对贡献bilix有所兴趣,在你开始之前可以阅读下面的一些提示。请注意,bilix正快速迭代, 如果你在阅读本文档时发现有些内容已经过时,请以master分支的代码为准。 # 开始之前 在一切开始之前,你需要先 **fork** 本仓库,然后clone你fork的仓库到你的本地: ```shell git clone https://github.com/your_user_name/bilix ``` 拉取至本地后,我**建议**你在独立的python环境中进行测试和开发,确认后进行本地源码可编辑安装: ```shell pip install -e . 
``` 试试bilix命令能否正常执行。通过测试了?至此,你可以在本地开发bilix了🍻 # bilix 结构 在动手改动代码之前你需要对bilix的结构有一定的了解,下面是bilix的大致目录和各模块相应功能: ```text bilix ├── __init__.py ├── __main__.py ├── _process.py # 多进程相关 ├── cli │   ├── assign.py # 分配任务,动态导入相关 │   └── main.py # 命令行入口 ├── download │   ├── base_downloader.py │   ├── base_downloader_m3u8.py # 基础m3u8下载器 │   ├── base_downloader_part.py # 基础分段文件下载器 │   └── utils.py # 下载相关的一些工具函数 ├── exception.py ├── log.py ├── progress │   ├── abc.py # 进度条抽象类 │   ├── cli_progress.py # 命令行进度条 │   └── ws_progress.py ├── serve │   ├── __init__.py │   ├── app.py │   ├── auth.py │   ├── serve.py │   └── user.py ├── sites # 站点扩展目录,稍后介绍 └── utils.py # 通用工具函数 ``` ## 基础下载器 bilix在`bilix.download`中提供了两种基础下载器,m3u8下载器和分段文件下载器。 它们基于`httpx`乃至更底层的`asyncio`及IO多路复用,并且集成了速度控制,并发控制,断点续传,时间段切片,进度条显示等许多实用功能。 bilix的站点扩展下载功能都将基于这些基础下载器完成,基础下载器本身也提供cli服务 ## 下载器是如何提供cli服务的 在bilix中,一个类只要实现了`handle`方法,就可以被注册到命令行(cli)中,`handle`方法的函数签名为 ```python @classmethod def handle(cls, method: str, keys: Tuple[str, ...], options: dict): ... ``` handle函数的实现应该满足下面三个原则: 1. 如果类根据`method` `keys` `options`认为自己不应该承担下载任务,`handle`函数应该返回`None` 2. 如果类可以承担任务,但发现`method`不在自己的可接受范围内,应该抛出`HandleMethodError`异常 3. 如果类可以承担任务,且`method`在自己的可接受范围内,应该返回两个值,第一个值为下载器实例,第二个值为下载coroutine Q:🙋为什么我看到有的下载器返回的是类本身,以及下载函数对象? ```python @classmethod def handle(cls, method: str, keys: Tuple[str, ...], options: dict): if method == 'f' or method == 'get_file': return cls, cls.get_file ``` A:为了偷懒,如果返回值是类以及下载函数对象,将根据命令行参数及type hint自动组装为实例和coroutine, 适用于当命令行options的名字和方法,类参数名字、类型一致的情况 其实`handle`函数给你了较大的自由,你可以根据自己的需求,自由的组合出适合你的下载器的cli服务 ## 如何快速添加一个站点的支持 在`bilix/sites`下,已经有一些站点的支持,如果你想要添加一个新的站点支持,可以按照下面的步骤进行: 1. 在`sites`文件夹下新建一个站点文件夹,例如`example` 2. 在`example`文件夹下添加站点的api模块`api.py`,仿照其他站点的格式实现从输入网页url到输出视频url,视频title的各种api 3. 在`example`文件夹下添加站点api模块的测试`api_test.py`,让大家随时测试站点是否可用 4. 
在`example`文件夹下添加站点的下载器`donwloader.py`,定义`DownloaderExample` 类,根据该站点使用的传输方法选择相应的`BaseDownloader`进行继承,然后在类中定义好下载视频的方法,并实现`handle` 方法。另外你还可以添加`downloader_test.py`来验证你的下载器是否可用 5. 在`example`文件夹下添加`__init__.py`,将`DownloaderExample`类导入,并且在`__all__`中添加`DownloaderExample`以方便bilix找到你的下载器 搞定,使用bilix命令测试一下吧 当前已经有其他开发者为bilix对其他站点的适配做出了贡献🎉, 或许被接受的[New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22)也能为你提供帮助 ================================================ FILE: CONTRIBUTING_EN.md ================================================ # Development guide of bilix Thank you for your interest in contributing to bilix. Before you start, you can read some tips below. Please note that bilix is rapidly iterating, if you find some content outdated while reading this document, please refer to the code of the master branch. # Before starting Before everything starts, you need to first **fork** this repository, and then clone your fork: ```shell git clone https://github.com/your_user_name/bilix ``` After clone, I **recommend** you to test and develop in an independent python environment, and then perform local source editable installation after that: ```shell pip install -e . ``` Try whether the `bilix` command can be executed normally. Passed the test? At this point, you can develop bilix locally🍻 # Structure of bilix Before making any changes to the code, you need to have some understanding of the structure of bilix. 
```text bilix ├── __init__.py ├── __main__.py ├── _process.py # related to multiprocessing ├── cli │   ├── assign.py # assign tasks, dynamically import related │   └── main.py # command line entry ├── download │   ├── base_downloader.py │   ├── base_downloader_m3u8.py # basic m3u8 downloader │   ├── base_downloader_part.py # basic segmented file downloader │   └── utils.py # some utils for download ├── exception.py ├── log.py ├── progress │   ├── abc.py # abstract class of progress │   ├── cli_progress.py # progress for cli │   └── ws_progress.py ├── serve │   ├── __init__.py │   ├── app.py │   ├── auth.py │   ├── serve.py │   └── user.py ├── sites # site support └── utils.py # some utils ``` # BaseDownloader bilix provides two basic downloaders in `bilix.download`, m3u8 downloader and content range file downloader. They are based on `httpx` and even lower-level `asyncio` and IO multiplexing, and integrate many practical functions such as speed control, concurrency control, download resume, time range clip, and progress bar display. The site extension of bilix will be based on these basic downloaders, and the basic downloaders themselves also provide cli services # How does the downloader provide cli service In bilix, as long as a class implements the `handle` method, it can be registered in the command line interface (cli). The function signature of the `handle` method is ```python @classmethod def handle(cls, method: str, keys: Tuple[str, ...], options: dict): ... ``` The implementation of the `handle` function should meet the following three principles: 1. If the class thinks that it should not be assigned the download task according to `method` `keys` `options`, the `handle` function should return `None` 2. If the class can be assigned the task, but finds that the `method` is not within its acceptable range, it should raise a `HandleMethodError` exception 3. 
If the class can handle the task, and `method` is within its acceptable range, it should return two values, the first value is the downloader instance, and the second value is the download coroutine Q: 🙋Why do I see that some downloaders return the class itself and the download function object? ```python @classmethod def handle(cls, method: str, keys: Tuple[str, ...], options: dict): if method == 'f' or method == 'get_file': return cls, cls.get_file ``` A: Simply for convenience: if the return value is a class and a function object, it will be automatically assembled into an instance and coroutine according to the command line arguments, options and type hints. # How to add support for a site Under `bilix/sites`, there are already some sites supported, if you want to add a new site support, you can follow the steps below: 1. Create a new site folder under the `sites` folder, such as `example` 2. Add the site's api module `api.py` under the `example` folder, and follow the format of other sites to implement various APIs from input webpage url to output video url and video title 3. Add the site api module test `api_test.py` under the `example` folder, so that everyone can test whether the site is available at any time 4. Add the site downloader `downloader.py` under the `example` folder, define a `DownloaderExample` class, select the corresponding `BaseDownloader` to inherit according to the site, then define the method of downloading the video in the class, and implement the `handle` method. 5. 
Add `__init__.py` under the `example` folder, import `DownloaderExample` class, and add `DownloaderExample` in `__all__` to facilitate bilix to find your downloader Okay, let's test it At present, other developers have contributed to the extension of bilix to other sites🎉, Maybe the accepted [New site PR](https://github.com/HFrost0/bilix/pulls?q=is%3Apr+is%3Aclosed+label%3A%22New+site%22) can also help you ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [HFrost0] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # bilix [![GitHub license](https://img.shields.io/github/license/HFrost0/bilix?style=flat-square)](https://github.com/HFrost0/bilix/blob/master/LICENSE) ![PyPI](https://img.shields.io/pypi/v/bilix?style=flat-square&color=blue) ![GitHub commit activity](https://img.shields.io/github/commit-activity/m/HFrost0/bilix) ![PyPI - Downloads](https://img.shields.io/pypi/dm/bilix?label=pypi%20downloads&style=flat-square) ⚡️Lightning-fast asynchronous download tool for bilibili and more ## Features ### ⚡️ Fast & Async Asynchronous high concurrency support, controllable concurrency and speed settings. ### 😉 Lightweight & User-friendly Lightweight user-friendly CLI with progress notification, focusing on core functionality. ### 📝 Fully-featured Submissions, anime, TV Series, video clip, audio, favourite, danmaku ,cover... ### 🔨 Extensible Extensible Python module suitable for more download scenarios. 
import signal
import sys
from concurrent.futures import ProcessPoolExecutor
from functools import partial


def _init():
    """Worker-process initializer: turn SIGINT into a clean subprocess exit."""
    signal.signal(signal.SIGINT, lambda *_: sys.exit(0))


def singleton(cls):
    """Decorator: build ``cls`` at most once and hand back that same instance forever.

    The cache is keyed on the wrapped callable itself, so decorating two
    different callables yields two independent singletons.
    """
    cache = {}

    def inner(*args, **kwargs):
        if cls not in cache:
            cache[cls] = cls(*args, **kwargs)
        return cache[cls]

    return inner


# singleton ProcessPoolExecutor to avoid recreation in spawn process
SingletonPPE = singleton(partial(ProcessPoolExecutor, initializer=_init))

if __name__ == '__main__':
    p = SingletonPPE(max_workers=5)
    p.shutdown()
executor(**kwargs) logger.debug(f"auto assemble {executor} by {kwargs}") # handle func return async function instead of coroutine if inspect.iscoroutinefunction(cor): kwargs = kwargs_filter(cor, options) cors = [] for key in keys: if not hasattr(cor, '__self__'): # coroutine function has not bound to instance cors.append(cor(executor, key, **kwargs)) # bound executor to self else: cors.append(cor(key, **kwargs)) logger.debug(f"auto assemble {cor} by {kwargs}") cor = asyncio.gather(*cors) return executor, cor return wrapped def longest_common_len(str1, str2): m, n = len(str1), len(str2) dp = [[0] * (n + 1) for _ in range(m + 1)] max_length = 0 for i in range(1, m + 1): for j in range(1, n + 1): if str1[i - 1] == str2[j - 1]: dp[i][j] = dp[i - 1][j - 1] + 1 max_length = max(max_length, dp[i][j]) return max_length def find_sites(): sites_path = Path(__file__).parent.parent / 'sites' for site in sites_path.iterdir(): if not site.is_dir() or not (site / '__init__.py').exists(): continue yield site def assign(cli_kwargs): method = cli_kwargs.pop('method') keys = cli_kwargs.pop('keys') options = cli_kwargs modules = [ # path, cmp_key ('download.base_downloader_m3u8', 'm3u8'), ('download.base_downloader_part', 'file'), ] for site in find_sites(): modules.append((f"sites.{site.name}", site.name)) pattern = re.compile(r"https?://(?:[\w-]*\.)?([\w-]+)\.([\w-]+)") if g := pattern.search(keys[0]): cmp_base = g.group(1) else: cmp_base = keys[0] def key(x: Tuple[str, str]): if x[0].startswith("sites"): return longest_common_len(cmp_base, x[-1]) else: # base_downloader return longest_common_len(method, x[-1]) for module, _ in sorted(modules, key=key, reverse=True): a = time.time() try: module = import_module(f"bilix.{module}") except ImportError as e: logger.debug(f"duo to ImportError <{e}>, skip ") continue logger.debug(f"import cost {time.time() - a:.6f} s ") exc = None for handle_func in module_handle_funcs(module): try: res = handle_func(method, keys, options) except 
HandleMethodError as e: exc = e continue if res is NotImplemented or res is None: continue executor, cor = res logger.debug(f"Assign to {executor.__class__.__name__}") return executor, cor if exc is not None: # for the module, some handler can handle, but method miss match raise exc raise HandleError(f"Can't find any handler for method: '{method}' keys: {keys}") ================================================ FILE: bilix/cli/main.py ================================================ import asyncio import typing from pathlib import Path import click import rich from rich.panel import Panel from rich.table import Table from .. import __version__ from ..log import logger from .assign import assign from ..progress.cli_progress import CLIProgress from ..utils import parse_bytes_str, s2t from ..exception import HandleError def handle_help(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: if not value or ctx.resilient_parsing: return print_help() ctx.exit() def handle_version(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ) -> None: if not value or ctx.resilient_parsing: return print(f"Version {__version__}") ctx.exit() def handle_debug(ctx: click.Context, param: typing.Union[click.Option, click.Parameter], value: typing.Any, ): if not value or ctx.resilient_parsing: return from rich.traceback import install install() logger.setLevel('DEBUG') logger.debug("Debug on, more information will be shown") def print_help(): console = rich.console.Console() console.print(f"\n[bold]bilix {__version__}", justify="center") console.print("⚡️快如闪电的bilibili下载工具,基于Python现代Async特性,高速批量下载整部动漫,电视剧,up投稿等\n", justify="center") console.print("使用方法: bilix [cyan] [OPTIONS][/cyan] ", justify="left") table = Table.grid(padding=1, pad_edge=False) table.add_column("Parameter", no_wrap=True, justify="left", style="bold") table.add_column("Description") table.add_row( "[cyan]", 'get_series 或 s: 
获取整个系列的视频(包括多p投稿,动漫,电视剧,电影,纪录片),也可以下载单个视频\n' 'get_video 或 v: 获取特定的单个视频,在用户不希望下载系列其他视频的时候可以使用\n' 'get_up 或 up: 获取某个up的所有投稿视频,支持数量选择,关键词搜索,排序\n' 'get_cate 或 cate: 获取分区视频,支持数量选择,关键词搜索,排序\n' 'get_favour 或 fav: 获取收藏夹内视频,支持数量选择,关键词搜索\n' 'get_collect 或 col:获取合集或视频列表内视频\n' 'info: 打印url所属资源的详细信息(例如点赞数,画质,编码格式等)' ) table.add_row( "[cyan][/cyan]", '如使用get_video/get_series,填写视频的url\n' '如使用get_up,填写b站用户空间页url或用户id\n' '如使用get_cate,填写分区名称\n' '如使用get_favour,填写收藏夹页url或收藏夹id\n' '如使用get_collect,填写合集或者视频列表详情页url\n' '如使用info,填写任意资源url' ) console.print(table) # console.rule("OPTIONS参数") table = Table(highlight=True, box=None, show_header=False) table.add_column("OPTIONS", no_wrap=True, justify="left", style="bold") table.add_column("type", no_wrap=True, justify="left", style="bold") table.add_column("Description", ) table.add_row( "-d --dir", '[dark_cyan]str', "文件的下载目录,默认当前路径下的videos文件夹下,不存在会自动创建" ) table.add_row( "-q --quality", '[dark_cyan]int | str', "视频画面质量,默认0为最高画质,越大画质越低,超出范围时自动选最低画质,或者直接使用字符串指定'1080p'等名称" ) table.add_row( "-vc --video-con", '[dark_cyan]int', "控制最大同时下载的视频数量,理论上网络带宽越高可以设的越高,默认3", ) table.add_row( "-pc --part-con", '[dark_cyan]int', "控制每个媒体的分段并发数,默认10", ) table.add_row( '--cookie', '[dark_cyan]str', '有条件的用户可以提供大会员的SESSDATA来下载会员视频' ) table.add_row( "-fb --from-browser", '[dark_cyan]str', '从哪个浏览器中导入cookies,例如safari,chrome,edge...默认无', ) table.add_row( '--days', '[dark_cyan]int', '过去days天中的结果,默认为7,仅get_up, get_cate时生效' ) table.add_row( "-n --num", '[dark_cyan]int', "下载前多少个投稿,仅get_up,get_cate,get_favor时生效", ) table.add_row( "--order", '[dark_cyan]str', '何种排序,pubdate发布时间(默认), click播放数,scores评论数,stow收藏数,coin硬币数,dm弹幕数, 仅get_up, get_cate时生效', ) table.add_row( "--keyword", '[dark_cyan]str', '搜索关键词, 仅get_up, get_cate,get_favor时生效', ) table.add_row( "-ns --no-series", '', '只下载搜索结果每个视频的第一p,仅get_up,get_cate,get_favour时生效', ) table.add_row( "-nh --no-hierarchy", '', '不使用层次目录,所有视频统一保存在下载目录下' ) table.add_row( "--image", '', '下载视频封面' ) table.add_row( "--subtitle", '', '下载srt字幕', ) 
class BasedQualityType(click.ParamType):
    """Click parameter type for the ``-q/--quality`` option.

    Non-numeric input (e.g. '1080p') is passed through as a string; the
    well-known absolute heights 1080/720/480/360 are normalized to strings;
    any other integer is kept as a relative choice (0 = best, 1, 2, 999...).
    """
    name = "quality"

    def convert(self, value, param, ctx):
        try:
            number = int(value)
        except ValueError:
            # not numeric: treat as a quality name such as '1080p'
            return value
        # canonical absolute heights travel downstream as strings
        return str(number) if number in (1080, 720, 480, 360) else number
class BasedSpeedLimit(click.ParamType): name = "speed_limit" def convert(self, value, param, ctx): if value is not None: return parse_bytes_str(value) class BasedTimeRange(click.ParamType): name = "time_range" def convert(self, value, param, ctx): start_time, end_time = map(s2t, value.split('-')) return start_time, end_time @click.command(add_help_option=False) @click.argument("method", type=str) @click.argument("keys", type=str, nargs=-1, required=True) @click.option( "-d", "--dir", "path", type=Path, default='videos', ) @click.option( '-q', '--quality', 'quality', type=BasedQualityType(), default=0, # default relatively choice ) @click.option( '-vc', '--video-con', 'video_concurrency', type=int, default=3, ) @click.option( '-pc', "--part-con", "part_concurrency", type=int, default=10, ) @click.option( '--cookie', 'cookie', type=str, ) @click.option( '--days', 'days', type=int, default=7, ) @click.option( '-n', '--num', type=int, default=10, ) @click.option( '--order', 'order', type=str, default='pubdate', ) @click.option( '--keyword', 'keyword', type=str ) @click.option( '-ns', '--no-series', 'series', is_flag=True, default=True, ) @click.option( '-nh', '--no-hierarchy', 'hierarchy', is_flag=True, default=True, ) @click.option( '--image', 'image', is_flag=True, default=False, ) @click.option( '--subtitle', 'subtitle', is_flag=True, default=False, ) @click.option( '--dm', 'dm', is_flag=True, default=False, ) @click.option( '-oa', '--only-audio', 'only_audio', is_flag=True, default=False, ) @click.option( '-p', 'p_range', type=(int, int), ) @click.option( '--codec', 'codec', type=str, default='' ) @click.option( '--speed-limit', '-sl', 'speed_limit', type=BasedSpeedLimit(), default=None, ) @click.option( '--stream-retry', '-sr', 'stream_retry', type=int, default=5 ) @click.option( '--from-browser', '-fb', 'browser', type=str, ) @click.option( '--time-range', '-tr', 'time_range', type=BasedTimeRange(), default=None, ) @click.option( '-h', "--help", is_flag=True, 
is_eager=True,
    expose_value=False,
    callback=handle_help,
)
@click.option(
    '-v', "--version",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=handle_version,
)
@click.option(
    "--debug",
    is_flag=True,
    is_eager=True,
    expose_value=False,
    callback=handle_debug,
)
def main(**kwargs):
    # CLI entry point: create an event loop, start the rich progress display,
    # then hand METHOD/KEYS/OPTIONS to the assigner which picks a downloader.
    loop = asyncio.new_event_loop()  # avoid deprecated warning in 3.11
    asyncio.set_event_loop(loop)
    logger.debug(f'CLI KEY METHOD and OPTIONS: {kwargs}')
    try:
        # CLIProgress.switch_theme(gs="cyan", bs="dark_cyan")
        CLIProgress.start()  # start progress
        if not kwargs['path'].exists():
            kwargs['path'].mkdir(parents=True)
            logger.info(f'Directory {kwargs["path"]} not exists, auto created')
        executor, cor = assign(kwargs)
        loop.run_until_complete(cor)
    except HandleError as e:  # method no match
        logger.error(e)
    except KeyboardInterrupt:
        logger.info('[cyan]提示:用户中断,重复执行命令可继续下载')
    finally:
        CLIProgress.stop()  # stop rich progress to ensure cursor is repositioned


================================================ FILE: bilix/download/base_downloader.py ================================================
import asyncio
import inspect
import logging
import re
import time
from functools import wraps
from typing import Union, Optional, Tuple
from contextlib import asynccontextmanager
from urllib.parse import urlparse
import aiofiles
import httpx
from bilix.cli.assign import auto_assemble
from bilix.log import logger as dft_logger
from bilix.download.utils import req_retry, path_check
from bilix.progress.abc import Progress
from bilix.progress.cli_progress import CLIProgress
from bilix.exception import HandleMethodError
from pathlib import Path, PurePath

__all__ = ['BaseDownloader']


class BaseDownloaderMeta(type):
    # Metaclass that scans every public coroutine method of a downloader class:
    # wraps methods taking a `path` param so str is coerced to Path, and collects
    # CLI metadata (short names, param descriptions) from docstrings.
    def __new__(cls, name, bases, dct):
        dct['_cli_info'] = {}
        dct['_cli_map'] = {}
        for method_name, method in dct.items():
            if not method_name.startswith('_') and asyncio.iscoroutinefunction(method):
                if 'path' in (sig := inspect.signature(method)).parameters:
                    dct[method_name] = cls.ensure_path(method, sig)
                if
cls.check_unique_method(method, bases): cli_info = cls.parse_cli_doc(method) if cli_info: dct['_cli_info'][method] = cli_info dct['_cli_map'][method_name] = method if cli_info['short']: dct['_cli_map'][cli_info['short']] = method return super().__new__(cls, name, bases, dct) @staticmethod def check_unique_method(method_name: str, bases: Tuple[type, ...]): for base in bases: if method_name in base.__dict__: return False return True @staticmethod def parse_cli_doc(func) -> Optional[dict]: docstring = func.__doc__ if not docstring or ':cli:' not in docstring: return params_matches = re.findall(r":param (\w+): (.+)", docstring) params = {param: description for param, description in params_matches} cli_short_match = re.search(r":cli: short: (\w+)", docstring) short_name = cli_short_match.group(1) if cli_short_match else None return {"short": short_name, "params": params} @staticmethod def ensure_path(func, sig): path_index = next(i for i, name in enumerate(sig.parameters) if name == 'path') @wraps(func) async def wrapper(*args, **kwargs): new_args = list(args) if path_index < len(args) and isinstance(args[path_index], str): new_args[path_index] = Path(args[path_index]) elif 'path' in kwargs and isinstance(kwargs['path'], str): kwargs['path'] = Path(kwargs['path']) return await func(*new_args, **kwargs) wrapper.__annotations__['path'] = Union[Path, str] return wrapper class BaseDownloader(metaclass=BaseDownloaderMeta): pattern: re.Pattern = None cookie_domain: str = "" _cli_info: dict _cli_map: dict def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress: Progress = None, logger: logging.Logger = None, ): """ :param client: client used for http request :param browser: load cookies from which browser :param speed_limit: global download rate for the downloader, should be a number (Byte/s unit) :param progress: progress obj """ # use cli progress by default self.progress = progress 
or CLIProgress()
        self.logger = logger or dft_logger
        self.client = client if client else httpx.AsyncClient(headers={'user-agent': 'PostmanRuntime/7.29.0'})
        if browser:  # load cookies from browser, may need auth
            self.update_cookies_from_browser(browser)
        assert speed_limit is None or speed_limit > 0
        self.speed_limit = speed_limit
        self.stream_retry = stream_retry
        # active stream number
        self._stream_num = 0

    async def __aenter__(self):
        await self.client.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.client.__aexit__(exc_type, exc_val, exc_tb)

    async def aclose(self):
        """Close transport and proxies for httpx client"""
        await self.client.aclose()

    async def get_static(self, url: str, path: Union[str, Path], convert_func=None) -> Path:
        """
        Download a small static asset (cover image, subtitle, ...) in a single request.

        :param url:
        :param path: file path without suffix
        :param convert_func: function used to convert http bytes content, must be named like ...2...
        :return: downloaded file path
        """
        # use suffix from convert_func's name
        if convert_func:
            suffix = '.' + convert_func.__name__.split('2')[-1]
        # try to find suffix from url
        else:
            suffix = PurePath(urlparse(url).path).suffix
        path = path.with_name(path.name + suffix)
        exist, path = path_check(path)
        if exist:
            self.logger.info(f'[green]已存在[/green] {path.name}')
            return path
        res = await req_retry(self.client, url)
        content = convert_func(res.content) if convert_func else res.content
        async with aiofiles.open(path, 'wb') as f:
            await f.write(content)
        self.logger.info(f'[cyan]已完成[/cyan] {path.name}')
        return path

    @asynccontextmanager
    async def _stream_context(self, times: int):
        """
        contextmanager to print log, slow down streaming and count active stream number

        :param times: error occur times which is related to sleep time
        :return:
        """
        self._stream_num += 1
        try:
            yield
        except httpx.HTTPStatusError as e:
            # back off much longer on 403 since the server is actively rejecting us
            if e.response.status_code == 403:
                self.logger.warning(f"STREAM slowing down since 403 forbidden {e}")
                await asyncio.sleep(10. * (times + 1))
            else:
                self.logger.warning(f"STREAM {e}")
                await asyncio.sleep(.5 * (times + 1))
            raise
        except httpx.TransportError as e:
            msg = f'STREAM {e.__class__.__name__} 异常可能由于网络条件不佳或并发数过大导致,若重复出现请考虑降低并发数'
            self.logger.warning(msg) if times > 2 else self.logger.debug(msg)
            await asyncio.sleep(.1 * (times + 1))
            raise
        except Exception as e:
            self.logger.warning(f'STREAM Unexpected Exception class:{e.__class__.__name__} {e}')
            raise
        finally:
            self._stream_num -= 1

    @property
    def stream_num(self):
        """current activate network stream number"""
        return self._stream_num

    @property
    def chunk_size(self) -> Optional[int]:
        if self.speed_limit and self.speed_limit < 1e5:  # 1e5 limit bound
            # only restrict chunk_size when speed_limit is too low
            return int(self.speed_limit * 0.1)  # 0.1 delay slope
        # default to None setup
        return None

    async def _check_speed(self, content_size):
        # if the aggregated active speed exceeds the limit, sleep just long enough
        # (scaled by the number of concurrent streams) to fall back under it
        if self.speed_limit and (cur_speed := self.progress.active_speed) > self.speed_limit:
            t_tgt = content_size / self.speed_limit * self.stream_num
            t_real = content_size / cur_speed
            t = t_tgt - t_real
            await asyncio.sleep(t)

    def update_cookies_from_browser(self, browser: str):
        # import cookies for self.cookie_domain from a local browser via browser_cookie3
        try:
            a = time.time()
            import browser_cookie3
            f = getattr(browser_cookie3, browser.lower())
            self.logger.debug(f"trying to load cookies from {browser}: {self.cookie_domain}, may need auth")
            self.client.cookies.update(f(domain_name=self.cookie_domain))
            self.logger.debug(f"load complete, consumed time: {time.time() - a} s")
        except AttributeError:
            raise AttributeError(f"Invalid Browser {browser}")

    @classmethod
    def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool:
        """check if the cls can be handled by this downloader"""
        if cls.pattern:
            return cls.pattern.match(keys[0]) is not None
        else:
            return method in cls._cli_map

    @classmethod
    @auto_assemble
    def handle(cls, method: str, keys: Tuple[str, ...], options: dict):
        if cls._decide_handle(method, keys, options):
            try:
                method = cls._cli_map[method]
            except KeyError:
                raise HandleMethodError(cls, method)
            return cls, method


================================================ FILE: bilix/download/base_downloader_m3u8.py ================================================
import asyncio
import uuid
from pathlib import Path, PurePath
from typing import Tuple, Union
from urllib.parse import urlparse
import aiofiles
import httpx
import os
import m3u8
from Crypto.Cipher import AES
from m3u8 import Segment
from bilix.download.base_downloader import BaseDownloader
from bilix.download.utils import path_check, merge_files
from bilix import ffmpeg
from .utils import req_retry

__all__ = ['BaseDownloaderM3u8']


class BaseDownloaderM3u8(BaseDownloader):
    """Base Async http m3u8 Downloader"""

    def __init__(
            self,
            *,
            client: httpx.AsyncClient = None,
            browser: str = None,
            speed_limit: Union[float, int] = None,
            stream_retry: int = 5,
            progress=None,
            logger=None,
            # unique params
            part_concurrency: int = 10,
            video_concurrency: Union[int, asyncio.Semaphore] = 3,
    ):
        super(BaseDownloaderM3u8, self).__init__(
            client=client, browser=browser, stream_retry=stream_retry,
            speed_limit=speed_limit, progress=progress, logger=logger
        )
        # semaphore bounding how many videos download at the same time
        self.v_sema = asyncio.Semaphore(video_concurrency) if isinstance(video_concurrency, int) else video_concurrency
        self.part_concurrency = part_concurrency
        # cache of AES ciphers (or pending futures while the key is being fetched), keyed by key uri
        self.decrypt_cache = {}

    async def _decrypt(self, seg: m3u8.Segment, content: bytearray):
        # resolve (and cache) the AES-128-CBC cipher for this segment's key uri, then decrypt
        async def get_key():
            key_bytes = (await req_retry(self.client, uri)).content
            # iv comes from the playlist when present, otherwise from the value
            # stashed in custom_parser_values (segment index, per RFC 8216 default)
            iv = bytes.fromhex(seg.key.iv.replace('0x', '')) if seg.key.iv is not None else \
                seg.custom_parser_values['iv']
            return AES.new(key_bytes, AES.MODE_CBC, iv)

        uri = seg.key.absolute_uri
        if uri not in self.decrypt_cache:
            self.decrypt_cache[uri] = asyncio.ensure_future(get_key())
            self.decrypt_cache[uri] = await self.decrypt_cache[uri]
        elif asyncio.isfuture(self.decrypt_cache[uri]):
            # another coroutine is already fetching the key; wait for it
            await self.decrypt_cache[uri]
        cipher = self.decrypt_cache[uri]
        return cipher.decrypt(content)

    async def to_invariant_m3u8(self, m3u8_url: str) -> m3u8.M3U8:
        res
= await req_retry(self.client, m3u8_url, follow_redirects=True)
        m3u8_info = m3u8.loads(res.text)
        if not m3u8_info.base_uri:
            m3u8_info.base_uri = m3u8_url
        if m3u8_info.is_variant:
            # variant (master) playlist: recurse into the first media playlist
            self.logger.debug(f"m3u8 is variant, use first playlist: {m3u8_info.playlists[0].absolute_uri}")
            return await self.to_invariant_m3u8(m3u8_info.playlists[0].absolute_uri)
        return m3u8_info

    async def get_m3u8_video(self, m3u8_url: str, path: Union[str, Path], time_range: Tuple[int, int] = None) -> Path:
        """
        download video from m3u8 url

        :cli: short: m3u8
        :param m3u8_url:
        :param path: file path or file dir, if dir, filename will be set according to m3u8_url
        :param time_range: (start, end) in seconds, if provided, only download the clip and add start-end to filename
        :return: downloaded file path
        """
        if path.is_dir():
            path = (path / PurePath(urlparse(m3u8_url).path).stem).with_suffix('.mp4')
        if time_range:
            path = path.with_stem(f"{path.stem}-{time_range[0]}-{time_range[1]}")
        exist, path = path_check(path)
        if exist:
            self.logger.info(f"[green]已存在[/green] {path.name}")
            return path
        async with self.v_sema:
            task_id = await self.progress.add_task(total=None, description=path.name)
            m3u8_info = await self.to_invariant_m3u8(m3u8_url)
            cors = []
            p_sema = asyncio.Semaphore(self.part_concurrency)
            total_time = 0
            if time_range:
                current_time = 0
                start_time, end_time = time_range
                inside = False
            else:
                inside = True
            for idx, seg in enumerate(m3u8_info.segments):
                if time_range:
                    # walk segment durations to find which segments overlap the requested range
                    current_time += seg.duration
                    if not inside and current_time > start_time:
                        inside = True
                        s = seg.duration - (current_time - start_time)
                    elif current_time > end_time:
                        break
                if inside:
                    total_time += seg.duration
                    # https://stackoverflow.com/questions/50628791/decrypt-m3u8-playlist-encrypted-with-aes-128-without-iv
                    if seg.key and seg.key.iv is None:
                        seg.custom_parser_values['iv'] = idx.to_bytes(16, 'big')
                    cors.append(self._get_seg(seg, path.with_name(f"{path.stem}-{idx}.ts"), task_id, p_sema))
            if len(cors) == 0 and time_range:
                raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>")
            if init_sec := m3u8_info.segments[0].init_section:
                # fMP4 stream: fetch the init section first, then raw-byte merge
                async def _get_init():
                    r = await req_retry(self.client, init_sec.absolute_uri)
                    async with aiofiles.open(fn := path.with_name(f"{path.stem}-init"), 'wb') as f:
                        await f.write(r.content)
                    return fn

                cors.insert(0, _get_init())
                merge_fn = merge_files
            else:
                # plain TS segments: let ffmpeg concat them
                merge_fn = ffmpeg.concat
            await self.progress.update(task_id, total_time=total_time)
            file_list = await asyncio.gather(*cors)
            await merge_fn(file_list, path)
            if time_range:
                path_tmp = path.with_stem(str(uuid.uuid4()))
                # to save key frame, use 0 as start time instead of s, clip will be a little longer than expected
                await ffmpeg.time_range_clip(path, 0, end_time - start_time + s, path_tmp)
                os.rename(path_tmp, path)
            self.logger.info(f"[cyan]已完成[/cyan] {path.name}")
            await self.progress.update(task_id, visible=False)
            return path

    async def _update_task_total(self, task_id, time_part: float, update_size: int):
        # predict the final byte total from confirmed (seconds, bytes) samples so the
        # progress bar has a total even though m3u8 gives only durations up front
        task = self.progress.tasks[task_id]
        if task.total is None:
            confirmed_t = time_part
            confirmed_b = update_size
        else:
            confirmed_t = time_part + task.fields['confirmed_t']
            confirmed_b = update_size + task.fields['confirmed_b']
        predicted_total = task.fields['total_time'] * confirmed_b / confirmed_t
        await self.progress.update(task_id, total=predicted_total, confirmed_t=confirmed_t, confirmed_b=confirmed_b)

    async def _get_seg(self, seg: Segment, path: Path, task_id, p_sema: asyncio.Semaphore) -> Path:
        # download a single segment (with retry), decrypting it if the playlist is encrypted
        exists, path = path_check(path)
        if exists:
            downloaded = os.path.getsize(path)
            await self._update_task_total(task_id, time_part=seg.duration, update_size=downloaded)
            await self.progress.update(task_id, advance=downloaded)
            return path
        seg_url = seg.absolute_uri
        async with p_sema:
            content = None
            for times in range(1 + self.stream_retry):
                content = bytearray()
                try:
                    async with self.client.stream("GET", seg_url, follow_redirects=True) as r, \
                            self._stream_context(times):
                        r.raise_for_status()
                        # pre-update total if content-length is provided and first time to get content
                        if 'content-length' in r.headers and not content:
                            await self._update_task_total(
                                task_id, time_part=seg.duration, update_size=int(r.headers['content-length']))
                        async for chunk in r.aiter_bytes(chunk_size=self.chunk_size):
                            content.extend(chunk)
                            await self.progress.update(task_id, advance=len(chunk))
                            await self._check_speed(len(chunk))
                        if 'content-length' not in r.headers:
                            # after-update total if content-length is not provided
                            await self._update_task_total(task_id, time_part=seg.duration, update_size=len(content))
                    break
                except (httpx.HTTPStatusError, httpx.TransportError):
                    continue
            else:
                raise Exception(f"STREAM 超过重复次数 {seg_url}")
            content = self._after_seg(seg, content)
            # in case encrypted
            if seg.key:
                content = await self._decrypt(seg, content)
            async with aiofiles.open(path, 'wb') as f:
                await f.write(content)
            return path

    def _after_seg(self, seg: Segment, content: bytearray) -> bytearray:
        """hook for subclass to modify segment content, happened before decrypt"""
        return content


================================================ FILE: bilix/download/base_downloader_part.py ================================================
import asyncio
from pathlib import Path, PurePath
from typing import Union, List, Iterable, Tuple
from urllib.parse import urlparse
import aiofiles
import httpx
import uuid
import random
import os
from email.message import Message
from pymp4.parser import Box
from bilix.download.base_downloader import BaseDownloader
from bilix.download.utils import path_check, merge_files
from bilix import ffmpeg
from .utils import req_retry

__all__ = ['BaseDownloaderPart']


class BaseDownloaderPart(BaseDownloader):
    """Base Async http Content-Range Downloader"""

    def __init__(
            self,
            *,
            client: httpx.AsyncClient = None,
            browser: str = None,
            speed_limit: Union[float, int, None] = None,
            stream_retry: int = 5,
            progress=None,
            logger=None,
            # unique params
            part_concurrency: int = 10,
    ):
        super(BaseDownloaderPart, self).__init__(
            client=client, browser=browser, stream_retry=stream_retry,
            speed_limit=speed_limit, progress=progress, logger=logger
        )
        self.part_concurrency = part_concurrency

    async def _pre_req(self, urls: List[str]) -> Tuple[int, str]:
        """Probe the first url for total size and server-suggested filename."""
        # use GET instead of HEAD due to 404 bug https://github.com/HFrost0/bilix/issues/16
        res = await req_retry(self.client, urls[0], follow_redirects=True, headers={'Range': 'bytes=0-1'})
        total = int(res.headers['Content-Range'].split('/')[-1])
        # get filename
        if content_disposition := res.headers.get('Content-Disposition', None):
            m = Message()
            m['content-type'] = content_disposition
            filename = m.get_param('filename', '')
        else:
            filename = ''
        # change origin url to redirected position to avoid twice redirect
        if res.history:
            urls[0] = str(res.url)
        return total, filename

    async def get_media_clip(
            self,
            url_or_urls: Union[str, Iterable[str]],
            path: Union[Path, str],
            time_range: Tuple[int, int],
            init_range: str,
            seg_range: str,
            get_s: asyncio.Future = None,
            set_s: asyncio.Future = None,
            task_id=None,
    ):
        """
        Download only the byte ranges of a DASH media file that cover the requested time range,
        using the sidx (segment index) box to map time to byte offsets.

        :param url_or_urls:
        :param path:
        :param time_range: (start_time, end_time)
        :param init_range: xxx-xxx
        :param seg_range: xxx-xxx
        :param get_s:
        :param set_s:
        :param task_id:
        :return:
        """
        upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None)
        exist, path = path_check(path)
        if exist:
            if not upper:
                self.logger.info(f'[green]已存在[/green] {path.name}')
            return path
        urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls]
        init_start, init_end = map(int, init_range.split('-'))
        seg_start, seg_end = map(int, seg_range.split('-'))
        # fetch the sidx box to learn each media segment's duration and byte size
        res = await req_retry(self.client, urls[0], follow_redirects=True,
                              headers={'Range': f'bytes={seg_start}-{seg_end}'})
        container = Box.parse(res.content)
        assert container.type == b'sidx'
        if get_s:
            # start time is decided by a sibling task (e.g. keep audio/video aligned)
            start_time = await get_s
            end_time = time_range[1]
        else:
            start_time, end_time = time_range
        pre_time, pre_byte = 0, seg_end + 1
        inside = False
        parts = [(init_start, init_end)]
        total = init_end - init_start + 1
        s = 0
        for idx, ref in enumerate(container.references):
            if ref.reference_type != "MEDIA":
                self.logger.debug("not a media", ref)
                continue
            seg_duration = ref.segment_duration / container.timescale
            if not inside and start_time < pre_time + seg_duration:
                s = start_time - pre_time
                inside = True
            if inside and end_time < pre_time:
                break
            if inside:
                total += ref.referenced_size
                parts.append((pre_byte, pre_byte + ref.referenced_size - 1))
            pre_time += seg_duration
            pre_byte += ref.referenced_size
        if len(parts) == 1:
            raise Exception(f"time range <{start_time}-{end_time}> invalid for <{path.name}>")
        if set_s:
            # publish the actual (segment-aligned) start time for the sibling task
            set_s.set_result(start_time - s)
        if task_id is not None:
            await self.progress.update(
                task_id,
                total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total)
        else:
            task_id = await self.progress.add_task(description=path.name, total=total)
        p_sema = asyncio.Semaphore(self.part_concurrency)

        async def get_seg(part_range: Tuple[int, int]):
            async with p_sema:
                return await self._get_file_part(urls, path=path, part_range=part_range, task_id=task_id)

        file_list = await asyncio.gather(*[get_seg(part_range) for part_range in parts])
        path_tmp = path.with_name(str(uuid.uuid4()))
        await merge_files(file_list, path_tmp)
        if set_s:
            await ffmpeg.time_range_clip(path_tmp, start=0, t=end_time - start_time + s, output_path=path)
        else:
            await ffmpeg.time_range_clip(path_tmp, start=s, t=end_time - start_time, output_path=path)
        if not upper:  # no upstream task
            await self.progress.update(task_id, visible=False)
            self.logger.info(f"[cyan]已完成[/cyan] {path.name}")
        return path

    async def get_file(self, url_or_urls: Union[str, Iterable[str]], path: Union[Path, str], task_id=None) -> Path:
        """
        download file by http content-range

        :cli: short: f
        :param url_or_urls: file url or urls with backups
        :param path: file path or dir path, if dir path, filename will be extracted from url
        :param task_id: if not provided, a new progress task will be created
        :return: downloaded file path
        """
        urls = [url_or_urls] if isinstance(url_or_urls, str) else [url for url in url_or_urls]
        upper = task_id is not None and self.progress.tasks[task_id].fields.get('upper', None)
        if not path.is_dir():
            exist, path = path_check(path)
            if exist:
                if not upper:
                    self.logger.info(f'[green]已存在[/green] {path.name}')
                return path
        total, req_filename = await self._pre_req(urls)
        if path.is_dir():
            # derive filename from the server hint, falling back to the url path
            file_name = req_filename if req_filename else PurePath(urlparse(urls[0]).path).name
            path /= file_name
            exist, path = path_check(path)
            if exist:
                if not upper:
                    self.logger.info(f'[green]已存在[/green] {path.name}')
                return path
        if task_id is not None:
            await self.progress.update(
                task_id,
                total=self.progress.tasks[task_id].total + total if self.progress.tasks[task_id].total else total)
        else:
            task_id = await self.progress.add_task(description=path.name, total=total)
        # split the file into part_concurrency byte ranges, downloaded concurrently
        part_length = total // self.part_concurrency
        cors = []
        for i in range(self.part_concurrency):
            start = i * part_length
            end = (i + 1) * part_length - 1 if i < self.part_concurrency - 1 else total - 1
            cors.append(self._get_file_part(urls, path=path, part_range=(start, end), task_id=task_id))
        file_list = await asyncio.gather(*cors)
        await merge_files(file_list, new_path=path)
        if not upper:
            await self.progress.update(task_id, visible=False)
            self.logger.info(f"[cyan]已完成[/cyan] {path.name}")
        return path

    async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[int, int], task_id) -> Path:
        # download one byte range to its own part file, resuming from what's already on disk
        start, end = part_range
        part_path = path.with_name(f'{path.name}.{part_range[0]}-{part_range[1]}')
        exist, part_path = path_check(part_path)
        if exist:
            downloaded = os.path.getsize(part_path)
            start += downloaded
            await self.progress.update(task_id, advance=downloaded)
        if start > end:
            return part_path  # skip already finished
        url_idx = random.randint(0, len(urls) - 1)
        for times in range(1 + self.stream_retry):
            try:
                async with \
                        self.client.stream("GET", urls[url_idx], follow_redirects=True,
headers={'Range': f'bytes={start}-{end}'}) as r, \
                        self._stream_context(times), \
                        aiofiles.open(part_path, 'ab') as f:
                    r.raise_for_status()
                    if r.history:  # avoid twice redirect
                        urls[url_idx] = r.url
                    async for chunk in r.aiter_bytes(chunk_size=self.chunk_size):
                        await f.write(chunk)
                        start += len(chunk)
                        await self.progress.update(task_id, advance=len(chunk))
                        await self._check_speed(len(chunk))
                break
            except (httpx.HTTPStatusError, httpx.TransportError):
                continue
        else:
            raise Exception(f"STREAM 超过重复次数 {part_path.name}")
        return part_path


================================================ FILE: bilix/download/utils.py ================================================
import asyncio
import errno
import os
import random
from functools import wraps
from pathlib import Path
import aiofiles
import httpx
from typing import Union, Sequence, Tuple, List
from bilix.exception import APIError, APIParseError
from bilix.log import logger


async def merge_files(file_list: List[Path], new_path: Path):
    # append every subsequent part onto the first file (deleting parts as we go),
    # then rename the grown first file to the final path
    first_file = file_list[0]
    async with aiofiles.open(first_file, 'ab') as f:
        for idx in range(1, len(file_list)):
            async with aiofiles.open(file_list[idx], 'rb') as fa:
                await f.write(await fa.read())
            os.remove(file_list[idx])
    os.rename(first_file, new_path)


async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, Sequence[str]], method='GET',
                    follow_redirects=False, retry=5, **kwargs) -> httpx.Response:
    """Client request with multiple backup urls and retry"""
    pre_exc = None  # predefine to avoid warning
    for times in range(1 + retry):
        # when several backup urls are given, pick one at random each attempt
        url = url_or_urls if type(url_or_urls) is str else random.choice(url_or_urls)
        try:
            res = await client.request(method, url, follow_redirects=follow_redirects, **kwargs)
            res.raise_for_status()
        except httpx.TransportError as e:
            msg = f'{method} {e.__class__.__name__} url: {url}'
            logger.warning(msg) if times > 0 else logger.debug(msg)
            pre_exc = e
            await asyncio.sleep(.1 * (times + 1))
        except httpx.HTTPStatusError as e:
            logger.warning(f'{method} {e.response.status_code} {url}')
            pre_exc = e
            await asyncio.sleep(1. * (times + 1))
        except Exception as e:
            # unknown error: do not retry, propagate immediately
            logger.warning(f'{method} {e.__class__.__name__} 未知异常 url: {url}')
            raise e
        else:
            return res
    logger.error(f"{method} 超过重复次数 {url_or_urls}")
    raise pre_exc


def eclipse_str(s: str, max_len: int = 100):
    """Shorten a string to at most max_len chars, keeping both ends and inserting '…'."""
    if len(s) <= max_len:
        return s
    else:
        half_len = (max_len - 1) // 2
        return f"{s[:half_len]}…{s[-half_len:]}"


def path_check(path: Path, retry: int = 100) -> Tuple[bool, Path]:
    """
    check whether path exist, if filename too long, truncate and return valid path

    :param path: path to check
    :param retry: max retry times
    :return: exist, path
    """
    for times in range(retry):
        try:
            exist = path.exists()
            return exist, path
        except OSError as e:
            if e.errno == errno.ENAMETOOLONG:  # filename too long for os
                if times == 0:
                    logger.warning(f"filename too long for os, truncate will be applied. filename: {path.name}")
                else:
                    logger.debug(f"filename too long for os {path.name}")
                # shrink the stem by 20% and retry the existence check
                path = path.with_stem(eclipse_str(path.stem, int(len(path.stem) * .8)))
            else:
                raise e
    raise OSError(f"filename too long for os {path.name}")


def raise_api_error(func):
    """Decorator to catch exceptions except APIError and HTTPError and raise APIParseError"""

    @wraps(func)
    async def wrapped(client: httpx.AsyncClient, *args, **kwargs):
        try:
            return await func(client, *args, **kwargs)
        except (APIError, httpx.HTTPError):
            raise
        except Exception as e:
            raise APIParseError(e, func) from e

    return wrapped


================================================ FILE: bilix/exception.py ================================================
class APIError(Exception):
    """API Error during request to website"""

    def __init__(self, msg: str, resource):
        self.msg = msg
        self.resource = resource

    def __str__(self):
        return f"{self.msg} resource: {self.resource}"


class APIParseError(APIError):
    """API Parse Error, maybe cased by website interface change, raise by decorator"""

    def __init__(self, e, func):
        self.e = e
        self.func = func

    def __str__(self):
        return
f"APIParseError Caused by {self.e.__class__.__name__} in <{self.func.__module__}:{self.func.__name__}>"


class APIResourceError(APIError):
    """API Error that resource is not available (like deleted by uploader)"""


class APIUnsupportedError(APIError):
    """The resource parse is not supported yet"""


class APIInvalidError(APIError):
    """API request is invalid"""


class HandleError(Exception):
    """the error related to bilix cli handle"""


class HandleMethodError(HandleError):
    """the error that handler can not recognize the method"""

    def __init__(self, executor_cls, method):
        self.executor_cls = executor_cls
        self.method = method

    def __str__(self):
        return f"For {self.executor_cls.__name__} method '{self.method}' is not available"


================================================ FILE: bilix/ffmpeg.py ================================================
"""
just some useful ffmpeg commands wrapped in python
"""
import os
from anyio import run_process
from typing import List
from pathlib import Path
import tempfile


async def concat(path_lst: List[Path], output_path: Path, remove=True):
    """Losslessly stitch files together via ffmpeg's concat demuxer.

    A concat list file is written next to the output (so relative names resolve),
    and the inputs are deleted afterwards when remove is True.
    """
    with tempfile.NamedTemporaryFile('w', dir=output_path.parent, delete=False) as fp:
        for path in path_lst:
            fp.write(f"file '{path.name}'\n")
    cmd = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', fp.name, '-c', 'copy', '-loglevel', 'quiet', str(output_path)]
    # print(' '.join(map(lambda x: f'"{x}"', cmd)))
    await run_process(cmd)
    os.remove(fp.name)
    if remove:
        for path in path_lst:
            os.remove(path)


async def combine(path_lst: List[Path], output_path: Path, remove=True):
    """Mux several input streams (e.g. separate video + audio) into one container, stream-copied."""
    cmd = ['ffmpeg']
    for path in path_lst:
        cmd.extend(['-i', str(path)])
    # for flac, use -strict -2
    cmd.extend(['-c', 'copy', '-strict', '-2', '-loglevel', 'quiet', str(output_path)])
    # print(' '.join(map(lambda x: f'"{x}"', cmd)))
    await run_process(cmd)
    if remove:
        for path in path_lst:
            os.remove(path)


async def time_range_clip(input_path: Path, start: int, t: int, output_path: Path, remove=True):
    """Cut a clip of duration t starting at start (seconds), stream-copied into an mp4."""
    # for flac, use -strict -2
    cmd = ['ffmpeg', '-ss', f'{start:.1f}', '-t', f'{t:.1f}', '-i', str(input_path),
           '-codec', 'copy', '-strict', '-2', '-loglevel', 'quiet', '-f', 'mp4', str(output_path)]
    # print(' '.join(map(lambda x: f'"{x}"', cmd)))
    await run_process(cmd)
    if remove:
        os.remove(input_path)


================================================ FILE: bilix/log.py ================================================
import logging
from rich.logging import RichHandler


def get_logger():
    """Return the shared 'bilix' logger, configuring a RichHandler on first call."""
    bilix_logger = logging.getLogger("bilix")
    # if the logger already has handlers configured, return the instance directly
    if bilix_logger.hasHandlers():
        return bilix_logger
    bilix_logger.setLevel(logging.INFO)
    # create a customized RichHandler
    custom_rich_handler = RichHandler(
        show_time=False,
        show_path=False,
        markup=True,
        keywords=RichHandler.KEYWORDS + ['STREAM'],
        rich_tracebacks=True
    )
    # set the log format
    formatter = logging.Formatter("{message}", style="{", datefmt="[%X]")
    custom_rich_handler.setFormatter(formatter)
    # attach the customized RichHandler to the logger
    bilix_logger.addHandler(custom_rich_handler)
    return bilix_logger


logger = get_logger()


================================================ FILE: bilix/progress/abc.py ================================================
from abc import ABC, abstractmethod
from typing import Optional, Any


class Progress(ABC):
    """Abstract Class for bilix download progress, checkout to design your own progress"""

    @classmethod
    @abstractmethod
    def start(cls):
        """start to show the progress"""

    @classmethod
    @abstractmethod
    def stop(cls):
        """stop to show the progress"""

    @abstractmethod
    def tasks(self):
        """return the tasks"""

    @abstractmethod
    def active_speed(self) -> Optional[float]:
        """return current active speed (bit/s)"""

    @abstractmethod
    async def add_task(
            self,
            description: str,
            start: bool = True,
            total: Optional[float] = None,
            completed: int = 0,
            visible: bool = True,
            **fields,
    ):
        """async add a task to progress"""

    @abstractmethod
    async def update(
            self,
            task_id,
            *,
            total: Optional[float] = None,
            completed: Optional[float] = None,
            advance: Optional[float] = None,
            description: Optional[str]
= None,
            visible: Optional[bool] = None,
            refresh: bool = False,
            **fields: Any
    ):
        """async update a task status"""


================================================ FILE: bilix/progress/cli_progress.py ================================================
from bilix.progress.abc import Progress
from typing import Optional, Any, Set
from rich.theme import Theme
from rich.style import Style
from rich.spinner import Spinner
from rich.progress import Progress as RichProgress, TaskID, \
    TextColumn, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn, ProgressColumn


class SpinnerColumn(ProgressColumn):
    # spinner column whose animation reflects the task state:
    # waiting (no total yet) / downloading / merging (finished)
    def __init__(self, style="progress.spinner", speed: float = 1.0):
        self.waiting = Spinner("dqpb", style=style)
        self.downloading = Spinner("dots", style=style, speed=speed)
        self.merging = Spinner("line", style=style, speed=speed)
        super().__init__()

    def render(self, task):
        t = task.get_time()
        if task.total is None:
            return self.waiting.render(t)
        elif task.finished:
            return self.merging.render(t)
        else:
            return self.downloading.render(t)


class CLIProgress(Progress):
    # Only one live display may be active at once
    _progress = RichProgress(
        SpinnerColumn(speed=2.),
        TextColumn("[progress.description]{task.description}"),
        TextColumn("[progress.percentage]{task.percentage:>4.1f}%"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        TextColumn('ETA'),
        TimeRemainingColumn(),
        transient=True,
    )

    def __init__(self):
        # ids of tasks that are still in flight (used for the aggregated speed)
        self._active_ids: Set[TaskID] = set()

    @classmethod
    def start(cls):
        cls._progress.start()

    @classmethod
    def stop(cls):
        cls._progress.stop()

    @property
    def tasks(self):
        return self._progress.tasks

    @staticmethod
    def _cat_description(description, max_length=33):
        # keep long descriptions within terminal width: head...tail
        mid = (max_length - 3) // 2
        return description if len(description) < max_length else f'{description[:mid]}...{description[-mid:]}'

    async def add_task(
            self,
            description: str,
            start: bool = True,
            total: Optional[float] = None,
            completed: int = 0,
            visible: bool = True,
            **fields: Any,
    ) -> TaskID:
        task_id = self._progress.add_task(description=self._cat_description(description), start=start, total=total,
                                          completed=completed, visible=visible, **fields)
        self._active_ids.add(task_id)
        return task_id

    @property
    def active_speed(self):
        # sum of speeds over unfinished tasks (rich reports speed=None for idle tasks)
        return sum(self._progress.tasks[task_id].speed for task_id in self._active_ids
                   if self._progress.tasks[task_id].speed)

    async def update(
            self,
            task_id: TaskID,
            *,
            total: Optional[float] = None,
            completed: Optional[float] = None,
            advance: Optional[float] = None,
            description: Optional[str] = None,
            visible: Optional[bool] = None,
            refresh: bool = False,
            **fields: Any,
    ) -> None:
        if description:
            description = self._cat_description(description)
        self._progress.update(task_id, total=total, completed=completed, advance=advance, description=description,
                              visible=visible, refresh=refresh, **fields)
        # drop finished tasks from the active set so they stop counting toward speed
        if self._progress.tasks[task_id].finished and task_id in self._active_ids:
            self._active_ids.remove(task_id)

    @classmethod
    def switch_theme(cls, bs="rgb(95,138,239)", gs="rgb(65,165,189)"):
        # push a rich Theme overriding the default progress colors (bs: busy, gs: done)
        cls._progress.console.push_theme(Theme({
            # "progress.data.speed": Style(color=bs),
            "progress.download": Style(color=gs),
            "progress.percentage": Style(color=gs),
            "progress.spinner": Style(color=bs),
            "progress.remaining": Style(color=gs),
            # "bar.back": Style(color="grey23"),
            "bar.complete": Style(color=bs),
            "bar.finished": Style(color=gs),
            "bar.pulse": Style(color=bs),
        }))


================================================ FILE: bilix/progress/ws_progress.py ================================================
import asyncio
import json
from bilix.progress.cli_progress import CLIProgress


class WebSocketProgress(CLIProgress):
    # progress that behaves like CLIProgress but also broadcasts every event
    # to the connected websocket clients as JSON messages
    def __init__(self, sockets):
        super().__init__()
        self._sockets = sockets

    async def broadcast(self, msg: str):
        cors = [s.send_text(msg) for s in self._sockets]
        await asyncio.gather(*cors)

    async def add_task(self, **kwargs):
        task_id = await super().add_task(**kwargs)
        asyncio.create_task(
            self.broadcast(json.dumps({'method': 'add_task', 'task_id': task_id, **kwargs}))
@raise_api_error
async def get_cate_meta(client: httpx.AsyncClient) -> dict:
    """Fetch bilibili category (分区) metadata.

    :param client:
    :return: mapping from category name (main and sub) to its metadata dict
    """
    res = await req_retry(
        client, 'https://s1.hdslb.com/bfs/static/laputa-channel/client/assets/index.c0ea30e6.js')
    # The category table is embedded in a front-end js bundle as `Za=...;`
    raw = re.search('Za=([^;]*);', res.text).groups()[0]
    channels = json5.loads(raw)['channelList']
    meta = {}
    for channel in channels:
        # register each sub-category first, then the main category itself
        if 'sub' in channel:
            for sub in channel['sub']:
                meta[sub['name']] = sub
        meta[channel['name']] = channel
    return meta
@raise_api_error
async def get_collect_info(client: httpx.AsyncClient, url_or_sid: str):
    """Fetch collection (合集) info.

    :param client:
    :param url_or_sid: collection detail page url, or a bare season id
    :return: (collection name, uploader name, list of bvid)
    """
    if url_or_sid.startswith('http'):
        sid = re.search(r'sid=(\d+)', url_or_sid).groups()[0]
    else:
        sid = url_or_sid
    res = await req_retry(client, 'https://api.bilibili.com/x/space/fav/season/list',
                          params={'season_id': sid})
    payload = json.loads(res.text)['data']
    medias = payload['medias']
    col_name = payload['info']['title']
    # uploader name is taken from the first media entry
    up_name = medias[0]['upper']['name']
    bvids = [m['bvid'] for m in medias]
    return col_name, up_name, bvids
up_name, total_size, bvids @raise_api_error async def get_cate_page_info(client: httpx.AsyncClient, cate_id, time_from, time_to, pn=1, ps=30, order='click', keyword=''): """ 获取分区视频信息(分页) :param cate_id: :param pn: :param ps: :param order: :param keyword: :param time_from: :param time_to: :param client: :return: """ params = {'search_type': 'video', 'view_type': 'hot_rank', 'cate_id': cate_id, 'pagesize': ps, 'keyword': keyword, 'page': pn, 'order': order, 'time_from': time_from, 'time_to': time_to} res = await req_retry(client, 'https://s.search.bilibili.com/cate/search', params=params) info = json.loads(res.text) bvids = [i['bvid'] for i in info['result']] return bvids async def _add_sign(client: httpx.AsyncClient, params: dict): """添加b站api签名到params中 :param params: :return: """ OE = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 20, 34, 44, 52] res = await req_retry( client, "https://api.bilibili.com/x/web-interface/nav" ) info = json.loads(res.text) img_val = info['data']['wbi_img']['img_url'].split('/')[-1].split('.')[0] sub_val = info['data']['wbi_img']['sub_url'].split('/')[-1].split('.')[0] val = img_val + sub_val request_token = ''.join([val[v] for v in OE])[:32] wts = int(time.time()) params["wts"] = wts data = dict(sorted(params.items())) data_str = "&".join([f"{k}={v}" for k, v in data.items()]) + request_token md5 = hashlib.md5(data_str.encode("utf-8")).hexdigest() params["w_rid"] = md5 return params def _find_mid(space_url: str): return re.search(r'^https://space.bilibili.com/(\d+)/?', space_url).group(1) @raise_api_error async def get_up_video_info(client: httpx.AsyncClient, url_or_mid: str, pn=1, ps=30, order="pubdate", keyword=""): """ 获取up主信息 :param url_or_mid: :param pn: :param ps: :param order: :param keyword: :param client: :return: """ if 
class Media(BaseModel):
    """One downloadable media stream (a dash video/audio track or a durl mp4/flv segment)."""
    base_url: str
    backup_url: Optional[List[str]] = None
    size: Optional[int] = None
    width: Optional[int] = None
    height: Optional[int] = None
    suffix: Optional[str] = None
    quality: Optional[str] = None
    codec: Optional[str] = None
    segment_base: Optional[dict] = None

    @property
    def urls(self):
        """the copy of all url including backup"""
        if self.backup_url:
            return [self.base_url, *self.backup_url]
        return [self.base_url]
audios = [] audio_formats = {} if dash.get('audio', None): # some video have NO audio d = dash['audio'][0] m = Media(quality="default", suffix='.aac', codec=d['codecs'], **d) audios.append(m) audio_formats[m.quality] = m if dash['dolby']['type'] != 0: quality = "dolby" audio_formats[quality] = None if dash['dolby'].get('audio', None): d = dash['dolby']['audio'][0] m = Media(quality=quality, suffix='.eac3', codec=d['codecs'], **d) audios.append(m) audio_formats[m.quality] = m if dash.get('flac', None): quality = "flac" audio_formats[quality] = None if d := dash['flac']['audio']: m = Media(quality=quality, suffix='.flac', codec=d['codecs'], **d) audios.append(m) audio_formats[m.quality] = m return cls(duration=dash['duration'], videos=videos, audios=audios, video_formats=video_formats, audio_formats=audio_formats) def choose_video(self, quality: Union[int, str], video_codec: str) -> Media: # 1. absolute choice with quality name like 4k 1080p '1080p 60帧' if isinstance(quality, str): for k in self.video_formats: if k.upper().startswith(quality.upper()): # incase 1080P->1080p for c in self.video_formats[k]: if c.startswith(video_codec): return self.video_formats[k][c] # 2. 
class Status(BaseModel):
    """Statistics of a video / bangumi."""
    view: int = Field(description="播放量")
    danmaku: int = Field(description="弹幕数")
    coin: int = Field(description="硬币数")
    like: int = Field(description="点赞数")
    reply: int = Field(description="回复数")
    favorite: int = Field(description="收藏数")
    share: int = Field(description="分享数")
    follow: Optional[int] = Field(default=None, description="追剧数/追番数")

    @field_validator('view', mode="before")
    @classmethod
    def no_view(cls, v):
        # the api reports '--' when the play count is hidden; normalize it to 0
        if v == '--':
            return 0
        return v
@raise_api_error
async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo:
    """Fetch video info, preferring the web front-end with an api fallback.

    :param client:
    :param url: video page url
    :return: parsed :class:`VideoInfo`
    """
    try:
        # the html page carries richer data, so try the web front-end first
        video_info = await _get_video_info_from_html(client, url)
    except APIInvalidError:
        # web front-end banned / risk-controlled -> fall back to the REST api
        video_info = await _get_video_info_from_api(client, url)
    return video_info
async def _attach_ep_dash(client: httpx.AsyncClient, video_info: VideoInfo):
    """Query the pgc (bangumi/movie) playurl api and attach dash/durl streams to video_info."""
    params = {
        'support_multi_audio': True,
        'avid': video_info.aid,
        'cid': video_info.cid,
        'fnver': 0,
        'fnval': 4048,
        'fourk': 1,
        'ep_id': video_info.ep_id,
    }
    res = await req_retry(client, 'https://api.bilibili.com/pgc/player/web/v2/playurl', params=params)
    data = json.loads(res.text)['result']['video_info']
    if "dash" in data:
        video_info.dash = Dash.from_dict(data)
    if "durl" in data:
        medias = []
        for seg in data['durl']:
            # file extension comes from the url path, e.g. ".../xxx.mp4?..."
            ext = re.search(r'\.([a-zA-Z0-9]+)\?', seg['url']).group(1)
            medias.append(Media(base_url=seg['url'], backup_url=seg['backup_url'],
                                size=seg['size'], suffix=ext))
        video_info.other = medias
async def _get_video_basic_info_from_api(client: httpx.AsyncClient, url) -> VideoInfo:
    """通过 view api 获取视频的基本信息,不包括 dash 或 durl(other) 视频流资源

    :param client:
    :param url: BV/av video url (may carry a ``?p=n`` page selector)
    :return: VideoInfo with ``dash`` and ``other`` left as None
    :raises APIResourceError: if the view api returns a non-zero code
    """
    aid, bvid, selected_page_num = parse_ids_from_url(url)
    params = {'bvid': bvid} if bvid else {'aid': aid}
    r = await req_retry(client, 'https://api.bilibili.com/x/web-interface/view',
                        params=params, follow_redirects=True)
    raw_json = json.loads(r.text)
    if raw_json['code'] != 0:
        # pass the resource identifier as the second argument (was the message duplicated),
        # consistent with _attach_dash_and_durl_from_api which passes the bvid
        raise APIResourceError(raw_json['message'], bvid or aid)
    title = legal_title(raw_json['data']['title'])
    h1_title = title  # TODO: 根据视频类型,使 h1_title 与实际网页标题的格式一致
    aid = raw_json['data']['aid']
    bvid = raw_json['data']['bvid']
    base_url = f"https://www.bilibili.com/video/{bvid}/"
    status = Status(**raw_json['data']['stat'])
    pages = []
    p = None
    cid = None
    for idx, i in enumerate(raw_json['data']['pages']):
        page_num = int(i['page'])
        if page_num == selected_page_num:
            p = idx  # index of the selected page inside the pages list
            cid = int(i['cid'])  # cid of the selected page
        p_url = f"{base_url}?p={page_num}"
        p_name = f"P{page_num}-{i['part']}"
        pages.append(Page(p_name=p_name, p_url=p_url))
    # cid would also be None here if the requested page number does not exist
    assert p is not None, f"没有找到分P: p{selected_page_num},请检查输入"
    img_url = raw_json['data']['pic']
    basic_video_info = VideoInfo(title=title, h1_title=h1_title, aid=aid, cid=cid, status=status, p=p,
                                 pages=pages, img_url=img_url, bvid=bvid, dash=None, other=None)
    return basic_video_info
# session-scoped loop so every async test shares one event loop, see
# https://stackoverflow.com/questions/61022713/pytest-asyncio-has-a-closed-event-loop-but-only-when-running-all-tests
@pytest.fixture(scope="session")
def event_loop():
    """Yield a session-wide event loop, creating one if none is running."""
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
    yield loop
    loop.close()
datetime.now() time_from = time_to - timedelta(days=7) time_from, time_to = time_from.strftime('%Y%m%d'), time_to.strftime('%Y%m%d') meta = await api.get_cate_meta(client) bvids = await api.get_cate_page_info(client, cate_id=meta['宅舞']['tid'], time_from=time_from, time_to=time_to) assert len(bvids) > 0 and bvids[0].startswith('BV') @pytest.mark.asyncio async def test_get_up_video_info(): up_name, total_size, bvids = await api.get_up_video_info(client, "316568752", keyword="什么") assert len(bvids) > 0 and bvids[0].startswith('BV') # GitHub actions problem... # @pytest.mark.asyncio # async.md def test_get_special_audio(): # # Dolby # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV13L4y1K7th') # assert data.dash['dolby']['type'] != 0 # # Hi-Res # data = await api.get_video_info(client, 'https://www.bilibili.com/video/BV16K411S7sk') # assert data.dash['flac']['display'] @pytest.mark.asyncio async def test_get_video_info(): methods = (api._get_video_info_from_html, api._get_video_info_from_api) for method in methods: # 单个bv视频 data = await method(client, "https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0") assert len(data.pages) == 1 assert data.p == 0 assert data.bvid assert data.img_url.startswith('http://') or data.img_url.startswith('https://') assert data.dash # 多个bv视频 data = await method(client, "https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") assert len(data.pages) > 1 assert data.p == 4 assert data.bvid if method is api._get_video_info_from_api: continue # 电视剧 data = await method(client, "https://www.bilibili.com/bangumi/play/ss24053?spm_id_from=333.337.0.0") assert len(data.pages) > 1 assert data.status.follow # 动漫 data = await method(client, "https://www.bilibili.com/bangumi/play/ss5043?spm_id_from=333.337.0.0") assert len(data.pages) > 1 assert data.status.follow # 电影 data = await method(client, "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") assert data.title == '天气之子' assert 
data.status.follow # 纪录片 data = await method(client, "https://www.bilibili.com/bangumi/play/ss40509?from_spmid=666.9.hotlist.3") assert len(data.pages) > 1 assert data.status.follow @pytest.mark.asyncio async def test_get_subtitle_info(): data = await api.get_video_info(client, "https://www.bilibili.com/video/BV1hS4y1m7Ma") data = await api.get_subtitle_info(client, data.bvid, data.cid) assert data[0][0].startswith('http') assert data[0][1] @pytest.mark.asyncio async def test_get_dm_info(): data = await api.get_video_info(client, "https://www.bilibili.com/bangumi/play/ss33343?theme=movie&spm_id_from=333.337.0.0") data = await api.get_dm_urls(client, data.aid, data.cid) assert len(data) > 0 ================================================ FILE: bilix/sites/bilibili/downloader.py ================================================ import asyncio import functools import re from pathlib import Path from typing import Union, Sequence, Tuple, List import aiofiles import httpx from datetime import datetime, timedelta from . 
import api from bilix.download.base_downloader_part import BaseDownloaderPart from bilix._process import SingletonPPE from bilix.utils import legal_title, cors_slice, valid_sess_data, t2s, json2srt from bilix.download.utils import req_retry, path_check from bilix.exception import HandleMethodError, APIUnsupportedError, APIResourceError, APIError from bilix.cli.assign import kwargs_filter, auto_assemble from bilix import ffmpeg from danmakuC.bilibili import proto2ass class DownloaderBilibili(BaseDownloaderPart): cookie_domain = "bilibili.com" # for load cookies quickly pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(bilibili\.com|b23\.tv)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int, None] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, # unique params sess_data: str = None, video_concurrency: Union[int, asyncio.Semaphore] = 3, hierarchy: bool = True, ): """ :param client: :param browser: :param speed_limit: :param stream_retry: :param progress: :param logger: :param sess_data: bilibili SESSDATA cookie :param part_concurrency: 媒体分段并发数 :param video_concurrency: 视频并发数 :param hierarchy: 是否使用层级目录 """ client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderBilibili, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, ) client.cookies.set('SESSDATA', valid_sess_data(sess_data)) self._cate_meta = None self.v_sema = asyncio.Semaphore(video_concurrency) self.api_sema = asyncio.Semaphore(video_concurrency) self.hierarchy = hierarchy self.title_overflow = 50 @classmethod def parse_url(cls, url: str): if re.match(r'https://space\.bilibili\.com/\d+/favlist\?fid=\d+', url): return cls.get_favour elif re.match(r'https://space\.bilibili\.com/\d+/channel/seriesdetail\?sid=\d+', url): return cls.get_collect_or_list elif 
re.match(r'https://space\.bilibili\.com/\d+/channel/collectiondetail\?sid=\d+', url): return cls.get_collect_or_list elif re.match(r'https://space\.bilibili\.com/\d+', url): # up space url return cls.get_up elif re.search(r'(www\.bilibili\.com)|(b23\.tv)', url): return cls.get_video raise ValueError(f'{url} no match for bilibili') async def get_collect_or_list(self, url, path=Path('.'), quality=0, image=False, subtitle=False, dm=False, only_audio=False, codec: str = ''): """ 下载合集或视频列表 :cli: short: col :param url: 合集或视频列表详情页url :param path: 保存路径 :param quality: :param image: :param subtitle: :param dm: :param only_audio: :param codec: :return: """ if 'series' in url: list_name, up_name, bvids = await api.get_list_info(self.client, url) name = legal_title(f"【视频列表】{up_name}", list_name) elif 'collection' in url: col_name, up_name, bvids = await api.get_collect_info(self.client, url) name = legal_title(f"【合集】{up_name}", col_name) else: raise ValueError(f'{url} invalid for get_collect_or_list') if self.hierarchy: path /= name path.mkdir(parents=True, exist_ok=True) await asyncio.gather( *[self.get_series(f"https://www.bilibili.com/video/{i}", path=path, quality=quality, codec=codec, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio) for i in bvids]) async def get_favour(self, url_or_fid, path=Path('.'), num=20, keyword='', quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec: str = ''): """ 下载收藏夹内的视频 :cli: short: fav :param url_or_fid: 收藏夹url或收藏夹id :param path: 保存路径 :param num: 下载数量 :param keyword: 搜索关键词 :param quality: 画面质量,0为可以观看的最高画质,越大质量越低,超过范围时自动选择最低画质,或者直接使用字符串指定'1080p'等名称 :param series: 每个视频是否下载所有p,False时仅下载系列中的第一个视频 :param image: 是否下载封面 :param subtitle: 是否下载字幕 :param dm: 是否下载弹幕 :param only_audio: 是否仅下载音频 :param codec: :return: """ fav_name, up_name, total_size, bvids = await api.get_favour_page_info(self.client, url_or_fid, keyword=keyword) if self.hierarchy: name = legal_title(f"【收藏夹】{up_name}-{fav_name}") path /= 
name path.mkdir(parents=True, exist_ok=True) total = min(total_size, num) ps = 20 page_nums = total // ps + min(1, total % ps) cors = [] for i in range(page_nums): if i + 1 == page_nums: num = total - (page_nums - 1) * ps else: num = ps cors.append(self._get_favor_by_page( url_or_fid, path, i + 1, num, keyword, quality, series, image, subtitle, dm, only_audio, codec=codec)) await asyncio.gather(*cors) async def _get_favor_by_page(self, url_or_fid, path: Path, pn=1, num=20, keyword='', quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec=''): ps = 20 num = min(ps, num) _, _, _, bvids = await api.get_favour_page_info(self.client, url_or_fid, pn, ps, keyword) cors = [] for i in bvids[:num]: func = self.get_series if series else self.get_video # noinspection PyArgumentList cors.append(func(f'https://www.bilibili.com/video/{i}', path=path, quality=quality, codec=codec, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio)) await asyncio.gather(*cors) @property async def cate_meta(self): if not self._cate_meta: self._cate_meta = asyncio.ensure_future(api.get_cate_meta(self.client)) self._cate_meta = await self._cate_meta elif asyncio.isfuture(self._cate_meta): await self._cate_meta return self._cate_meta async def get_cate(self, cate_name: str, path=Path('.'), num=10, order='click', keyword='', days=7, quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec='', ): """ 下载分区视频 :cli: short: cate :param cate_name: 分区名称 :param path: 保存路径 :param num: 下载数量 :param order: 何种排序,click播放数,scores评论数,stow收藏数,coin硬币数,dm弹幕数 :param keyword: 搜索关键词 :param days: 过去days天中的结果 :param quality: 画面质量,0为可以观看的最高画质,越大质量越低,超过范围时自动选择最低画质,或者直接使用字符串指定'1080p'等名称 :param series: 每个视频是否下载所有p,False时仅下载系列中的第一个视频 :param image: 是否下载封面 :param subtitle: 是否下载字幕 :param dm: 是否下载弹幕 :param only_audio: 是否仅下载音频 :param codec: :return: """ cate_meta = await self.cate_meta if cate_name not in cate_meta: return self.logger.error(f'未找到分区 {cate_name}') 
if 'subChannelId' not in cate_meta[cate_name]: sub_names = [i['name'] for i in cate_meta[cate_name]['sub']] return self.logger.error(f'{cate_name} 是主分区,仅支持子分区,试试 {sub_names}') if self.hierarchy: path /= legal_title(f"【分区】{cate_name}") path.mkdir(parents=True, exist_ok=True) cate_id = cate_meta[cate_name]['tid'] time_to = datetime.now() time_from = time_to - timedelta(days=days) time_from, time_to = time_from.strftime('%Y%m%d'), time_to.strftime('%Y%m%d') pagesize = 30 page = 1 cors = [] while num > 0: cors.append(self._get_cate_by_page( cate_id, path, time_from, time_to, page, min(pagesize, num), order, keyword, quality, series, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio, codec=codec)) num -= pagesize page += 1 await asyncio.gather(*cors) async def _get_cate_by_page( self, cate_id, path: Path, time_from, time_to, pn=1, num=30, order='click', keyword='', quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec=''): bvids = await api.get_cate_page_info(self.client, cate_id, time_from, time_to, pn, 30, order, keyword) bvids = bvids[:num] func = self.get_series if series else self.get_video # noinspection PyArgumentList cors = [func(f"https://www.bilibili.com/video/{i}", path=path, quality=quality, codec=codec, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio) for i in bvids] await asyncio.gather(*cors) async def get_up( self, url_or_mid: str, path=Path('.'), num=10, order='pubdate', keyword='', quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec='', ): """ 下载up主视频 :cli: short: up :param url_or_mid: b站用户空间页面url 或b站用户id,在空间页面的url中可以找到 :param path: 保存路径 :param num: 下载总数 :param order: 何种排序,b站支持:最新发布pubdate,最多播放click,最多收藏stow :param keyword: 过滤关键词 :param quality: 画面质量,0为可以观看的最高画质,越大质量越低,超过范围时自动选择最低画质,或者直接使用字符串指定'1080p'等名称 :param series: 每个视频是否下载所有p,False时仅下载系列中的第一个视频 :param image: 是否下载封面 :param subtitle: 是否下载字幕 :param dm: 是否下载弹幕 :param only_audio: 是否仅下载音频 :param codec: :return: 
""" ps = 30 up_name, total_size, bv_ids = await api.get_up_video_info(self.client, url_or_mid, 1, ps, order, keyword) if self.hierarchy: path /= legal_title(f"【up】{up_name}") path.mkdir(parents=True, exist_ok=True) num = min(total_size, num) page_nums = num // ps + min(1, num % ps) cors = [] for i in range(page_nums): if i + 1 == page_nums: p_num = num - (page_nums - 1) * ps else: p_num = ps cors.append(self._get_up_by_page( url_or_mid, path, i + 1, p_num, order, keyword, quality, series, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio, codec=codec)) await asyncio.gather(*cors) async def _get_up_by_page(self, url_or_mid, path: Path, pn=1, num=30, order='pubdate', keyword='', quality=0, series=True, image=False, subtitle=False, dm=False, only_audio=False, codec='', ): ps = 30 num = min(ps, num) _, _, bvids = await api.get_up_video_info(self.client, url_or_mid, pn, ps, order, keyword) bvids = bvids[:num] func = self.get_series if series else self.get_video # noinspection PyArgumentList await asyncio.gather( *[func(f'https://www.bilibili.com/video/{bv}', path=path, quality=quality, codec=codec, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio) for bv in bvids]) async def get_series(self, url: str, path=Path('.'), quality: Union[str, int] = 0, image=False, subtitle=False, dm=False, only_audio=False, p_range: Sequence[int] = None, codec: str = ''): """ 下载某个系列(包括up发布的多p投稿,动画,电视剧,电影等)的所有视频。只有一个视频的情况下仍然可用该方法 :cli: short: s :param url: 系列中任意一个视频的url :param path: 保存路径 :param quality: 画面质量,0为可以观看的最高画质,越大质量越低,超过范围时自动选择最低画质,或者直接使用字符串指定'1080p'等名称 :param image: 是否下载封面 :param subtitle: 是否下载字幕 :param dm: 是否下载弹幕 :param only_audio: 是否仅下载音频 :param p_range: 下载集数范围,例如(1, 3):P1至P3 :param codec: 视频编码(可通过info获取) :return: """ try: async with self.api_sema: video_info = await api.get_video_info(self.client, url) except (APIResourceError, APIUnsupportedError) as e: return self.logger.warning(e) if self.hierarchy and len(video_info.pages) > 1: path /= video_info.title 
            path.mkdir(parents=True, exist_ok=True)
        # one coroutine per page; the already-fetched video_info is forwarded only for the page
        # the url points at (idx == video_info.p) so other pages re-fetch their own info
        cors = [self.get_video(p.p_url, path=path,
                               quality=quality, image=image, subtitle=subtitle, dm=dm, only_audio=only_audio,
                               codec=codec, video_info=video_info if idx == video_info.p else None)
                for idx, p in enumerate(video_info.pages)]
        if p_range:
            cors = cors_slice(cors, p_range)
        await asyncio.gather(*cors)

    async def get_video(self, url: str, path=Path('.'), quality: Union[str, int] = 0,
                        image=False, subtitle=False, dm=False, only_audio=False, codec: str = '',
                        time_range: Tuple[int, int] = None, video_info: api.VideoInfo = None):
        """
        Download a single video.
        :cli: short: v
        :param url: video url
        :param path: save directory
        :param quality: 0 for the best watchable quality; larger ints mean lower quality (out-of-range
            values pick the lowest); a string name such as '1080p' may also be used directly
        :param image: whether to download the cover image
        :param subtitle: whether to download subtitles
        :param dm: whether to download danmaku
        :param only_audio: whether to download audio only
        :param codec: video codec (can be obtained via codec)
        :param time_range: time range of the clip to cut
        :param video_info: extra data; when provided the page is not requested again
        :return:
        """
        async with self.v_sema:
            if not video_info:
                try:
                    video_info = await api.get_video_info(self.client, url)
                except (APIResourceError, APIUnsupportedError) as e:
                    return self.logger.warning(e)
            p_name = legal_title(video_info.pages[video_info.p].p_name)
            task_name = legal_title(video_info.title, p_name)
            # if title is too long, use p_name as base_name
            base_name = p_name if len(video_info.title) > self.title_overflow and self.hierarchy and p_name else \
                task_name
            media_name = base_name if not time_range else legal_title(base_name, *map(t2s, time_range))
            media_cors = []
            task_id = await self.progress.add_task(total=None, description=task_name)
            if video_info.dash:
                try:  # choose video quality
                    video, audio = video_info.dash.choose_quality(quality, codec)
                except KeyError:
                    self.logger.warning(
                        f"{task_name} 清晰度<{quality}> 编码<{codec}>不可用,请检查输入是否正确或是否需要大会员")
                else:
                    # (media, target path) pairs, converted to download coroutines below
                    tmp: List[Tuple[api.Media, Path]] = []
                    # 1. only video
                    if not audio and not only_audio:
                        tmp.append((video, path / f'{media_name}.mp4'))
                    # 2. video and audio
                    # 2. video + audio: download both streams separately, then merge with ffmpeg
                    elif audio and not only_audio:
                        exists, media_path = path_check(path / f'{media_name}.mp4')
                        if exists:
                            self.logger.info(f'[green]已存在[/green] {media_path.name}')
                        else:
                            tmp.append((video, path / f'{media_name}-v'))
                            tmp.append((audio, path / f'{media_name}-a'))
                            # task need to be merged
                            await self.progress.update(task_id=task_id, upper=ffmpeg.combine)
                    # 3. only audio
                    elif audio and only_audio:
                        tmp.append((audio, path / f'{media_name}{audio.suffix}'))
                    else:
                        self.logger.warning(f"No audio for {task_name}")
                    # convert to coroutines
                    if not time_range:
                        media_cors.extend(self.get_file(t[0].urls, path=t[1], task_id=task_id) for t in tmp)
                    else:
                        # clip mode: the video clip's start is shared with the audio clip via a Future
                        if len(tmp) > 0:
                            fut = asyncio.Future()  # to fix key frame
                            v = tmp[0]
                            media_cors.append(self.get_media_clip(v[0].urls, v[1], time_range,
                                                                  init_range=v[0].segment_base['initialization'],
                                                                  seg_range=v[0].segment_base['index_range'],
                                                                  set_s=fut, task_id=task_id))
                        if len(tmp) > 1:  # with audio
                            a = tmp[1]
                            media_cors.append(self.get_media_clip(a[0].urls, a[1], time_range,
                                                                  init_range=a[0].segment_base['initialization'],
                                                                  seg_range=a[0].segment_base['index_range'],
                                                                  get_s=fut, task_id=task_id))
            elif video_info.other:
                self.logger.warning(
                    f"{task_name} 未解析到dash资源,转入durl mp4/flv下载(不需要会员的电影/番剧预览,不支持dash的视频)")
                media_name = base_name
                if len(video_info.other) == 1:
                    m = video_info.other[0]
                    media_cors.append(
                        self.get_file(m.urls, path=path / f'{media_name}.{m.suffix}', task_id=task_id))
                else:
                    exist, media_path = path_check(path / f'{media_name}.mp4')
                    if exist:
                        self.logger.info(f'[green]已存在[/green] {media_path.name}')
                    else:
                        # limit how many durl parts are fetched concurrently
                        p_sema = asyncio.Semaphore(self.part_concurrency)

                        async def _get_file(media: api.Media, p: Path) -> Path:
                            async with p_sema:
                                return await self.get_file(media.urls, path=p, task_id=task_id)

                        for i, m in enumerate(video_info.other):
                            f = f'{media_name}-{i}.{m.suffix}'
                            media_cors.append(_get_file(m, path / f))
                        # multiple parts must be concatenated afterwards
                        await self.progress.update(task_id=task_id, upper=ffmpeg.concat)
            else:
                self.logger.warning(f'{task_name} 需要大会员或该地区不支持')
            # additional task
            add_cors = []
            if image or subtitle or dm:
                extra_path = path / "extra" if self.hierarchy else path
                extra_path.mkdir(exist_ok=True)
                if image:
                    add_cors.append(self.get_static(video_info.img_url, path=extra_path / base_name))
                if subtitle:
                    add_cors.append(self.get_subtitle(url, path=extra_path, video_info=video_info))
                if dm:
                    # danmaku is rendered at the chosen video resolution; fall back to 1920x1080
                    # when no dash video was selected above (`video` unbound -> UnboundLocalError)
                    try:
                        width, height = video.width, video.height
                    except UnboundLocalError:
                        width, height = 1920, 1080
                    add_cors.append(self.get_dm(
                        url, path=extra_path, convert_func=self._dm2ass_factory(width, height), video_info=video_info))
            path_lst, _ = await asyncio.gather(asyncio.gather(*media_cors), asyncio.gather(*add_cors))
            # run the deferred post-processing step (ffmpeg.combine / ffmpeg.concat) if one was registered
            if upper := self.progress.tasks[task_id].fields.get('upper', None):
                await upper(path_lst, media_path)
            self.logger.info(f'[cyan]已完成[/cyan] {media_path.name}')
            await self.progress.update(task_id, visible=False)

    @staticmethod
    def _dm2ass_factory(width: int, height: int):
        """Build an async converter that renders protobuf danmaku to an ass subtitle sized width x height."""
        async def dm2ass(protobuf_bytes: bytes) -> bytes:
            loop = asyncio.get_event_loop()
            # proto2ass is CPU bound, so run it in the singleton process pool executor
            f = functools.partial(proto2ass, protobuf_bytes, width, height, font_size=width / 40, )
            content = await loop.run_in_executor(SingletonPPE(), f)
            return content.encode('utf-8')

        return dm2ass

    async def get_dm(self, url, path=Path('.'), update=False, convert_func=None, video_info=None):
        """
        Download the danmaku of a video.
        :cli: short: dm
        :param url: video url
        :param path: save directory
        :param update: whether to re-download and overwrite a previously saved danmaku file
        :param convert_func:
        :param video_info: extra data; when provided the page is not requested again
        :return:
        """
        if not video_info:
            video_info = await api.get_video_info(self.client, url)
        aid, cid = video_info.aid, video_info.cid
        # file extension: raw protobuf -> .pb, otherwise derived from the converter name (e.g. dm2ass -> .ass)
        file_type = '.' + ('pb' if not convert_func else convert_func.__name__.split('2')[-1])
        p_name = video_info.pages[video_info.p].p_name
        # to avoid file name too long bug
        if len(video_info.title) > self.title_overflow and self.hierarchy and p_name:
            file_name = legal_title(p_name, "弹幕") + file_type
        else:
            file_name = legal_title(video_info.title, p_name, "弹幕") + file_type
        file_path = path / file_name
        exist, file_path = path_check(file_path)
        if not update and exist:
            self.logger.info(f"[green]已存在[/green] {file_name}")
            return file_path
        dm_urls = await api.get_dm_urls(self.client, aid, cid)
        cors = [req_retry(self.client, dm_url) for dm_url in dm_urls]
        results = await asyncio.gather(*cors)
        content = b''.join(res.content for res in results)
        content = convert_func(content) if convert_func else content
        # converters may be sync or async
        if asyncio.iscoroutine(content):
            content = await content
        async with aiofiles.open(file_path, 'wb') as f:
            await f.write(content)
        self.logger.info(f"[cyan]已完成[/cyan] {file_name}")
        return file_path

    async def get_subtitle(self, url, path=Path('.'), convert_func=json2srt, video_info=None):
        """
        Download the subtitle files of a video.
        :cli: short: sub
        :param url: video url
        :param path: save directory for the subtitle files
        :param convert_func: function used to convert original subtitle text
        :param video_info: extra data; when provided the page is not requested again
        :return:
        """
        if not video_info:
            video_info = await api.get_video_info(self.client, url)
        p, cid = video_info.p, video_info.cid
        p_name = video_info.pages[p].p_name
        try:
            subtitles = await api.get_subtitle_info(self.client, video_info.bvid, cid)
        except APIError as e:
            return self.logger.warning(e)
        cors = []
        for sub_url, sub_name in subtitles:
            # avoid over-long file names: drop the (long) title and keep only the page name
            if len(video_info.title) > self.title_overflow and self.hierarchy and p_name:
                file_name = legal_title(p_name, sub_name)
            else:
                file_name = legal_title(video_info.title, p_name, sub_name)
            cors.append(self.get_static(sub_url, path / file_name, convert_func=convert_func))
        paths = await asyncio.gather(*cors)
        return paths

    @classmethod
    @auto_assemble
    def handle(cls, method: str, keys: Tuple[str, ...], options: dict):
        # only claim keys that look like bilibili urls/ids; 'cate' methods take a category name instead
        if cls.pattern.match(keys[0]) or method == 'cate' or method == 'get_cate':
            if method in {'auto', 'a'}:
                m = cls.parse_url(keys[0])
            elif method in cls._cli_map:
                m = cls._cli_map[method]
            else:
                raise HandleMethodError(cls, method=method)
            d = cls(sess_data=options['cookie'], **kwargs_filter(cls, options))
            return d, m


================================================ FILE: bilix/sites/bilibili/downloader_test.py ================================================
import httpx
import pytest
from bilix.sites.bilibili import DownloaderBilibili


@pytest.mark.asyncio
async def test_get_collect_or_list():
    d = DownloaderBilibili()
    await d.get_collect_or_list('https://space.bilibili.com/54296062/channel/collectiondetail?sid=412818&ctype=0',
                                quality=999)
    await d.get_collect_or_list('https://space.bilibili.com/8251621/channel/seriesdetail?sid=2323334&ctype=0',
                                quality=999)
    await d.aclose()


@pytest.mark.asyncio
async def test_get_favour():
    d = DownloaderBilibili()
    await d.get_favour("69072721", num=1, quality=999)
    await d.aclose()


@pytest.mark.asyncio
async def test_get_cate():
    d = DownloaderBilibili()
    await d.get_cate("宅舞", num=1, order="click", keyword="jk", quality=1)
    await d.aclose()


@pytest.mark.asyncio
async def test_get_up():
    d = DownloaderBilibili()
    await d.get_up("455511061", num=1, order="pubdate", quality=1)
    await d.aclose()


@pytest.mark.asyncio
async def test_get_series():
    d = DownloaderBilibili()
    await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), quality=999)
    # only audio
    await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(5, 5), only_audio=True)
    # image
    await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=3", p_range=(1, 1), image=True, quality=999)
    # single video
    await d.get_series("https://www.bilibili.com/video/BV1sS4y1b7qb?spm_id_from=333.999.0.0", quality=999)
    await d.aclose()


@pytest.mark.asyncio
async def test_get_dm():
    d = DownloaderBilibili()
    await \
d.get_dm('https://www.bilibili.com/video/BV11Z4y1z7s8?spm_id_from=333.337.search-card.all.click') await d.aclose() @pytest.mark.asyncio async def test_get_subtitle(): d = DownloaderBilibili() await d.get_subtitle("https://www.bilibili.com/video/BV1hS4y1m7Ma") await d.aclose() @pytest.mark.asyncio async def test_choose_quality(): import os from bilix.sites.bilibili import api client = httpx.AsyncClient() client.cookies.set('SESSDATA', os.getenv('BILI_TOKEN')) # dolby data = await api.get_video_info(client, "https://www.bilibili.com/video/BV13L4y1K7th") try: video, audio = data.dash.choose_quality(quality=999, codec=":ec-3") except KeyError: assert not os.getenv("BILI_TOKEN") # normal data.dash.choose_quality(quality="360P", codec="hev") # hi-res data = await api.get_video_info(client, "https://www.bilibili.com/video/BV16K411S7sk") try: video, audio = data.dash.choose_quality(quality='1080P', codec="hev:fLaC") except KeyError: assert not os.getenv("BILI_TOKEN") ================================================ FILE: bilix/sites/bilibili/informer.py ================================================ import asyncio from typing import Tuple from rich.tree import Tree from .downloader import DownloaderBilibili from . 
import api from bilix.log import logger from rich import print as rprint from bilix.utils import convert_size from bilix.download.utils import req_retry from bilix.cli.assign import kwargs_filter class InformerBilibili(DownloaderBilibili): """A special downloader with functionality to log info of bilibili resources""" @classmethod def parse_url(cls, url: str): res = super().parse_url(url) func_name = res.__name__.replace("get_", "info_") return getattr(cls, func_name) async def info_key(self, key): await self.parse_url(key)(self, key) async def info_up(self, url: str): up_name, total_size, bvids = await api.get_up_video_info(self.client, url) rprint(up_name) async def info_favour(self, url: str): pass async def info_collect_or_list(self, url: str): pass async def info_video(self, url: str): video_info = await api.get_video_info(self.client, url) if video_info.dash is None and video_info.other is None: return logger.warning(f'{video_info.title} 需要大会员或该地区不支持') elif video_info.other and video_info.dash is None: return rprint(video_info.other) # todo: beautify durl info async def ensure_size(m: api.Media): if m.size is None: res = await req_retry(self.client, m.base_url, method='GET', headers={'Range': 'bytes=0-1'}) m.size = int(res.headers['Content-Range'].split('/')[-1]) dash = video_info.dash cors = [ensure_size(m) for m in dash.videos] + [ensure_size(m) for m in dash.audios] await asyncio.gather(*cors) tree = Tree( f"[bold reverse] {video_info.title}-{video_info.pages[video_info.p].p_name} [/]" f" {video_info.status.view:,}👀 {video_info.status.like:,}👍 {video_info.status.coin:,}🪙", guide_style="bold cyan") video_tree = tree.add("[bold]画面 Video") audio_tree = tree.add("[bold]声音 Audio") leaf_fmt = "codec: {codec:32} size: {size}" # for video for quality in dash.video_formats: p_tree = video_tree.add(quality) for c in dash.video_formats[quality]: m = dash.video_formats[quality][c] p_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) if 
len(p_tree.children) == 0: p_tree.style = "rgb(242,93,142)" p_tree.add("需要登录或大会员") # for audio name_map = {"default": "默认音质", "dolby": "杜比全景声 Dolby", "flac": "Hi-Res无损"} for k in dash.audio_formats: sub_tree = audio_tree.add(name_map[k]) if m := dash.audio_formats[k]: sub_tree.add(leaf_fmt.format(codec=m.codec, size=convert_size(m.size))) else: sub_tree.style = "rgb(242,93,142)" sub_tree.add("需要登录或大会员") rprint(tree) @classmethod def handle(cls, method: str, keys: Tuple[str, ...], options: dict): if cls.pattern.match(keys[0]) and 'info' == method: informer = InformerBilibili(sess_data=options['cookie'], **kwargs_filter(cls, options)) # in order to maintain order async def temp(): for key in keys: if len(keys) > 1: logger.info(f"For {key}") await informer.info_key(key) return informer, temp() ================================================ FILE: bilix/sites/bilibili/informer_test.py ================================================ import pytest from bilix.sites.bilibili import InformerBilibili informer = InformerBilibili() @pytest.mark.asyncio async def test_bilibili_informer(): await informer.info_video('https://www.bilibili.com/video/BV1sG411A7r3') await informer.info_video('https://www.bilibili.com/video/BV1oG4y1Z7fx') await informer.info_video('https://www.bilibili.com/video/BV1eV411W7tt') await informer.info_video("https://www.bilibili.com/bangumi/play/ep508404/") ================================================ FILE: bilix/sites/bilibili/utils.py ================================================ import re def parse_ids_from_url(url_or_string: str): bvid, aid, page_num = None, None, 1 if re.match(r'https?://www.bilibili.com/video/BV\w+', url_or_string) or re.match(r'BV\w+', url_or_string): bvid = re.search(r'(BV\w+)', url_or_string).groups()[0] assert bvid.isalnum() elif re.match(r'https?://www.bilibili.com/video/av\d+', url_or_string) or re.match(r'av\d+', url_or_string): aid = re.search(r'av(\d+)', url_or_string).groups()[0] assert aid.isdigit() aid = int(aid) 
else: raise ValueError(f"{url_or_string} is not a valid bilibili video url") # ?p=123 or &p=123 if m := re.match(r'.*[?&]p=(\d+)', url_or_string): page_num = int(m.groups()[0]) assert page_num >= 1 return aid, bvid, page_num ================================================ FILE: bilix/sites/bilibili/utils_test.py ================================================ from bilix.sites.bilibili.utils import parse_ids_from_url def test_parse_ids_from_url(): strings = [ "https://www.bilibili.com/video/av170001", "http://www.bilibili.com/video/BV1Xx41117Tz/?ba=labala&p=3#time=1234", "av170001", "BV1sE411w7tQ?p=2&from=search", "https://www.bilibili.com/video/BV1xx411c7HW?p=1" ] results = [ (170001, None, 1), (None, 'BV1Xx41117Tz', 3), (170001, None, 1), (None, 'BV1sE411w7tQ', 2), (None, 'BV1xx411c7HW', 1) ] for index, string in enumerate(strings): assert parse_ids_from_url(string) == results[index] ================================================ FILE: bilix/sites/cctv/__init__.py ================================================ from .downloader import DownloaderCctv __all__ = ['DownloaderCctv'] ================================================ FILE: bilix/sites/cctv/api.py ================================================ import asyncio import re import json from typing import Sequence, Tuple import httpx import m3u8 from bilix.download.utils import req_retry, raise_api_error from bilix.utils import legal_title dft_client_settings = { 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 'http2': True } @raise_api_error async def get_id(client: httpx.AsyncClient, url: str) -> Tuple[str, str, str]: res_web = await req_retry(client, url) pid = re.findall(r'guid ?= ?"(\w+)"', res_web.text)[0] vide = re.findall(r'/(VIDE\w+)\.', url)[0] try: vida = re.findall(r'videotvCodes ?= ?"(\w+)"', res_web.text)[0] except IndexError: vida = None return pid, vide, vida @raise_api_error async def get_media_info(client: httpx.AsyncClient, pid: str) -> Tuple[str, Sequence[str]]: """ :param pid: 
:param client: :return: title and m3u8 urls sorted by quality """ res = await req_retry(client, f'https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid={pid}') info_data = json.loads(res.text) # extract title = legal_title(info_data['title']) m3u8_main_url = info_data['hls_url'] res = await req_retry(client, m3u8_main_url) m3u8_info = m3u8.loads(res.text) if m3u8_info.base_uri is None: m3u8_info.base_uri = re.match(r'(https?://[^/]*)/', m3u8_main_url).groups()[0] m3u8_urls = list(sorted((i.absolute_uri for i in m3u8_info.playlists), reverse=True, key=lambda s: int(re.findall(r'/(\d+).m3u8', s)[0]))) return title, m3u8_urls @raise_api_error async def get_series_info(client: httpx.AsyncClient, vide: str, vida: str) -> Tuple[str, Sequence[str]]: """ :param vide: :param vida: :param client: :return: title and list of guid(pid) """ params = {'mode': 0, 'id': vida, 'serviceId': 'tvcctv', 'p': 1, 'n': 999} res_meta, res_list = await asyncio.gather( req_retry(client, f"https://api.cntv.cn/NewVideoset/getVideoAlbumInfoByVideoId?id={vide}&serviceId=tvcctv"), req_retry(client, f'https://api.cntv.cn/NewVideo/getVideoListByAlbumIdNew', params=params) ) meta_data = json.loads(res_meta.text) list_data = json.loads(res_list.text) # extract title = legal_title(meta_data['data']['title']) pids = [i['guid'] for i in list_data['data']['list']] return title, pids ================================================ FILE: bilix/sites/cctv/api_test.py ================================================ import httpx import pytest from bilix.sites.cctv import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): pid, vide, vida = await api.get_id(client, "https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml") data = await api.get_media_info(client, pid) data = await api.get_series_info(client, vide, vida) pass ================================================ FILE: bilix/sites/cctv/downloader.py 
================================================ import asyncio import re from pathlib import Path from typing import Union, Tuple import httpx from . import api from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 class DownloaderCctv(BaseDownloaderM3u8): pattern = re.compile(r'https?://(?:tv\.cctv\.com|tv\.cctv\.cn)/?[?/](?:pid=)?(\d+)(?:&vid=(\d+))?(?:&v=(\d+))?') def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, video_concurrency: Union[int, asyncio.Semaphore] = 3, # unique params hierarchy: bool = True, ): client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderCctv, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, video_concurrency=video_concurrency, ) self.hierarchy = hierarchy async def get_series(self, url: str, path=Path('.'), quality: int = 0): """ :cli: short: s :param url: :param path: :param quality: :return: """ pid, vide, vida = await api.get_id(self.client, url) if vida is None: # 单个视频 await self.get_video(pid, quality=quality) else: # 剧集 title, pids = await api.get_series_info(self.client, vide, vida) if self.hierarchy: path /= title path.mkdir(parents=True, exist_ok=True) await asyncio.gather(*[self.get_video(pid, path, quality) for pid in pids]) async def get_video(self, url_or_pid: str, path=Path('.'), quality: int = 0, time_range: Tuple[int, int] = None): """ :cli: short: v :param url_or_pid: :param path: :param quality: :param time_range: :return: """ if url_or_pid.startswith('http'): pid, _, _ = await api.get_id(self.client, url_or_pid) else: pid = url_or_pid title, m3u8_urls = await api.get_media_info(self.client, pid) m3u8_url = m3u8_urls[min(quality, len(m3u8_urls) - 1)] file_path = await self.get_m3u8_video(m3u8_url, path / f"{title}.mp4", 
time_range=time_range) return file_path ================================================ FILE: bilix/sites/douyin/__init__.py ================================================ from .downloader import DownloaderDouyin __all__ = ['DownloaderDouyin'] ================================================ FILE: bilix/sites/douyin/api.py ================================================ """ Originally From @Author: https://github.com/Evil0ctal/ https://github.com/Evil0ctal/Douyin_TikTok_Download_API Modified by @Author: https://github.com/HFrost0/ """ import asyncio import re import json from typing import List import httpx from pydantic import BaseModel from bilix.utils import legal_title from bilix.download.utils import req_retry, raise_api_error dft_client_settings = { 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' ' Safari/537.36 Edg/87.0.664.66'}, 'http2': True } class VideoInfo(BaseModel): title: str author_name: str wm_urls: List[str] nwm_urls: List[str] cover: str dynamic_cover: str origin_cover: str @raise_api_error async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: if short_url := re.findall(r'https://v.douyin.com/\w+/', url): res = await req_retry(client, short_url[0], follow_redirects=True) url = str(res.url) if key := re.search(r'/video/(\d+)', url): key = key.groups()[0] else: key = re.search(r"modal_id=(\d+)", url).groups()[0] res = await req_retry(client, f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={key}') data = json.loads(res.text) data = data['item_list'][0] # 视频标题 title = legal_title(data['desc']) # 视频作者昵称 author_name = data['author']['nickname'] # 有水印视频链接 wm_urls = data['video']['play_addr']['url_list'] # 无水印视频链接 (在回执JSON中将关键字'playwm'替换为'play'即可获得无水印地址) nwm_urls = list(map(lambda x: x.replace('playwm', 'play'), wm_urls)) # 视频封面 cover = data['video']['cover']['url_list'][0] # 视频动态封面 dynamic_cover = 
data['video']['dynamic_cover']['url_list'][0] # 视频原始封面 origin_cover = data['video']['origin_cover']['url_list'][0] video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, dynamic_cover=dynamic_cover, origin_cover=origin_cover) return video_info if __name__ == '__main__': async def main(): client = httpx.AsyncClient(**dft_client_settings) data = await get_video_info(client, 'https://www.douyin.com/video/7132430286415252773') print(data) asyncio.run(main()) ================================================ FILE: bilix/sites/douyin/api_test.py ================================================ import httpx import pytest from bilix.sites.douyin import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://www.douyin.com/video/7132430286415252773") pass ================================================ FILE: bilix/sites/douyin/downloader.py ================================================ import asyncio import re from pathlib import Path from typing import Union import httpx from . 
import api from bilix.download.base_downloader_part import BaseDownloaderPart from bilix.utils import legal_title class DownloaderDouyin(BaseDownloaderPart): pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(douyin\.com)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int, None] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, ): client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderDouyin, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, ) async def get_video(self, url: str, path=Path('.'), image=False): """ :cli: short: v :param url: :param path: :param image: :return: """ video_info = await api.get_video_info(self.client, url) title = legal_title(video_info.author_name, video_info.title) cors = [self.get_file(video_info.nwm_urls, path=path / f"{title}.mp4")] if image: cors.append(self.get_static(video_info.cover, path / title)) await asyncio.gather(*cors) ================================================ FILE: bilix/sites/douyin/downloader_test.py ================================================ import pytest from bilix.sites.douyin import DownloaderDouyin @pytest.mark.asyncio async def test_get_video(): async with DownloaderDouyin() as d: await d.get_video('https://v.douyin.com/r4tm4Pe/') ================================================ FILE: bilix/sites/hanime1/__init__.py ================================================ from .downloader import DownloaderHanime1 __all__ = ['DownloaderHanime1'] ================================================ FILE: bilix/sites/hanime1/api.py ================================================ from pydantic import BaseModel import httpx from bilix.utils import legal_title from bilix.download.utils import req_retry, raise_api_error from bs4 import BeautifulSoup BASE_URL = "https://hanime1.me" 
dft_client_settings = { 'headers': {'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012)' ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile' ' Safari/537.36 Edg/87.0.664.66', "Referer": BASE_URL}, 'http2': False } class VideoInfo(BaseModel): url: str avid: str title: str video_url: str img_url: str @raise_api_error async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: if url_or_avid.startswith('http'): url = url_or_avid avid = url.split('=')[-1] else: url = f'{BASE_URL}/watch?v={url_or_avid}' avid = url_or_avid res = await req_retry(client, url) soup = BeautifulSoup(res.text, "html.parser") title = soup.find('meta', property="og:title")['content'] title = legal_title(title) img_url = soup.find('meta', property="og:image")['content'] video_url = soup.find('input', {'id': 'video-sd'})['value'] video_info = VideoInfo(url=url, avid=avid, title=title, img_url=img_url, video_url=video_url) return video_info ================================================ FILE: bilix/sites/hanime1/api_test.py ================================================ import httpx import pytest from bilix.sites.hanime1 import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://hanime1.me/watch?v=39123") assert data.title data = await api.get_video_info(client, "https://hanime1.me/watch?v=13658") assert data.title ================================================ FILE: bilix/sites/hanime1/downloader.py ================================================ import asyncio import re from pathlib import Path from typing import Union, Tuple import httpx from . 
import api from bilix.download.base_downloader_part import BaseDownloaderPart from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 class DownloaderHanime1(BaseDownloaderM3u8, BaseDownloaderPart): pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(hanime1\.me)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, video_concurrency: Union[int, asyncio.Semaphore] = 3, ): self.client = client or httpx.AsyncClient(**api.dft_client_settings) super().__init__( client=self.client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, video_concurrency=video_concurrency, ) async def get_video(self, url: str, path=Path('.'), image=False, time_range: Tuple[int, int] = None): """ :cli: short: v :param url: :param path: :param image: :param time_range: :return: """ video_info = await api.get_video_info(self.client, url) video_url = video_info.video_url cors = [ self.get_m3u8_video( video_url, path=path / f'{video_info.title}.mp4', time_range=time_range) if '.m3u8' in video_url else self.get_file(video_url, path=path / f'{video_info.title}.mp4')] if image: cors.append(self.get_static(video_info.img_url, path=path / video_info.title)) await asyncio.gather(*cors) ================================================ FILE: bilix/sites/jable/__init__.py ================================================ from .downloader import DownloaderJable __all__ = ['DownloaderJable'] ================================================ FILE: bilix/sites/jable/api.py ================================================ import re from pydantic import BaseModel import httpx from bs4 import BeautifulSoup from bilix.utils import legal_title from bilix.download.utils import raise_api_error, req_retry BASE_URL = "https://jable.tv" dft_client_settings = { 'headers': 
{'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 'http2': False } class VideoInfo(BaseModel): url: str avid: str title: str actor_name: str m3u8_url: str img_url: str @raise_api_error async def get_actor_info(client: httpx.AsyncClient, url: str): res = await req_retry(client, url) soup = BeautifulSoup(res.text, "html.parser") actor_name = soup.find('h2', class_='h3-md mb-1').text urls = [h6.a['href'] for h6 in soup.find('section', class_='pb-3 pb-e-lg-40').find_all('h6')] return {'actor_name': actor_name, 'urls': urls} @raise_api_error async def get_video_info(client: httpx.AsyncClient, url_or_avid: str) -> VideoInfo: if url_or_avid.startswith('http'): url = url_or_avid avid = url.split('/')[-2] else: url = f'{BASE_URL}/videos/{url_or_avid}/' avid = url_or_avid avid = avid.upper() res = await req_retry(client, url) # proxies default global in httpx soup = BeautifulSoup(res.text, "html.parser") title = soup.find('meta', property="og:title")['content'] title = legal_title(title) if span := soup.find("span", class_="placeholder rounded-circle"): actor_name = span['title'] else: # https://github.com/HFrost0/bilix/issues/45 for some video actor name in different place actor_name = soup.find("img", class_="avatar rounded-circle")['title'] img_url = soup.find('meta', property="og:image")['content'] m3u8_url = re.findall(r'http.*m3u8', res.text)[0] video_info = VideoInfo(url=url, avid=avid, title=title, img_url=img_url, m3u8_url=m3u8_url, actor_name=actor_name) return video_info ================================================ FILE: bilix/sites/jable/api_test.py ================================================ import httpx import pytest from bilix.sites.jable import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://jable.tv/videos/ssis-533/") assert data.actor_name data = await api.get_video_info(client, "https://jable.tv/videos/ssis-448/") assert 
data.actor_name @pytest.mark.asyncio async def test_get_actor_info(): data = await api.get_actor_info(client, 'https://jable.tv/models/393ec3548aecc34004d54e03becd2ea9/') assert data['actor_name'].encode('utf8') == b'\xe4\xbd\x90\xe4\xb9\x85\xe8\x89\xaf\xe5\x92\xb2\xe5\xb8\x8c' assert data['urls'] ================================================ FILE: bilix/sites/jable/downloader.py ================================================ import asyncio import re from pathlib import Path from typing import Union, Tuple import httpx from . import api from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 class DownloaderJable(BaseDownloaderM3u8): pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(jable\.tv)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, video_concurrency: Union[int, asyncio.Semaphore] = 3, # unique params hierarchy: bool = True, ): client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderJable, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, video_concurrency=video_concurrency, ) self.hierarchy = hierarchy async def get_actor(self, url: str, path=Path("."), image=True): """ download videos of a actor :cli: short: a :param url: actor page url :param path: save path :param image: download cover :return: """ data = await api.get_actor_info(self.client, url) if self.hierarchy: path /= data['actor_name'] path.mkdir(parents=True, exist_ok=True) await asyncio.gather(*[self.get_video(url, path, image) for url in data['urls']]) async def get_video(self, url: str, path=Path("."), image=True, time_range: Tuple[int, int] = None): """ :cli: short: v :param url: :param path: :param image: :param time_range: :return: """ video_info = await 
api.get_video_info(self.client, url) if self.hierarchy: path /= f"{video_info.avid} {video_info.actor_name}" path.mkdir(parents=True, exist_ok=True) cors = [self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f"{video_info.title}.mp4", time_range=time_range)] if image: cors.append(self.get_static(video_info.img_url, path=path / video_info.title, )) await asyncio.gather(*cors) ================================================ FILE: bilix/sites/tiktok/__init__.py ================================================ from .downloader import DownloaderTiktok __all__ = ['DownloaderTiktok'] ================================================ FILE: bilix/sites/tiktok/api.py ================================================ """ Originally From @Author: https://github.com/Evil0ctal/ https://github.com/Evil0ctal/Douyin_TikTok_Download_API """ import re import json import random from typing import List import httpx from pydantic import BaseModel from bilix.utils import legal_title from bilix.download.utils import req_retry, raise_api_error dft_client_settings = { 'headers': {'user-agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;' '+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+' '(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'}, 'http2': True } class VideoInfo(BaseModel): title: str author_name: str wm_urls: List[str] nwm_urls: List[str] cover: str dynamic_cover: str origin_cover: str @raise_api_error async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: if short_url := re.findall(r'https://www.tiktok.com/t/\w+/', url): res = await req_retry(client, short_url[0], follow_redirects=True) url = str(res.url) if key := re.search(r'/video/(\d+)', url): key = key.groups()[0] else: key = re.search(r"/v/(\d+)", url).groups()[0] params = {'aweme_id': key, 'aid': 1180, 'iid': 6165993682518218889, 'device_id': random.randint(10 * 10 * 10, 9 * 10 ** 10)} res = await req_retry(client, 
'https://api16-normal-c-useast1a.tiktokv.com/aweme/v1/feed/', params=params) data = json.loads(res.text) data = data['aweme_list'][0] # 视频标题 (如果为空则使用分享标题) title = legal_title(data['desc'] if data['desc'] != '' else data['share_info']['share_title']) # 视频作者昵称 author_name = data['author']['nickname'] # 有水印视频链接 wm_urls = data['video']['download_addr']['url_list'] # 无水印视频链接 nwm_urls = data['video']['bit_rate'][0]['play_addr']['url_list'] # 视频封面 cover = data['video']['cover']['url_list'][0] # 视频动态封面 dynamic_cover = data['video']['dynamic_cover']['url_list'][0] # 视频原始封面 origin_cover = data['video']['origin_cover']['url_list'][0] video_info = VideoInfo(title=title, author_name=author_name, wm_urls=wm_urls, nwm_urls=nwm_urls, cover=cover, dynamic_cover=dynamic_cover, origin_cover=origin_cover) return video_info ================================================ FILE: bilix/sites/tiktok/api_test.py ================================================ import httpx import pytest from bilix.sites.tiktok import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://www.tiktok.com/@lindaselection/video/7171715528124271877") assert data.nwm_urls ================================================ FILE: bilix/sites/tiktok/downloader.py ================================================ import asyncio import re from pathlib import Path from typing import Union import httpx from . 
import api from bilix.download.base_downloader_part import BaseDownloaderPart from bilix.utils import legal_title class DownloaderTiktok(BaseDownloaderPart): pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(titok\.com)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int, None] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, ): client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderTiktok, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, ) async def get_video(self, url: str, path=Path('.'), image=False): """ :cli: short: v :param url: :param path: :param image: :return: """ video_info = await api.get_video_info(self.client, url) title = legal_title(video_info.author_name, video_info.title) # since TikTok backup not fast enough some time, use the first one cors = [self.get_file(video_info.nwm_urls[0], path / f'{title}.mp4')] if image: cors.append(self.get_static(video_info.cover, path=path / title, )) await asyncio.gather(*cors) ================================================ FILE: bilix/sites/tiktok/downloader_test.py ================================================ import pytest from bilix.sites.tiktok import DownloaderTiktok @pytest.mark.asyncio async def test_get_video(): async with DownloaderTiktok() as d: await d.get_video('https://www.tiktok.com/@evil0ctal/video/7168978761973550378') ================================================ FILE: bilix/sites/yhdmp/__init__.py ================================================ from .downloader import DownloaderYhdmp __all__ = ['DownloaderYhdmp'] ================================================ FILE: bilix/sites/yhdmp/api.py ================================================ import asyncio import json import random import re from pathlib import Path from pydantic import BaseModel 
from typing import Union, List import httpx import execjs from bs4 import BeautifulSoup from bilix.utils import legal_title from bilix.download.utils import req_retry as rr, raise_api_error BASE_URL = "https://www.yhdmp.cc" dft_client_settings = { 'headers': {'user-agent': 'PostmanRuntime/7.29.0', "Referer": BASE_URL}, 'http2': False } _js = None def _get_js(): global _js if _js is None: with open(Path(__file__).parent / 'yhdmp.js', 'r') as f: _js = execjs.compile(f.read()) return _js def _get_t2_k2(t1: str, k1: str) -> dict: new_cookies = _get_js().call("get_t2_k2", t1, k1) return new_cookies def _decode(data: str) -> str: return _get_js().call('__getplay_rev_data', data) async def req_retry(client: httpx.AsyncClient, url_or_urls: Union[str, List[str]], method: str = 'GET', follow_redirects: bool = False, **kwargs): if 't1' in client.cookies and 'k1' in client.cookies: new_cookies = _get_t2_k2(client.cookies['t1'], client.cookies['k1']) if 't2' in client.cookies: client.cookies.delete('t2') if 'k2' in client.cookies: client.cookies.delete('k2') client.cookies.update(new_cookies) res = await rr(client, url_or_urls, method, follow_redirects, **kwargs) return res class VideoInfo(BaseModel): aid: Union[str, int] play_idx: int ep_idx: int title: str sub_title: str play_info: List[Union[List[str], List]] # may be empty m3u8_url: str @raise_api_error async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') play_idx, ep_idx = int(play_idx), int(ep_idx) # request res_web = req_retry(client, url) m3u8_url = get_m3u8_url(url=url, client=client) if 't1' in client.cookies and 'k1' in client.cookies: res_web, m3u8_url = await asyncio.gather(res_web, m3u8_url) else: res_web, m3u8_url = await res_web, await m3u8_url # extract title, sub_title = map(legal_title, re.search(r'target="_self">([^<]+):([^<]+)', res_web.text).groups()) soup = BeautifulSoup(res_web.text, 'html.parser') divs = 
soup.find_all('div', class_="movurl") play_info = [] for div in divs: play_info.append([[legal_title(a["title"]), f"{BASE_URL}/{a['href']}"] for a in div.find_all("a")]) video_info = VideoInfo(aid=aid, play_idx=play_idx, ep_idx=ep_idx, title=title, sub_title=sub_title, play_info=play_info, m3u8_url=m3u8_url) return video_info @raise_api_error async def get_m3u8_url(client: httpx.AsyncClient, url): aid, play_idx, ep_idx = url.split('/')[-1].split('.')[0].split('-') params = {"aid": aid, "playindex": play_idx, "epindex": ep_idx, "r": random.random()} res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) if res_play.text.startswith("err"): # maybe first time res_play = await req_retry(client, f"{BASE_URL}/_getplay", params=params) data = json.loads(res_play.text) purl, vurl = _decode(data['purl']), _decode(data['vurl']) m3u8_url = purl.split("url=")[-1] + vurl return m3u8_url ================================================ FILE: bilix/sites/yhdmp/api_test.py ================================================ import httpx import pytest from bilix.sites.yhdmp import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://www.yhdmp.cc/vp/22224-1-0.html") data = await api.get_m3u8_url(client, "https://www.yhdmp.cc/vp/22224-1-0.html") pass ================================================ FILE: bilix/sites/yhdmp/downloader.py ================================================ import asyncio from pathlib import Path import httpx from typing import Sequence, Union, Tuple from . 
import api from bilix.utils import legal_title, cors_slice from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 class DownloaderYhdmp(BaseDownloaderM3u8): def __init__( self, *, api_client: httpx.AsyncClient = None, stream_client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, video_concurrency: Union[int, asyncio.Semaphore] = 3, hierarchy: bool = True, ): stream_client = stream_client or httpx.AsyncClient() super(DownloaderYhdmp, self).__init__( client=stream_client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, video_concurrency=video_concurrency, ) self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) self.hierarchy = hierarchy async def get_series(self, url: str, path=Path('.'), p_range: Sequence[int] = None): """ :cli: short: s :param url: :param path: :param p_range: :return: """ video_info = await api.get_video_info(self.api_client, url) ep_idx = video_info.ep_idx play_idx = video_info.play_idx title = video_info.title if self.hierarchy: path = path / title path.mkdir(parents=True, exist_ok=True) # no need to reuse get_video since we only need m3u8_url async def get_video(page_url, name): m3u8_url = await api.get_m3u8_url(self.api_client, page_url) await self.get_m3u8_video(m3u8_url=m3u8_url, path=path / name) cors = [] for idx, (sub_title, url) in enumerate(video_info.play_info[play_idx]): if ep_idx == idx: cors.append(self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{legal_title(title, sub_title)}.mp4')) else: cors.append(get_video(url, legal_title(title, sub_title))) if p_range: cors = cors_slice(cors, p_range) await asyncio.gather(*cors) async def get_video(self, url: str, path=Path('.'), time_range=None): """ :cli: short: v :param url: :param path: :param time_range: :return: """ video_info = 
await api.get_video_info(self.api_client, url) name = legal_title(video_info.title, video_info.sub_title) await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) @classmethod def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict) -> bool: return 'yhdmp' in keys[0] ================================================ FILE: bilix/sites/yhdmp/yhdmp.js ================================================ function __getplay_rev_data(_in_data) { if (_in_data.indexOf('{') < 0) { ;var encode_version = 'jsjiami.com.v5', unthu = '__0xb5aef', __0xb5aef = ['wohHHQdR', 'dyXDlMOIw5M=', 'dA9wwoRS', 'U8K2w7FvETZ9csKtEFTCjQ==', 'wo7ChVE=', 'VRrDhMOnw6I=', 'wr5LwoQkKBbDkcKwwqk=']; (function (_0x22b97e, _0x2474ca) { var _0x5b074e = function (_0x5864d0) { while (--_0x5864d0) { _0x22b97e['push'](_0x22b97e['shift']()); } }; _0x5b074e(++_0x2474ca); }(__0xb5aef, 0x1ae)); var _0x2c0f = function (_0x19a33a, _0x9a1ebf) { _0x19a33a = _0x19a33a - 0x0; var _0x40a3ce = __0xb5aef[_0x19a33a]; if (_0x2c0f['initialized'] === undefined) { (function () { var _0x4d044c = typeof window !== 'undefined' ? window : typeof process === 'object' && typeof require === 'function' && typeof global === 'object' ? global : this; var _0x1268d6 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; _0x4d044c['atob'] || (_0x4d044c['atob'] = function (_0x2993de) { var _0x467e1d = String(_0x2993de)['replace'](/=+$/, ''); for (var _0x22a01d = 0x0, _0x1ee2a1, _0x2cf5ea, _0x3a84f7 = 0x0, _0x5c0e64 = ''; _0x2cf5ea = _0x467e1d['charAt'](_0x3a84f7++); ~_0x2cf5ea && (_0x1ee2a1 = _0x22a01d % 0x4 ? _0x1ee2a1 * 0x40 + _0x2cf5ea : _0x2cf5ea, _0x22a01d++ % 0x4) ? 
_0x5c0e64 += String['fromCharCode'](0xff & _0x1ee2a1 >> (-0x2 * _0x22a01d & 0x6)) : 0x0) { _0x2cf5ea = _0x1268d6['indexOf'](_0x2cf5ea); } return _0x5c0e64; }); }()); var _0x3c81da = function (_0x457f21, _0x6cb980) { var _0x133a9b = [], _0x749ec5 = 0x0, _0x3ceeee, _0x1df5a4 = '', _0x35a2a6 = ''; _0x457f21 = atob(_0x457f21); for (var _0x9a0e47 = 0x0, _0x4a71aa = _0x457f21['length']; _0x9a0e47 < _0x4a71aa; _0x9a0e47++) { _0x35a2a6 += '%' + ('00' + _0x457f21['charCodeAt'](_0x9a0e47)['toString'](0x10))['slice'](-0x2); } _0x457f21 = decodeURIComponent(_0x35a2a6); for (var _0x2ef02e = 0x0; _0x2ef02e < 0x100; _0x2ef02e++) { _0x133a9b[_0x2ef02e] = _0x2ef02e; } for (_0x2ef02e = 0x0; _0x2ef02e < 0x100; _0x2ef02e++) { _0x749ec5 = (_0x749ec5 + _0x133a9b[_0x2ef02e] + _0x6cb980['charCodeAt'](_0x2ef02e % _0x6cb980['length'])) % 0x100; _0x3ceeee = _0x133a9b[_0x2ef02e]; _0x133a9b[_0x2ef02e] = _0x133a9b[_0x749ec5]; _0x133a9b[_0x749ec5] = _0x3ceeee; } _0x2ef02e = 0x0; _0x749ec5 = 0x0; for (var _0xa5d5ef = 0x0; _0xa5d5ef < _0x457f21['length']; _0xa5d5ef++) { _0x2ef02e = (_0x2ef02e + 0x1) % 0x100; _0x749ec5 = (_0x749ec5 + _0x133a9b[_0x2ef02e]) % 0x100; _0x3ceeee = _0x133a9b[_0x2ef02e]; _0x133a9b[_0x2ef02e] = _0x133a9b[_0x749ec5]; _0x133a9b[_0x749ec5] = _0x3ceeee; _0x1df5a4 += String['fromCharCode'](_0x457f21['charCodeAt'](_0xa5d5ef) ^ _0x133a9b[(_0x133a9b[_0x2ef02e] + _0x133a9b[_0x749ec5]) % 0x100]); } return _0x1df5a4; }; _0x2c0f['rc4'] = _0x3c81da; _0x2c0f['data'] = {}; _0x2c0f['initialized'] = !![]; } var _0x4222af = _0x2c0f['data'][_0x19a33a]; if (_0x4222af === undefined) { if (_0x2c0f['once'] === undefined) { _0x2c0f['once'] = !![]; } _0x40a3ce = _0x2c0f['rc4'](_0x40a3ce, _0x9a1ebf); _0x2c0f['data'][_0x19a33a] = _0x40a3ce; } else { _0x40a3ce = _0x4222af; } return _0x40a3ce; }; var panurl = _in_data; var hf_panurl = ''; const keyMP = 0x100000; const panurl_len = panurl['length']; for (var i = 0x0; i < panurl_len; i += 0x2) { var mn = parseInt(panurl[i] + panurl[i + 0x1], 0x10); mn = 
(mn + keyMP - (panurl_len / 0x2 - 0x1 - i / 0x2)) % 0x100; hf_panurl = String[_0x2c0f('0x0', '1JYE')](mn) + hf_panurl; } _in_data = hf_panurl; ;(function (_0x5be96b, _0x58d96a, _0x2d2c35) { var _0x13ecbc = { 'luTaD': function _0x478551(_0x58d2f3, _0x3c17c5) { return _0x58d2f3 !== _0x3c17c5; }, 'dkPfD': function _0x52a07f(_0x5999d5, _0x5de375) { return _0x5999d5 === _0x5de375; }, 'NJDNu': function _0x386503(_0x39f385, _0x251b7b) { return _0x39f385 + _0x251b7b; }, 'mNqKE': '版本号,js会定期弹窗,还请支持我们的工作', 'GllzR': '删除版本号,js会定期弹窗' }; _0x2d2c35 = 'al'; try { _0x2d2c35 += _0x2c0f('0x1', 's^Zc'); _0x58d96a = encode_version; if (!(_0x13ecbc[_0x2c0f('0x2', '(fbB')](typeof _0x58d96a, _0x2c0f('0x3', '*OI!')) && _0x13ecbc[_0x2c0f('0x4', '8iw%')](_0x58d96a, 'jsjiami.com.v5'))) { _0x5be96b[_0x2d2c35](_0x13ecbc[_0x2c0f('0x5', '(fbB')]('删除', _0x13ecbc['mNqKE'])); } } catch (_0x57623d) { _0x5be96b[_0x2d2c35](_0x13ecbc[_0x2c0f('0x6', '126j')]); } }("undefined")); ;encode_version = 'jsjiami.com.v5'; } return decodeURIComponent(_in_data); } function __getplay_pck() { ;var encode_version = 'sojson.v5', yqpcz = '__0x6d4a1', __0x6d4a1 = ['wq4mw7/CmF4=', 'w6XDrMOmwprCgg==', 'eRfDo8OoZQ==', 'IUnCmSzDgyfDjw==', 'S0pEJ8KxUMOSwqlq', 'asOow5tBwqk=', '5Lqc6ICk5Yi16ZuCw7A4wqEAwqHCisKHwr0/', 'TjpSwqZ3WMOmG8Oz', 'MhvDm8OOwqk=', 'XsKOwrAgwrFzwoU=', 'UyHCmcOyREsv', 'N2DDnXUC', 'BcOIwowrdgc=', 'GcOwNxbDqg==', 'JcKMw4ZORw==', 'Jm/ChVfDhw==', 'w7U3w4PCksKm', 'w7jDnHDCpcOF', 'wrgOw5PDlcO7', 'w4HDkMODYcK/D8O0PMKjShFZcw==', 'F8KFT8Ktwp3Ckw/CqXI=', 'M8O0dUFY', 'e1zDtMOGZg==', 'w6LChsKLCBo=', 'EMKJXSbDjQ==', 'T8KPWMK2wp3ChA==', 'wpRjw5BEZQ==', 'JHsWwq3DoQ==', 'HsKKUAvDqw==', 'wopnw5BzZA3DgQ==', 'wqAkw5PCpmw=', 'w68MBSvDow==', 'MljDsVQq', 'FMKIw6xETQ==']; (function (_0x3aee46, _0x59ba69) { var _0x3ea520 = function (_0x1dd9c6) { while (--_0x1dd9c6) { _0x3aee46['push'](_0x3aee46['shift']()); } }; _0x3ea520(++_0x59ba69); }(__0x6d4a1, 0x15b)); var _0x15f5 = function (_0x36bc78, _0xbd2420) { _0x36bc78 = _0x36bc78 - 0x0; 
var _0xfd0a5f = __0x6d4a1[_0x36bc78]; if (_0x15f5['initialized'] === undefined) { (function () { var _0x4b7bb1 = typeof window !== 'undefined' ? window : typeof process === 'object' && typeof require === 'function' && typeof global === 'object' ? global : this; var _0x531bb8 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; _0x4b7bb1['atob'] || (_0x4b7bb1['atob'] = function (_0x1870ad) { var _0x576c80 = String(_0x1870ad)['replace'](/=+$/, ''); for (var _0x44d56e = 0x0, _0x1a3ebb, _0x42d2dc, _0x1cf4b1 = 0x0, _0x2af9b7 = ''; _0x42d2dc = _0x576c80['charAt'](_0x1cf4b1++); ~_0x42d2dc && (_0x1a3ebb = _0x44d56e % 0x4 ? _0x1a3ebb * 0x40 + _0x42d2dc : _0x42d2dc, _0x44d56e++ % 0x4) ? _0x2af9b7 += String['fromCharCode'](0xff & _0x1a3ebb >> (-0x2 * _0x44d56e & 0x6)) : 0x0) { _0x42d2dc = _0x531bb8['indexOf'](_0x42d2dc); } return _0x2af9b7; }); }()); var _0x1897b8 = function (_0x3c0b9b, _0x2579f3) { var _0x5a0327 = [], _0x330679 = 0x0, _0x12b19f, _0x3ebfbf = '', _0x20630f = ''; _0x3c0b9b = atob(_0x3c0b9b); for (var _0x514228 = 0x0, _0x4f7f74 = _0x3c0b9b['length']; _0x514228 < _0x4f7f74; _0x514228++) { _0x20630f += '%' + ('00' + _0x3c0b9b['charCodeAt'](_0x514228)['toString'](0x10))['slice'](-0x2); } _0x3c0b9b = decodeURIComponent(_0x20630f); for (var _0x53cc80 = 0x0; _0x53cc80 < 0x100; _0x53cc80++) { _0x5a0327[_0x53cc80] = _0x53cc80; } for (_0x53cc80 = 0x0; _0x53cc80 < 0x100; _0x53cc80++) { _0x330679 = (_0x330679 + _0x5a0327[_0x53cc80] + _0x2579f3['charCodeAt'](_0x53cc80 % _0x2579f3['length'])) % 0x100; _0x12b19f = _0x5a0327[_0x53cc80]; _0x5a0327[_0x53cc80] = _0x5a0327[_0x330679]; _0x5a0327[_0x330679] = _0x12b19f; } _0x53cc80 = 0x0; _0x330679 = 0x0; for (var _0x25c772 = 0x0; _0x25c772 < _0x3c0b9b['length']; _0x25c772++) { _0x53cc80 = (_0x53cc80 + 0x1) % 0x100; _0x330679 = (_0x330679 + _0x5a0327[_0x53cc80]) % 0x100; _0x12b19f = _0x5a0327[_0x53cc80]; _0x5a0327[_0x53cc80] = _0x5a0327[_0x330679]; _0x5a0327[_0x330679] = _0x12b19f; _0x3ebfbf += 
String['fromCharCode'](_0x3c0b9b['charCodeAt'](_0x25c772) ^ _0x5a0327[(_0x5a0327[_0x53cc80] + _0x5a0327[_0x330679]) % 0x100]); } return _0x3ebfbf; }; _0x15f5['rc4'] = _0x1897b8; _0x15f5['data'] = {}; _0x15f5['initialized'] = !![]; } var _0x597ef6 = _0x15f5['data'][_0x36bc78]; if (_0x597ef6 === undefined) { if (_0x15f5['once'] === undefined) { _0x15f5['once'] = !![]; } _0xfd0a5f = _0x15f5['rc4'](_0xfd0a5f, _0xbd2420); _0x15f5['data'][_0x36bc78] = _0xfd0a5f; } else { _0xfd0a5f = _0x597ef6; } return _0xfd0a5f; }; if (!![]) { var _0x36d031 = _0x15f5('0x0', 'CuZW')[_0x15f5('0x1', '^Ou5')]('|'), _0x5a77e0 = 0x0; while (!![]) { switch (_0x36d031[_0x5a77e0++]) { case'0': f2 = function (_0x369589, _0x22305e) { var _0x3df411 = { 'DUWem': function _0x172fb9(_0x5ec61c, _0x564208) { return _0x5ec61c + _0x564208; }, 'chgqL': function _0xdabcda(_0x221552, _0x9f16bb) { return _0x221552 * _0x9f16bb; }, 'ueYPD': function _0x42de89(_0x168663, _0x45775b) { return _0x168663 + _0x45775b; }, 'FyVON': function _0x132543(_0x14cf95, _0x5f0613) { return _0x14cf95 + _0x5f0613; }, 'rImkg': function _0x3ee8de(_0x50917a, _0x5aa05b) { return _0x50917a + _0x5aa05b; }, 'EhXgt': ';expires=', 'eglgt': _0x15f5('0x2', 'y4Vs') }; var _0x355c8f = 0x1e; var _0x36f590 = new Date(); _0x36f590['setTime'](_0x3df411['DUWem'](_0x36f590[_0x15f5('0x3', 'wmgi')](), _0x3df411[_0x15f5('0x4', 'Put*')](_0x3df411['chgqL'](_0x3df411['chgqL'](_0x355c8f, 0x18), 0x3c) * 0x3c, 0x3e8))); var cookie = _0x3df411['DUWem'](_0x3df411[_0x15f5('0x6', 'PIK)')](_0x3df411['FyVON'](_0x3df411['rImkg'](_0x3df411[_0x15f5('0x7', 'MDzc')](_0x369589, '='), escape(_0x22305e)), _0x3df411[_0x15f5('0x8', 'bDPL')]), _0x36f590['toGMTString']()), _0x3df411[_0x15f5('0x9', 'Doro')]) updateDoc(cookie) }; continue; case'1': t1 = Math[_0x15f5('0xa', 'Q5gT')](Number(f('t1')) / 0x3e8) >> 0x5; continue; case'2': f = function (_0x30755b) { var _0x2061a3 = { 'JwcjB': function _0x4d63cc(_0x53138c, _0x57679f) { return _0x53138c + _0x57679f; }, 'zWwUP': 
_0x15f5('0xb', 'Doro'), 'zMNwJ': _0x15f5('0xc', 'mu(g'), 'QLLCz': function _0xcf9e5b(_0x22b423, _0x4bb2df) { return _0x22b423(_0x4bb2df); }, 'tNCZl': 'BSp', 'fPKPd': function _0x1e8a5f(_0x1b5aa9, _0x4db818) { return _0x1b5aa9 + _0x4db818; }, 'BbKyG': function _0x1758f2(_0x471863, _0x128f5e) { return _0x471863 * _0x128f5e; }, 'xIvIx': function _0x25258e(_0xf7b32b, _0x717bc1) { return _0xf7b32b * _0x717bc1; }, 'CMGam': function _0x5cb526(_0x32dc57, _0x589dad) { return _0x32dc57 + _0x589dad; }, 'hRgnV': function _0x30a4e5(_0x401fb4, _0x49024c) { return _0x401fb4 + _0x49024c; }, 'QNctg': _0x15f5('0xd', 'KvKZ') }; var _0x583897, _0x3a66ce = new RegExp(_0x2061a3[_0x15f5('0xe', 'Ox#l')](_0x2061a3[_0x15f5('0xf', 'v78#')](_0x2061a3[_0x15f5('0x10', '7jQL')], _0x30755b), _0x2061a3[_0x15f5('0x11', '6O7p')])); if (_0x583897 = document[_0x15f5('0x12', 'KvKZ')][_0x15f5('0x13', 'Z@&Q')](_0x3a66ce)) { return _0x2061a3[_0x15f5('0x14', 'g#CQ')](unescape, _0x583897[0x2]); } else { if (_0x2061a3['tNCZl'] !== _0x2061a3[_0x15f5('0x15', '6O7p')]) { var _0x2856c4 = 0x1e; var _0x412bd3 = new Date(); _0x412bd3[_0x15f5('0x16', 'Z@&Q')](_0x2061a3[_0x15f5('0x17', '0USv')](_0x412bd3['getTime'](), _0x2061a3['BbKyG'](_0x2061a3[_0x15f5('0x18', 'x]l]')](_0x2856c4, 0x18) * 0x3c * 0x3c, 0x3e8))); var key = _0x2061a3[_0x15f5('0x19', 'Put*')](_0x2061a3['fPKPd'](_0x2061a3[_0x15f5('0x1a', 'MDzc')](_0x2061a3[_0x15f5('0x1b', '0USv')](_0x30755b + '=', _0x2061a3[_0x15f5('0x1c', 'd$Fs')](escape, value)), _0x2061a3[_0x15f5('0x1d', 's1ve')]), _0x412bd3['toGMTString']()), ';path=/') updateDoc(key) } else { return null; } } }; continue; case'3': f2('t2', new Date()[_0x15f5('0x1e', '9k4F')]()); continue; case'4': f2('k2', (t1 * (t1 % 0x1000) + 0x99d6) * (t1 % 0x1000) + t1); continue; } break; } } ; if (!(typeof encode_version !== 'undefined' && encode_version === _0x15f5('0x1f', 'wZ(I'))) { window[_0x15f5('0x20', 'KbZ5')](_0x15f5('0x21', 'YAu4')); } ;encode_version = 'sojson.v5'; } function __getplay_pck2() { ;var 
encode_version = 'sojson.v5', woaew = '__0x6d4a2', __0x6d4a2 = ['w4TCkxtLwofCuBE=', 'YsKYwok/w5M=', 'DWwZJDPDksOi', 'wocjwrkSXQ==', 'XG5tw6Y2', 'OMOpSErDhw==', 'AA7DksO/w4gM', 'w5prw6vCrFI=', 'w7U3L8K1bQ==', 'Z8K5wrJIwrE=', 'L8OKZcKaGcOoTcOUwqIFYw==', 'YCPDs1bDrQPDvg==', 'dcOrVsOlwoA=', 'OcORb2nDtg==', 'FcKQdxtY', 'dsKSQz8V', 'McKZVzd2Xg==', 'VyEpUy4=', 'ASUlQC97HGdz', 'wqzDryzCjMKSWAE=']; (function (_0x57c88f, _0x2383d8) { var _0x4b2391 = function (_0x58c926) { while (--_0x58c926) { _0x57c88f['push'](_0x57c88f['shift']()); } }; _0x4b2391(++_0x2383d8); }(__0x6d4a2, 0xad)); var _0x1691 = function (_0x3c08d1, _0xc096f) { _0x3c08d1 = _0x3c08d1 - 0x0; var _0x2babb8 = __0x6d4a2[_0x3c08d1]; if (_0x1691['initialized'] === undefined) { (function () { var _0x2f1e69 = typeof window !== 'undefined' ? window : typeof process === 'object' && typeof require === 'function' && typeof global === 'object' ? global : this; var _0x4f603c = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='; _0x2f1e69['atob'] || (_0x2f1e69['atob'] = function (_0x2c68bb) { var _0x492998 = String(_0x2c68bb)['replace'](/=+$/, ''); for (var _0x5ee61a = 0x0, _0x2ac634, _0x1d1013, _0x6f4d80 = 0x0, _0x4a006d = ''; _0x1d1013 = _0x492998['charAt'](_0x6f4d80++); ~_0x1d1013 && (_0x2ac634 = _0x5ee61a % 0x4 ? _0x2ac634 * 0x40 + _0x1d1013 : _0x1d1013, _0x5ee61a++ % 0x4) ? 
_0x4a006d += String['fromCharCode'](0xff & _0x2ac634 >> (-0x2 * _0x5ee61a & 0x6)) : 0x0) { _0x1d1013 = _0x4f603c['indexOf'](_0x1d1013); } return _0x4a006d; }); }()); var _0xa0b1f0 = function (_0x2fa32b, _0x4608dc) { var _0x4f2019 = [], _0x4a28e8 = 0x0, _0x19767d, _0x4cf800 = '', _0x4bb512 = ''; _0x2fa32b = atob(_0x2fa32b); for (var _0x36c759 = 0x0, _0x20d6ad = _0x2fa32b['length']; _0x36c759 < _0x20d6ad; _0x36c759++) { _0x4bb512 += '%' + ('00' + _0x2fa32b['charCodeAt'](_0x36c759)['toString'](0x10))['slice'](-0x2); } _0x2fa32b = decodeURIComponent(_0x4bb512); for (var _0x3ac32b = 0x0; _0x3ac32b < 0x100; _0x3ac32b++) { _0x4f2019[_0x3ac32b] = _0x3ac32b; } for (_0x3ac32b = 0x0; _0x3ac32b < 0x100; _0x3ac32b++) { _0x4a28e8 = (_0x4a28e8 + _0x4f2019[_0x3ac32b] + _0x4608dc['charCodeAt'](_0x3ac32b % _0x4608dc['length'])) % 0x100; _0x19767d = _0x4f2019[_0x3ac32b]; _0x4f2019[_0x3ac32b] = _0x4f2019[_0x4a28e8]; _0x4f2019[_0x4a28e8] = _0x19767d; } _0x3ac32b = 0x0; _0x4a28e8 = 0x0; for (var _0x3b73f2 = 0x0; _0x3b73f2 < _0x2fa32b['length']; _0x3b73f2++) { _0x3ac32b = (_0x3ac32b + 0x1) % 0x100; _0x4a28e8 = (_0x4a28e8 + _0x4f2019[_0x3ac32b]) % 0x100; _0x19767d = _0x4f2019[_0x3ac32b]; _0x4f2019[_0x3ac32b] = _0x4f2019[_0x4a28e8]; _0x4f2019[_0x4a28e8] = _0x19767d; _0x4cf800 += String['fromCharCode'](_0x2fa32b['charCodeAt'](_0x3b73f2) ^ _0x4f2019[(_0x4f2019[_0x3ac32b] + _0x4f2019[_0x4a28e8]) % 0x100]); } return _0x4cf800; }; _0x1691['rc4'] = _0xa0b1f0; _0x1691['data'] = {}; _0x1691['initialized'] = !![]; } var _0x4cce77 = _0x1691['data'][_0x3c08d1]; if (_0x4cce77 === undefined) { if (_0x1691['once'] === undefined) { _0x1691['once'] = !![]; } _0x2babb8 = _0x1691['rc4'](_0x2babb8, _0xc096f); _0x1691['data'][_0x3c08d1] = _0x2babb8; } else { _0x2babb8 = _0x4cce77; } return _0x2babb8; }; if (!![]) { f = function (_0x1d75de) { var _0x37083b = { 'QPnEZ': function _0x60d408(_0x47b907, _0x1e139b) { return _0x47b907 + _0x1e139b; }, 'GfOGG': function _0x3d3c72(_0x1f55be, _0x4a6029) { return 
_0x1f55be + _0x4a6029; }, 'HMzQD': '=([^;]*)(;|$)' }; var _0x4d0811, _0x524d79 = new RegExp(_0x37083b[_0x1691('0x0', 'H$R$')](_0x37083b[_0x1691('0x1', '@5Y)')]('(^|\x20)', _0x1d75de), _0x37083b[_0x1691('0x2', '&6Xe')])); if (_0x4d0811 = document[_0x1691('0x3', '@5Y)')][_0x1691('0x4', 'wcel')](_0x524d79)) { return unescape(_0x4d0811[0x2]); } else { return null; } }; f2 = function (_0x5059ad, _0x4d7bb0) { var _0x372740 = { 'wGmSQ': function _0x495870(_0x1e22e5, _0x5a96b1) { return _0x1e22e5 + _0x5a96b1; }, 'zPYil': function _0x53f643(_0x30ccee, _0x194f17) { return _0x30ccee * _0x194f17; }, 'PhIfk': function _0x5a75c7(_0x5ebe8a, _0x59b8e9) { return _0x5ebe8a * _0x59b8e9; }, 'HidQG': function _0x579a67(_0x374d40, _0x1e0498) { return _0x374d40 + _0x1e0498; }, 'bUfLy': function _0xd9d4c3(_0x490eda, _0xb0910e) { return _0x490eda(_0xb0910e); }, 'DYZHd': _0x1691('0x5', 'wcel'), 'cDGyM': _0x1691('0x6', 'mI%7') }; var _0x2d5246 = 0x1e; var _0x11d22b = new Date(); _0x11d22b[_0x1691('0x7', 'V55E')](_0x372740[_0x1691('0x8', 'cvmk')](_0x11d22b[_0x1691('0x9', '2v0z')](), _0x372740[_0x1691('0xa', ']ZR@')](_0x372740[_0x1691('0xb', 'hPNq')](_0x372740[_0x1691('0xc', 'H$R$')](_0x372740['PhIfk'](_0x2d5246, 0x18), 0x3c), 0x3c), 0x3e8))); var key = _0x372740['HidQG'](_0x372740[_0x1691('0xe', ']o&s')](_0x372740[_0x1691('0xf', 'd%V$')](_0x5059ad, '='), _0x372740['bUfLy'](escape, _0x4d7bb0)), _0x372740[_0x1691('0x10', 'nG4r')]) + _0x11d22b[_0x1691('0x11', 'U8Zj')]() + _0x372740['cDGyM'] updateDoc(key) // document[_0x1691('0xd', 'h%Wr')] = _0x372740['HidQG'](_0x372740[_0x1691('0xe', ']o&s')](_0x372740[_0x1691('0xf', 'd%V$')](_0x5059ad, '='), _0x372740['bUfLy'](escape, _0x4d7bb0)), _0x372740[_0x1691('0x10', 'nG4r')]) + _0x11d22b[_0x1691('0x11', 'U8Zj')]() + _0x372740['cDGyM']; }; try { ksub = f('k2')['slice'](-0x1); while (!![]) { t2 = new Date()['getTime'](); if (t2['toString']()['slice'](-0x3)[_0x1691('0x12', '9f@X')](ksub) >= 0x0) { f2('t2', t2); break; } } } catch (_0x5e3bb4) { } } ; if 
(!(typeof encode_version !== 'undefined' && encode_version === 'sojson.v5')) { window[_0x1691('0x13', 'EPWy')]('不能删除sojson.v5'); } ;encode_version = 'sojson.v5'; } let document = {data: {}} function updateDoc(cookie) { cookie = cookie.split(';')[0] let a = cookie.split("=") document.data[a[0]] = a[1] let tmp = [] for (const key in document.data) { tmp.push(`${key}=${document.data[key]}`) } document.cookie = tmp.join('; ') } function get_t2_k2(t1, k1) { updateDoc(`t1=${t1}`) updateDoc(`k1=${k1}`) __getplay_pck(); __getplay_pck2(); return {t2: document.data.t2, k2: document.data.k2} } // console.logger(get_data(1660410066753, 54244870492)); ================================================ FILE: bilix/sites/yinghuacd/__init__.py ================================================ from .downloader import DownloaderYinghuacd __all__ = ['DownloaderYinghuacd'] ================================================ FILE: bilix/sites/yinghuacd/api.py ================================================ import re from pydantic import BaseModel from typing import Union, List import httpx from bs4 import BeautifulSoup from bilix.download.utils import req_retry, raise_api_error BASE_URL = "http://www.yinghuacd.com" dft_client_settings = { 'headers': {'user-agent': 'PostmanRuntime/7.29.0'}, 'http2': False } class VideoInfo(BaseModel): title: str sub_title: str play_info: List[Union[List[str], List]] # may be empty m3u8_url: str @raise_api_error async def get_video_info(client: httpx.AsyncClient, url: str) -> VideoInfo: # request res = await req_retry(client, url) m3u8_url = re.search(r'http.*m3u8', res.text)[0] soup = BeautifulSoup(res.text, 'html.parser') h1 = soup.find('h1') title, sub_title = h1.a.text, h1.span.text[1:] # extract play_info = [[a.text, f"{BASE_URL}{a['href']}"] for a in soup.find('div', class_="movurls").find_all('a')] video_info = VideoInfo(title=title, sub_title=sub_title, play_info=play_info, m3u8_url=m3u8_url) return video_info 
================================================ FILE: bilix/sites/yinghuacd/api_test.py ================================================ import httpx import pytest from bilix.sites.yinghuacd import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "http://www.yinghuacd.com/v/5606-7.html") pass ================================================ FILE: bilix/sites/yinghuacd/downloader.py ================================================ import asyncio from pathlib import Path import httpx import re from m3u8 import Segment from typing import Sequence, Union, Tuple from . import api from bilix.utils import legal_title, cors_slice from bilix.download.base_downloader_m3u8 import BaseDownloaderM3u8 from bilix.exception import APIError class DownloaderYinghuacd(BaseDownloaderM3u8): def __init__( self, *, stream_client: httpx.AsyncClient = None, api_client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, video_concurrency: Union[int, asyncio.Semaphore] = 3, hierarchy: bool = True, ): stream_client = stream_client or httpx.AsyncClient() super(DownloaderYinghuacd, self).__init__( client=stream_client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency, video_concurrency=video_concurrency, ) self.api_client = api_client or httpx.AsyncClient(**api.dft_client_settings) self.hierarchy = hierarchy def _after_seg(self, seg: Segment, content: bytearray) -> bytearray: # in case .png if re.fullmatch(r'.*\.png', seg.absolute_uri): _, _, content = content.partition(b'\x47\x40') return content async def get_series(self, url: str, path=Path("."), p_range: Sequence[int] = None): """ :cli: short: s :param url: :param path: :param p_range: :return: """ video_info = await 
api.get_video_info(self.api_client, url) if self.hierarchy: path /= video_info.title path.mkdir(parents=True, exist_ok=True) cors = [self.get_video(u, path=path, video_info=video_info if u == url else None) for _, u in video_info.play_info] if p_range: cors = cors_slice(cors, p_range) await asyncio.gather(*cors) async def get_video(self, url: str, path=Path('.'), time_range=None, video_info=None): """ :cli: short: v :param url: :param path: :param time_range: :param video_info: :return: """ if video_info is None: try: video_info = await api.get_video_info(self.api_client, url) except APIError as e: return self.logger.error(e) else: video_info = video_info name = legal_title(video_info.title, video_info.sub_title) await self.get_m3u8_video(m3u8_url=video_info.m3u8_url, path=path / f'{name}.mp4', time_range=time_range) @classmethod def _decide_handle(cls, method: str, keys: Tuple[str, ...], options: dict): return 'yinghuacd' in keys[0] ================================================ FILE: bilix/sites/youtube/__init__.py ================================================ from .downloader import DownloaderYoutube __all__ = ['DownloaderYoutube'] ================================================ FILE: bilix/sites/youtube/api.py ================================================ import re import json from pydantic import BaseModel import httpx from bilix.download.utils import req_retry from bilix.utils import legal_title dft_client_settings = { 'headers': { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ' 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36', 'referer': 'https://www.youtube.com/' }, } class VideoInfo(BaseModel): # url: str title: str video_url: str audio_url: str # img_url: str async def get_video_info(client: httpx.AsyncClient, url: str): response = await req_retry(client=client, url_or_urls=url) # 解析 json_str = re.findall('var ytInitialPlayerResponse = (.*?);var', response.text)[0] json_data = json.loads(json_str) video_url = 
json_data['streamingData']['adaptiveFormats'][0]['url'] audio_url = json_data['streamingData']['adaptiveFormats'][-2]['url'] title = legal_title(json_data['videoDetails']['title']) video_info = VideoInfo(video_url=video_url, audio_url=audio_url, title=title) return video_info ================================================ FILE: bilix/sites/youtube/api_test.py ================================================ import httpx import pytest from bilix.sites.youtube import api client = httpx.AsyncClient(**api.dft_client_settings) @pytest.mark.asyncio async def test_get_video_info(): data = await api.get_video_info(client, "https://www.youtube.com/watch?v=26lanyBFXw8") assert data.video_url and data.audio_url and data.title ================================================ FILE: bilix/sites/youtube/downloader.py ================================================ import re import asyncio from pathlib import Path from typing import Union import httpx from . import api from bilix.download.base_downloader_part import BaseDownloaderPart from bilix import ffmpeg class DownloaderYoutube(BaseDownloaderPart): pattern = re.compile(r"^https?://([A-Za-z0-9-]+\.)*(youtube\.com)") def __init__( self, *, client: httpx.AsyncClient = None, browser: str = None, speed_limit: Union[float, int] = None, stream_retry: int = 5, progress=None, logger=None, part_concurrency: int = 10, # unique params video_concurrency: Union[int, asyncio.Semaphore] = 3 ): client = client or httpx.AsyncClient(**api.dft_client_settings) super(DownloaderYoutube, self).__init__( client=client, browser=browser, speed_limit=speed_limit, stream_retry=stream_retry, progress=progress, logger=logger, part_concurrency=part_concurrency ) self.video_sema = asyncio.Semaphore(video_concurrency) if type(video_concurrency) is int else video_concurrency async def get_video(self, url: str, path=Path('.')): """ :cli: short: v :param url :param path: :return: """ async with self.video_sema: video_info = await 
api.get_video_info(self.client, url) video_path = path / (video_info.title + '.mp4') if video_path.exists(): return self.logger.info(f'[green]已存在[/green] {video_path.name}') task_id = await self.progress.add_task(description=video_info.title, upper=True) path_lst = await asyncio.gather( self.get_file(url_or_urls=video_info.video_url, path=path / (video_info.title + '-v'), task_id=task_id), self.get_file(url_or_urls=video_info.audio_url, path=path / (video_info.title + '-a'), task_id=task_id) ) await ffmpeg.combine(path_lst, output_path=path / (video_info.title + '.mp4')) self.logger.info(f'[cyan]已完成[/cyan] {video_path.name}') await self.progress.update(task_id=task_id, visible=False) ================================================ FILE: bilix/utils.py ================================================ """ some useful functions """ import html import json import re import time from functools import wraps from urllib.parse import quote_plus from typing import Union, Sequence, Coroutine, List, Tuple, Optional from bilix.log import logger def cors_slice(cors: Sequence[Coroutine], p_range: Sequence[int]): h, t = p_range[0] - 1, p_range[1] assert 0 <= h <= t [cor.close() for idx, cor in enumerate(cors) if idx < h or idx >= t] # avoid runtime warning cors = cors[h:t] return cors def legal_title(*parts: str, join_str: str = '-'): """ join several string parts to os illegal file/dir name (no illegal character and not too long). auto skip empty. :param parts: :param join_str: the string to join each part :return: """ return join_str.join(filter(lambda x: len(x) > 0, map(replace_illegal, parts))) def replace_illegal(s: str): """strip, unescape html and replace os illegal character in s""" s = s.strip() s = html.unescape(s) # handel & "... 
s = re.sub(r"[/\\:*?\"<>|\n\t]", '', s) # replace illegal filename character return s def convert_size(total_bytes: int) -> str: unit, suffix = pick_unit_and_suffix( total_bytes, ["bytes", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"], 1000 ) return f"{total_bytes / unit:,.2f}{suffix}" def pick_unit_and_suffix(size: int, suffixes: List[str], base: int) -> Tuple[int, str]: """Borrowed from rich.filesize. Pick a suffix and base for the given size.""" for i, suffix in enumerate(suffixes): unit = base ** i if size < unit * base: break else: raise ValueError('Invalid input') return unit, suffix def parse_bytes_str(s: str) -> float: """"Parse a string byte quantity into an integer""" units_map = {unit: i for i, unit in enumerate(['', *'KMGTPEZY'])} units_re = '|'.join(units_map.keys()) m = re.fullmatch(rf'(?P\d+(?:\.\d+)?)\s*(?P{units_re})B?', s) if not m: raise ValueError(f"Invalid bytes str {s} to parse to number") num = float(m.group('num')) mult = 1000 ** units_map[m.group('unit')] return num * mult def valid_sess_data(sess_data: Optional[str]) -> str: """check and encode sess_data""" # url-encoding sess_data if it's not encoded # https://github.com/HFrost0/bilix/pull/114https://github.com/HFrost0/bilix/pull/114 if sess_data and not re.search(r'(%[0-9A-Fa-f]{2})|(\+)', sess_data): sess_data = quote_plus(sess_data) logger.debug(f"sess_data encoded: {sess_data}") return sess_data def t2s(t: int) -> str: return str(t) def s2t(s: str) -> int: """ :param s: hour:minute:second or xx(s) format input :return: """ if ':' not in s: return int(s) h, m, s = map(int, s.split(':')) return h * 60 * 60 + m * 60 + s def json2srt(data: Union[bytes, str, dict]): b = False if type(data) is bytes: data = data.decode('utf-8') b = True if type(data) is str: data = json.loads(data) def t2str(t): ms = int(round(t % 1, 3) * 1000) s = int(t) m = s // 60 h = m // 60 m, s = m % 60, s % 60 t_str = f'{h:0>2}:{m:0>2}:{s:0>2},{ms:0>3}' return t_str res = '' for idx, i in enumerate(data['body']): 
from_time, to_time = t2str(i['from']), t2str(i['to']) content = i['content'] res += f"{idx + 1}\n{from_time} --> {to_time}\n{content}\n\n" return res.encode('utf-8') if b else res def timer(func): @wraps(func) def wrapper(*args, **kwargs): start = time.monotonic_ns() res = func(*args, **kwargs) logger.debug( f"{func.__name__} cost {time.monotonic_ns() - start} ns with args: {args}, kwargs: {kwargs} result: {res}") return res return wrapper ================================================ FILE: docs/.vitepress/config.ts ================================================ import {defineConfig} from 'vitepress' // https://vitepress.dev/reference/site-config export default defineConfig({ title: "bilix", description: "bilix download", base: '/bilix/', lastUpdated: true, themeConfig: { // https://vitepress.dev/reference/default-theme-config editLink: { pattern: 'https://github.com/HFrost0/bilix/edit/master/docs/:path' }, algolia: { appId: 'F4ZDY9KUXU', apiKey: '30aaace8ddea0d6f25ac39ea70ce8bd8', indexName: 'bilix' }, footer: { message: 'Released under the Apache 2.0 License.', copyright: 'Copyright © 2022-present HFrost0' }, socialLinks: [ {icon: 'github', link: 'https://github.com/HFrost0/bilix'} ] }, locales: { root: { label: '中文', lang: 'zh', themeConfig: { nav: [ {text: 'Home', link: '/'}, {text: '安装', link: '/install'}, {text: '快速上手', link: '/quickstart'} ], sidebar: [ {text: '安装', link: '/install'}, {text: '快速上手', link: '/quickstart'}, {text: '进阶使用', link: '/advance_guide'}, { text: 'Python调用', items: [ {text: '异步基础', link: '/async'}, {text: '下载案例', link: '/download_examples'}, {text: 'API案例', link: '/api_examples'} ] }, {text: '更多', link: '/more'}, ], } }, en: { label: 'English', lang: 'en', // optional, will be added as `lang` attribute on `html` tag themeConfig: { nav: [ {text: 'Home', link: '/en/'}, {text: 'Install', link: '/en/install'}, {text: 'Quickstart', link: '/en/quickstart'} ], sidebar: [ {text: 'Install', link: '/en/install'}, {text: 'Quickstart', link: 
'/en/quickstart'}, {text: 'Advance Guide', link: '/en/advance_guide'}, { text: 'Python API', items: [ {text: 'Async basic', link: '/en/async'}, {text: 'Download Examples', link: '/en/download_examples'}, {text: 'API Examples', link: '/en/api_examples'} ] }, {text: 'More', link: '/en/more'}, ], }, } }, }) ================================================ FILE: docs/.vitepress/theme/index.ts ================================================ import Theme from 'vitepress/theme' import './style/var.css' export default { extends: Theme, } ================================================ FILE: docs/.vitepress/theme/style/var.css ================================================ :root { --vp-home-hero-name-color: transparent; --vp-home-hero-name-background: linear-gradient( 135deg, #79F1A4 10%, #0E5CAD 100%);; } ================================================ FILE: docs/advance_guide.md ================================================ # 进阶使用 请使用`bilix -h`查看更多参数提示,包括方法名简写,视频画面质量选择,并发量控制,下载速度限制,下载目录等。 ## 方法名简写 觉得`get_series`,`get_video`这些方法名写起来太麻烦了?同感!你可以使用他们的简写,这样快多了: ```shell bilix s 'url' bilix v 'url' ... 
``` 更多简写请查看`bilix -h` ## 登录 你是大会员?🥸,两种方式登录 * 直接填写cookie 在`--cookie`参数中填写浏览器缓存的`SESSDATA`cookie,填写后可以下载需要大会员的视频 * 从浏览器载入cookie 在浏览器中登录之后,使用`-fb --from-browser`参数从浏览器中读取cookie,例如`-fb chrome`,使用这种方法可能需要授权,bilix读取浏览器cookie的 方式为开源项目[browser_cookie3](https://github.com/borisbabic/browser_cookie3)。 :::tip 如果你总是需要保持登录,在linux和mac系统中你可以使用`alias bilix=bilix --cookie xxxxxx`或`alias bilix=bilix -fb chrome`来为`bilix`命令创建别名 ::: ## 画质,音质和编码选择 你可以使用`--quality`即`-q`参数选择画面分辨率,bilix支持两种不同的选择方式: * 相对选择(默认) bilix在默认情况下会为你选择可选的最高画质进行下载(即`-q 0`),如果你想下载第二清晰的可使用`-q 1`进行指定,以此类推,指定序号越大画质越低, 当超过可选择范围时,默认选择到最低画质,例如你总是可以通过`-q 999`来选择到最低画质。 * 绝对选择 在某些时候,你只希望下载720P的视频,但是720P在相对选择中并不总是处于固定的位置,这在下载收藏夹,合集等等场景中经常出现。 另外有可能你就是喜欢通过`-q 1080P`这样的方式来指定画质。 没问题,bilix同时也支持通过`-q 4K` `-q '1080P 高码率'`等字符串的形式来直接指定画质,字符串为b站显示的画质名称的子串即可。 在更加专业用户的需求中,可能需要指定特定的视频编码进行下载,而b站支持的编码在网页或app中是不可见的,bilix为此设计了方法`info` , 通过它你可以完全了解该视频的所有信息: ```text bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 ┣━━ 画面 Video ┃ ┣━━ HDR 真彩 ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB ┃ ┣━━ 4K 超清 ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB ┃ ┣━━ 1080P 60帧 ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB ┃ ┣━━ 1080P 高清 ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB ┃ ┣━━ 720P 高清 ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB ┃ ┣━━ 480P 清晰 ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB ┃ ┗━━ 360P 流畅 ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB ┃ ┗━━ codec: avc1.64001E total: 11.59MB ┗━━ 声音 Audio ┣━━ 默认音质 ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB ┗━━ Hi-Res无损 ┗━━ codec: fLaC total: 94.55MB ``` 看上去不错😇,那么我要怎么才能下到指定编码的视频呢? 
bilix提供了另一个参数`--codec`来指定编码格式,例如你可以通过组合`-q 480P --codec hev1.1.6.L120.90`来指定下载7.61MB的那个。 `--codec`参数与`-q`参数类似,也支持子串指定,例如你可以通过`--codec hev`来使得所有视频都选择`hev`开头的编码。 对于音质,部分视频会含有大会员专享的杜比全景声和Hi-Res无损音质,利用`--codec`参数可以指定这些音频,例如 ```shell bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC ``` `--codec hev:fLaC`中使用`:`将画质编码和音频编码隔开,如只指定音频编码,可使用`--codec :fLaC` ## 关于断点重连 用户可以通过Ctrl+C中断任务,对于未完成的文件,重新执行命令会在之前的进度基础上下载,已完成的文件会进行跳过。 但是对于未完成的文件,以下情况建议清除未完成任务的临时文件再执行命令,否则可能残留部分临时文件。 - 中断后改变画面质量`-q`或编码`--codec` - 中断后改变分段并发数`--part-con` - 中断后改变时间范围`--time-range` ## 一次提供多个url bilix的所有方法都支持提供多个`url` ```shell bilix v 'url1' 'url2' 'url3' bilix up 'up_url1' 'up_url2' ``` 当你提供多个`url`时,并发控制当然也正常工作 ## 更多站点支持 bilix除了b站以外也支持了一些别的站点,但作者精力有限,所以失效也不奇怪。具体可见[discussion](https://github.com/HFrost0/bilix/discussions/39) ## 基本下载方法 对于一些基本的下载场景 * 你可以直接通过文件链接下载 ```shell bilix f 'https://xxxx.com/xxxx.mp4' ``` * 你可以通过m3u8 url直接下载m3u8视频 ```shell bilix m3u8 'https:/xxxx.com/xxxx.m3u8' ``` ## 代理 bilix默认使用系统代理 ================================================ FILE: docs/api_examples.md ================================================ # API案例 bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 ```python import asyncio from bilix.sites.bilibili import api from httpx import AsyncClient async def main(): # 需要先实例化一个用来进行http请求的client client = AsyncClient(**api.dft_client_settings) data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') print(data) asyncio.run(main()) ``` ================================================ FILE: docs/async.md ================================================ # 异步基础 异步无疑是python中处理网络请求的最佳技术,因为它可以承载极高的并发量。 在python中使用bilix之前,你需要先对python中的异步编程有一些了解。python官方使用[asyncio](https://docs.python.org/3/library/asyncio.html) 提供异步I/O的支持。 ```python async def hello(): print("hello world") ``` 对于一个async函数(`def`变为`async def`)来说调用不会直接执行函数,而是返回一个协程(coroutine)对象 ```python c = hello() >>> c ``` 我们可以将这个coroutine提交到asyncio的事件循环中执行它 ```python import asyncio 
>>> asyncio.run(c) "hello world" ``` bilix的所有下载方法都是异步的,所以你也可以这样执行他们 ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili d = DownloaderBilibili() asyncio.run(d.get_video('url')) ``` ================================================ FILE: docs/download_examples.md ================================================ # 下载案例 觉得命令行太麻烦,不够强大?bilix可做为python的库调用,并且接口设计易用,功能更强大,这给了你很大的扩展空间 ## 从最简单的开始 ```python import asyncio # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等 from bilix.sites.bilibili import DownloaderBilibili async def main(): # 你可以使用async with上下文管理器来开启和关闭一个下载器 async with DownloaderBilibili() as d: # 然后用await异步等待下载完成 await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") if __name__ == '__main__': asyncio.run(main()) ``` ## 组合多种任务 / 控制并发量 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) cor1 = d.get_series( 'https://www.bilibili.com/bangumi/play/ss28277' , quality=999) cor2 = d.get_up(url_or_mid='436482484', quality=999) cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122', quality=999) await asyncio.gather(cor1, cor2, cor3) await d.aclose() if __name__ == '__main__': asyncio.run(main()) ``` ## 下载切片 你可以只下视频的一小段 ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" async with DownloaderBilibili() as d: # time_range (start_time, end_time) await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) if __name__ == '__main__': asyncio.run(main()) ``` ## 同时下载多个站点 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili() as 
d_bl, DownloaderCctv() as d_tv: await asyncio.gather( d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) ) if __name__ == '__main__': asyncio.run(main()) ``` ## 限制下载速度 限制下载速度很简单,下面的例子限制了b站点总下载速度在1MB/s以下 ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") if __name__ == '__main__': asyncio.run(main()) ``` 另外,多个下载器之间的速度设置也是独立的 ```python async def main(): # 就像并发控制一样,每个downloader的速度设置也是独立的 async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: await asyncio.gather( bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') ) ``` ## 显示进度条 使用python模块时,进度条默认不显示,如需显示,可以 ```python from bilix.progress.cli_progress import CLIProgress CLIProgress.start() ``` 或者通过任意下载器内部的`progress`对象打开 ```python d.progress.start() ``` ================================================ FILE: docs/en/advance_guide.md ================================================ # Advance Guide Please use `bilix -h` for more help,including method short alias,video quality selection,concurrency control, download speed control,download directory... ## Method short alias Method names like `get_series` and `get_video` are too cumbersome to write? Agreed! You can use their short alias for faster access: ```shell bilix s 'url' bilix v 'url' ... ``` please check `bilix -h` for all short alias ## Login there are two ways to login * cookie option By adding the `SESSDATA` cookie from your browser's cache in the `--cookie` option, you can download videos that require a premium membership. 
* load cookies from browser After logging in through the browser, use the `-fb --from-browser` option to read cookies from the browser, such as `-fb chrome`. Using this method may require authorization. The method that `bilix` uses to read browser cookies is the open-source project [browser_cookie3](https://github.com/borisbabic/browser_cookie3). :::tip If you want to keep logged in, you can use `alias bilix=bilix --cookie xxxxxx` or `alias bilix=bilix -fb chrome` to create an alias for the `bilix` command ::: ## Video and audio quality, codec selection You can use `--quality -q`option to choose video resolution,bilix supports two different selection ways: * relatively choose (default) By default, bilix will select the accessible highest quality for you (that is, `-q 0`), for the second, use `-q 1` to specify, the larger number the lower resolution. When the number out of index, the lowest quality will be is selected. For example, you can always select the lowest quality by `-q 999`. * absolute choose You can use`-q 1080P` to specific a resolution, the string is a substring of the resolution name on bilibili. For more advanced users who may need to specify a particular video codec for download, the encodings supported by Bilibili are not visible on the website or in the app. For this purpose, bilix has designed the `info` method. 
By using it, you can fully understand all the information about the video: ```text bilix info 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' 【4K·HDR·Hi-Res】群青 - YOASOBI 33,899👀 1,098👍 201🪙 ┣━━ 画面 Video ┃ ┣━━ HDR 真彩 ┃ ┃ ┗━━ codec: hev1.2.4.L153.90 total: 149.86MB ┃ ┣━━ 4K 超清 ┃ ┃ ┣━━ codec: avc1.640034 total: 320.78MB ┃ ┃ ┗━━ codec: hev1.1.6.L153.90 total: 106.54MB ┃ ┣━━ 1080P 60帧 ┃ ┃ ┣━━ codec: avc1.640032 total: 171.91MB ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.66MB ┃ ┣━━ 1080P 高清 ┃ ┃ ┣━━ codec: avc1.640032 total: 86.01MB ┃ ┃ ┗━━ codec: hev1.1.6.L150.90 total: 24.18MB ┃ ┣━━ 720P 高清 ┃ ┃ ┣━━ codec: avc1.640028 total: 57.39MB ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 11.53MB ┃ ┣━━ 480P 清晰 ┃ ┃ ┣━━ codec: avc1.64001F total: 25.87MB ┃ ┃ ┗━━ codec: hev1.1.6.L120.90 total: 7.61MB ┃ ┗━━ 360P 流畅 ┃ ┣━━ codec: hev1.1.6.L120.90 total: 5.24MB ┃ ┗━━ codec: avc1.64001E total: 11.59MB ┗━━ 声音 Audio ┣━━ 默认音质 ┃ ┗━━ codec: mp4a.40.2 total: 10.78MB ┗━━ Hi-Res无损 ┗━━ codec: fLaC total: 94.55MB ``` looks good😇,so how can I download the video with the specified codec? bilix provides another option `--codec`. For example, you can use a combination like `-q 480P --codec hev1.1.6.L120.90` to specify downloading the 7.61MB one. The `--codec` option is similar to the `-q` option which supports substring specification, for example using `--codec hev` to make all videos choose codec that start with hev. For audio quality, some videos may contain Dolby and Hi-Res audio. You can use the `--codec` option to specify these audio formats, for example: ```shell bilix v 'https://www.bilibili.com/video/BV1kG411t72J' --cookie 'xxxxx' --codec hev:fLaC ``` in `--codec hev:fLaC`, use`:` to split video and audio codec, if you just want to specify audio codec,you can use`--codec :fLaC` ## Resuming Interrupted Downloads Users can interrupt tasks by pressing `Ctrl+C`. 
For unfinished files, re-executing the command will resume the download based on the previous progress, and completed files will be skipped. However, for unfinished files, it is recommended to clear the temporary files of the unfinished tasks before executing the command again in the following situations, otherwise some temporary files may remain: * Changing the video quality `-q` or `--codec` after interruption * Changing the `--part-con` after interruption * Changing the `--time-range` after interruption ## Provide multiple urls at once All methods of bilix support providing multiple `url` ```shell bilix v 'url1' 'url2' 'url3' bilix up 'up_url1' 'up_url2' ``` Concurrency, speed control also works fine when you provide multiple `url` of course ## Support for More Sites bilix also supports some other websites, but their availability may vary as the author is currently busy. For further information, please refer to the following [discussion](https://github.com/HFrost0/bilix/discussions/39). 
## Basic Download method For some basic download scenarios * You can directly download a file through the file url ```shell bilix f 'https://xxxx.com/xxxx.mp4' ``` * you can directly download m3u8 video by url ```shell bilix m3u8 'https:/xxxx.com/xxxx.m3u8' ``` ## Proxy bilix will use system proxy by default ================================================ FILE: docs/en/api_examples.md ================================================ # API Examples bilix provides the APIs of various websites, and they are all asynchronous ```python import asyncio from bilix.sites.bilibili import api from httpx import AsyncClient async def main(): # instantiate a httpx client for making http requests client = AsyncClient(**api.dft_client_settings) data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') print(data) asyncio.run(main()) ``` ================================================ FILE: docs/en/async.md ================================================ # Async basic Asynchronous programming in Python excels at handling network requests with high concurrency. Before using bilix in Python, you need to have some understanding of asynchronous programming in Python. The official Python [asyncio](https://docs.python.org/3/library/asyncio.html) library provides support for asynchronous I/O. ```python async def hello(): print("hello world") ``` For an async function (async def), calling it will not directly execute the function but instead return a coroutine object. 
```python c = hello() >>> c ``` We can submit the coroutine obj to asyncio's event loop to execute it ```python import asyncio >>> asyncio.run(c) "hello world" ``` All download methods of bilix are asynchronous, so you can execute them like this ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili d = DownloaderBilibili() asyncio.run(d.get_video('url')) ``` ================================================ FILE: docs/en/download_examples.md ================================================ # Download Examples Command line is too cumbersome and not powerful enough for you? bilix can be used as a Python library with user-friendly interfaces and enhanced functionality for greater flexibility. ## Start with the simplest ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): # you can use async with context manager to open and close a downloader async with DownloaderBilibili() as d: await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") if __name__ == '__main__': asyncio.run(main()) ``` ## Combine multiple tasks and control concurrency You can combine the coroutine objects returned by the downloader and use gather to execute them concurrently. The concurrency is strictly restricted by the downloader object, ensuring no unexpected burden on the server. 
```python import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) cor1 = d.get_series( 'https://www.bilibili.com/bangumi/play/ss28277' , quality=999) cor2 = d.get_up(url_or_mid='436482484', quality=999) cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122', quality=999) await asyncio.gather(cor1, cor2, cor3) await d.aclose() if __name__ == '__main__': asyncio.run(main()) ``` ## Download a clip You can download just a clip of the video ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" async with DownloaderBilibili() as d: # time_range (start_time, end_time) await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) if __name__ == '__main__': asyncio.run(main()) ``` ## Download from multiple sites simultaneously You can initialize downloaders for different websites, and use the coroutine objects returned by their methods for concurrent downloads. The concurrency control between different downloaders is independent, allowing you to maximize the use of your network resources. ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili() as d_bl, DownloaderCctv() as d_tv: await asyncio.gather( d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) ) if __name__ == '__main__': asyncio.run(main()) ``` ## Limit download speed Limiting the download speed is very simple. 
The following example limits the total download speed below 1MB/s ```python import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") if __name__ == '__main__': asyncio.run(main()) ``` In addition, the speed settings between downloaders are also independent ```python async def main(): # 就像并发控制一样,每个downloader的速度设置也是独立的 async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: await asyncio.gather( bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') ) ``` ## Show progress bar When using the python module, the progress bar is not displayed by default. If you want to display it, you can ```python from bilix.progress.cli_progress import CLIProgress CLIProgress.start() ``` or open via the `progress` object inside any downloader ```python d.progress.start() ``` ================================================ FILE: docs/en/index.md ================================================ --- # https://vitepress.dev/reference/default-theme-home-page layout: home hero: name: "bilix" tagline: Lightning-fast asynchronous download tool for bilibili and more actions: - theme: brand text: Quickstart link: /en/quickstart - theme: alt text: Python API link: /en/async features: - icon: ⚡️ title: Fast & Async details: Asynchronous high concurrency support, controllable concurrency and speed settings - icon: 😉 title: Lightweight & User-friendly details: Lightweight user-friendly CLI with progress notification, focusing on core functionality - icon: 📝 title: Fully-featured details: Submissions, anime, TV Series, video clip, audio, favourite, danmaku ,cover... 
- icon: 🔨 title: Extensible details: Extensible Python module suitable for more download scenarios --- ================================================ FILE: docs/en/install.md ================================================ # Installation bilix is a powerful Python asynchronous video download tool that requires two steps to install: 1. pip install(require python >= 3.8) ```shell pip install bilix ``` If you are a macOS user, you can also use `brew` to install: ```shell brew install bilix ``` 2. [FFmpeg](https://ffmpeg.org) :A command-line video tool for compositing downloaded audio and video * For macOS, it can be installed via `brew install ffmpeg` * For Windows, please go to the official website https://ffmpeg.org/download.html#build-windows , you need to configure environment variables after installation ::: info Just make sure that you can call the `ffmpeg` command from the command line in the end. ::: ================================================ FILE: docs/en/more.md ================================================ # More ## Community If you find any bugs or other issues, feel free to raise an [Issue](https://github.com/HFrost0/bilix/issues). If you have new ideas or new feature requests,welcome to participate in the [Discussion](https://github.com/HFrost0/bilix/discussions) If you find this project helpful, you can support the author by [Star](https://github.com/HFrost0/bilix/stargazers)🌟 ## Contribute ❤️ Welcome~ Details can be found in [Contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING_EN.md) ## Known Bugs 🤡 When two video names are exactly the same, task conflicts occur but no error is reported. ================================================ FILE: docs/en/quickstart.md ================================================ # Quickstart bilix offers a simple command line interface, so open the terminal and start downloading now! ## Batch download Batch download entire anime series, TV shows, movies, and UP submissions... 
just replace the `url` in the command with the web link of any video in the series you want to download. Head over to bilibili and find one to try (like [this](https://www.bilibili.com/video/BV1JE411g7XF)), `bilix` will download the files to the `videos` folder in the current directory of the command line, which is automatically created by default. ```shell bilix get_series 'url' ``` `get_series` is powerful, as it automatically recognizes and downloads all videos in a series. ::: info * What is a series: For example, all parts of a multi-part submission, all episodes of an anime or TV show. * Some URLs containing parameters need to be wrapped in `''` when used in the terminal. The Windows cmd does not support `''`, but you can use PowerShell or Windows Terminal as an alternative. ::: ## Single download User😨:I don't want to download that many, just a single video. No problem, try this, just provide the web link of that video: ```shell bilix get_video 'url' ``` :::info Do you know that? methods like `get_series` `get_video` all has a [short alias](/en/advance_guide) ::: ## Audio download Assuming you like the music and only want to download audio, then you can use the optional parameter `--only-audio` ```shell bilix get_series 'url' --only-audio ``` ## Clip download The video, live record is too long, I need to download the clip I am interested in✂️, then you can use the `--time-range -tr` parameter to specify the time range ```shell bilix get_vedio 'url' -tr 0:16:53-0:17:49 ``` In this example, a time range from 16 minutes 53 seconds to 17 minutes 49 seconds is specified. 
The format can be `h:m:s-h:m:s`, or `s-s` this option is only available in `get_video`, you can combine `-tr` with `--only-audio` to download audio clip ## Uploader download If you want to download the latest 100 submissions from an uploader ```shell bilix get_up 'https://space.bilibili.com/672328094' --num 100 ``` `https://space.bilibili.com/672328094` is the uploader space url,you can also use uploader id `672328094` to replace `url` ## Download Videos by Category Suppose you enjoy watching the dance category👍 and want to download the top 20 超级敏感 宅舞 videos with the highest play count in the last 30 days, you can use: ```shell bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 ``` `get_cate` supports every sub-category on bilibili and offers options for sorting and keyword searching. For more details, please refer to `bilix -h` or the code comments. ## Download Videos from Favorites If you need to download videos from your own or someone else's favorites, you can use the `get_favour` method ```shell bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 ``` `https://space.bilibili.com/11499954/favlist?fid=1445680654` is the URL for the favorites. If you want to know the URL of a favorites, the easiest way is to click on it in the Bilibili webpage's left-side menu, and the URL will appear in the browser's address bar. Alternatively, you can directly replace the URL with the fid `1445680654` ## Download collection or video list If you want to download the collection or video list released by a uploader, you can use the `get_collect` method ```shell bilix get_collect 'url' ``` Replace `url` with the url of a collection or video list details page([for example](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630)) ## Download subtitle, danmaku, cover... 
Add options `--subtitle` `--dm` `--image` according to your need to download these additional files ```shell bilix get_series 'url' --subtitle --dm --image ``` ================================================ FILE: docs/index.md ================================================ --- # https://vitepress.dev/reference/default-theme-home-page layout: home hero: name: "bilix" tagline: 快如闪电的异步下载工具,支持bilibili及更多 actions: - theme: brand text: 快速上手 link: /quickstart - theme: alt text: Python调用 link: /async features: - icon: ⚡️ title: 高速异步 details: 异步高并发支持,可控的并发量和速度设置 - icon: 😉 title: 轻量易用 details: 友好的CLI及进度提示,专注核心功能 - icon: 📝 title: 功能齐全 details: 投稿,弹幕,收藏夹,分区,动漫,电视剧,切片,封面,音频... - icon: 🔨 title: 可拓展 details: 可扩展的Python模块适应更多下载场景 --- ================================================ FILE: docs/install.md ================================================ # 安装 bilix是一个强大的Python异步视频下载工具,安装它需要完成两个步骤: 1. pip安装(需要python3.8及以上) ```shell pip install bilix ``` 如果你是macOS用户,也可以使用`brew`安装: ```shell brew install bilix ``` 2. 
[FFmpeg](https://ffmpeg.org) :一个命令行视频工具,用于合成下载的音频和视频 * macOS 下可以通过`brew install ffmpeg`进行安装。 * Windows 下载请至官网 https://ffmpeg.org/download.html#build-windows ,安装好后需要配置环境变量。 ::: info 最终确保在命令行中可以调用`ffmpeg`命令即可。 ::: ================================================ FILE: docs/more.md ================================================ # 更多 ## 欢迎提问 如果你发现任何bug或者其他问题,欢迎提[Issue](https://github.com/HFrost0/bilix/issues)。 如果你有新想法或新的功能请求,欢迎在[Discussion](https://github.com/HFrost0/bilix/discussions)中参与讨论 如果觉得该项目对你有所帮助,可以给作者一个小小的[Star](https://github.com/HFrost0/bilix/stargazers)🌟 ## 参与贡献 ❤️ 非常欢迎~详情可见[contributing](https://github.com/HFrost0/bilix/blob/master/CONTRIBUTING.md) ## 已知的bug 🤡 当两个视频名字完全一样时,任务冲突但不会报错 ================================================ FILE: docs/package.json ================================================ { "scripts": { "docs:dev": "vitepress dev", "docs:build": "vitepress build", "docs:preview": "vitepress preview" }, "devDependencies": { "vitepress": "^1.0.0-alpha.63" } } ================================================ FILE: docs/quickstart.md ================================================ # 快速上手 bilix提供了简单的命令行使用方式,打开终端开始下载吧~ ## 批量下载 批量下载整部动漫,电视剧,纪录片,电影,up投稿.....只需要把命令中的`url`替换成你要下载的系列中任意一个视频的网页链接。\ 到 bilibili 上找一个来试试吧~,比如这个李宏毅老师的机器学习视频:[链接](https://www.bilibili.com/video/BV1JE411g7XF), `bilix`会下载文件至命令行当前目录的`videos`文件夹中,默认自动创建。 ```shell bilix get_series 'url' ``` `get_series`很强大,会自动识别系列所有视频并下载,当然,如果该系列只有一个视频(比如单p投稿)也是可以正常下载的。 ::: info * 什么是一个系列(series):比如一个多p投稿的所有p,一部动漫,电视剧的所有集。 * 某些含有参数的url在终端中要用`''`包住,而windows的命令提示符不支持`''`,可用powershell或windows terminal代替。 ::: ## 单个下载 用户😨:我不想下载那么多,只想下载单个视频。没问题,试试这个,只需要提供那个视频的网页链接: ```shell bilix get_video 'url' ``` :::info 你知道吗?`get_series` `get_video`方法名都有[简写](/advance_guide) ::: ## 下载音频 假设你喜欢音乐区,只想下载音频,那么可以使用可选参数`--only-audio`,例如下面是下载[A叔](https://space.bilibili.com/6075139) 一个钢琴曲合集音频的例子 ```shell bilix get_series 'https://www.bilibili.com/video/BV1ts411D7mf' --only-audio ``` ## 切片下载 
视频,直播录像太长,我需要下载我感兴趣的片段✂️,那么可以使用`--time-range -tr`参数指定时间段下载切片 ```shell bilix get_video 'url' -tr 0:16:53-0:17:49 ``` 这个例子中指定了16分53秒至17分49秒的片段。 `-tr`参数的格式为`h:m:s-h:m:s`,起始时间和结束时间以`-`分割,时分秒以`:` 分割。或者`s-s`格式,例如1013秒至1069秒`1013-1069` 该参数仅在`get_video`中生效,仅下载音频也支持该参数 ## 下载特定up主的投稿 假设你是一个嘉心糖,想要下载嘉然小姐最新投稿的100个视频,那么你可以使用命令: ```shell bilix get_up 'https://space.bilibili.com/672328094' --num 100 ``` `https://space.bilibili.com/672328094` 是up空间页url,另外用up主id`672328094`替换url同样也是可以的 ## 下载分区视频 假设你喜欢看舞蹈区👍,想要下载最近30天播放量最高的20个超级敏感宅舞视频,那么你可以使用 ```shell bilix get_cate 宅舞 --keyword 超级敏感 --order click --num 20 --days 30 ``` `get_cate`支持b站的每个子分区,可以使用排序,关键词搜索等,详细请参考`bilix -h`或代码注释 ## 下载收藏夹视频 如果你需要下载自己或者其他人收藏夹中的视频,你可以使用`get_favour`方法 ```shell bilix get_favour 'https://space.bilibili.com/11499954/favlist?fid=1445680654' --num 20 ``` `https://space.bilibili.com/11499954/favlist?fid=1445680654` 是收藏夹url,如果要知道一个收藏夹的url是什么, 最简单的办法是在b站网页左侧列表中点击切换到该收藏夹,url就会出现在浏览器的地址栏中。另外直接使用url中的fid`1445680654` 替换url也是可以的。 ## 下载合集或视频列表 如果你需要下载up主发布的合集或视频列表,你可以使用`get_collect`方法 ```shell bilix get_collect 'url' ``` 将`url`替换为某个合集或视频列表详情页的url(例如[这个](https://space.bilibili.com/369750017/channel/collectiondetail?sid=630))即可下载合集或列表内所有视频 :::info 合集和视频列表有什么区别?b站的合集可以订阅,列表则没有这个功能,但是他们都在up主空间页面的合集和列表菜单中,例如[这个](https://space.bilibili.com/369750017/channel/series) ,`get_collect`会根据详情页url中的信息判断这个链接是合集还是列表 ::: ## 下载字幕,弹幕,封面... 在命令中加入可选参数`--subtitle`(字幕) `--dm`(弹幕) `--image`(封面),即可下载这些附属文件 ```shell bilix get_series 'url' --subtitle --dm --image ``` ================================================ FILE: examples/a_very_simple_example.py ================================================ """ 使用bilix在python中最简单的实践🤖 The simplest practice of using bilix in python """ import asyncio # 导入下载器,里面有很多方法,例如get_series, get_video, get_favour,get_dm等等,总能找到符合你需求的 # downloader with many method like get_series, get_video...
from bilix.sites.bilibili import DownloaderBilibili async def main(): # 你可以使用with上下文管理器来开启和关闭一个下载器 # you can use with to open and close a downloader async with DownloaderBilibili() as d: # 然后用await等待下载完成 # and use await to download await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") async def main2(): d = DownloaderBilibili() await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") # 或者,手动关闭,一样很简单 # or you can call aclose() manually await d.aclose() if __name__ == '__main__': asyncio.run(main()) ================================================ FILE: examples/download_by_timerange.py ================================================ """ 你可以只下视频的一小段 You can download just a small clip of the video """ import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): """download the 《嘉然我真的好喜欢你啊😭😭😭.mp4》 by timerange🤣""" async with DownloaderBilibili() as d: # time_range (start_time, end_time) await d.get_video('https://www.bilibili.com/video/BV1kK4y1A7tN', time_range=(0, 7)) if __name__ == '__main__': asyncio.run(main()) ================================================ FILE: examples/limit_download_rate.py ================================================ """ 限制下载速度很简单 limit download rate is simple """ import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili(speed_limit=1e6) as d: # limit to 1MB/s await d.get_series("https://www.bilibili.com/video/BV1jK4y1N7ST?p=5") async def main2(): # 就像并发控制一样,每个downloader的速度设置也是独立的 # Like concurrency control, the speed settings of each downloader are independent async with DownloaderBilibili(speed_limit=1e6) as bili_d, DownloaderCctv(speed_limit=3e6) as cctv_d: await asyncio.gather( bili_d.get_series('https://www.bilibili.com/video/BV1cd4y1Z7EG'), cctv_d.get_series('https://www.douyin.com/video/7132430286415252773') ) if __name__ == '__main__': asyncio.run(main()) 
================================================ FILE: examples/multi_site_download_same_time.py ================================================ """ 你可以同时初始化不同网站的下载器,并且利用他们方法返回的协程对象进行并发下载。 各个下载器之间的并发控制是独立的,因此可以最大化利用自己的网络资源。 You can initialize the downloaders of different websites at the same time, and use the coroutine objects returned by their methods to download concurrently. The concurrency control between each downloader is independent, so you can maximize the use of your network resources. """ import asyncio from bilix.sites.bilibili import DownloaderBilibili from bilix.sites.douyin import DownloaderDouyin from bilix.sites.cctv import DownloaderCctv async def main(): async with DownloaderBilibili() as d_bl, DownloaderDouyin() as d_dy, DownloaderCctv() as d_tv: await asyncio.gather( d_bl.get_video('https://www.bilibili.com/video/BV1cd4y1Z7EG', quality=999), d_dy.get_video('https://www.douyin.com/video/7132430286415252773'), d_tv.get_video('https://tv.cctv.com/2012/05/02/VIDE1355968282695723.shtml', quality=999) ) if __name__ == '__main__': asyncio.run(main()) ================================================ FILE: examples/multi_type_tasks.py ================================================ """ 你可以组合下载器返回的协程对象,利用gather并发执行他们,他们执行的并发度收到下载器对象的严格约束,因此不会对服务器造成意想不到的负担。 You can combine coroutine objects returned by the downloader and use gather to execute them concurrently. The concurrency is strictly constrained by the downloader object, so it will not cause unexpected burden on the site server. 
""" import asyncio from bilix.sites.bilibili import DownloaderBilibili async def main(): d = DownloaderBilibili(video_concurrency=5, part_concurrency=10) cor1 = d.get_series( 'https://www.bilibili.com/bangumi/play/ss28277?spm_id_from=333.337.0.0', quality=999) cor2 = d.get_up(url_or_mid='436482484', quality=999) cor3 = d.get_video('https://www.bilibili.com/bangumi/play/ep477122?from_spmid=666.4.0.0', quality=999) await asyncio.gather(cor1, cor2, cor3) await d.aclose() if __name__ == '__main__': asyncio.run(main()) ================================================ FILE: examples/use_of_api.py ================================================ """ bilix 提供了各个网站的api,如果你有需要当然可以使用,并且它们都是异步的 bilix provides api for various websites. You can use them if you need, and they are asynchronous """ import asyncio from bilix.sites.bilibili import api from httpx import AsyncClient async def main(): # 需要先实例化一个用来进行http请求的client # first we should initialize a http client client = AsyncClient(**api.dft_client_settings) data = await api.get_video_info(client, 'https://www.bilibili.com/bangumi/play/ep90849') print(data) asyncio.run(main()) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "bilix" dynamic = ["version"] description = "⚡️Lightning-fast asynchronous download tool for bilibili and more" readme = "README.md" license = "Apache-2.0" requires-python = ">=3.8" authors = [ { name = "HFrost0", email = "hhlfrost@gmail.com" }, ] classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dependencies = [ "aiofiles>=0.8.0", "anyio", "danmakuC>=0.3.5", "bs4", "click>=8.0.3", 
"httpx[http2]>=0.23.3", "json5", "m3u8>=3.5.0", "pycryptodome", "pydantic>=2.5.3", "rich", "browser_cookie3>=0.17.1", "pymp4>=1.2.0", ] [project.scripts] bilix = "bilix.cli.main:main" [project.urls] Homepage = "https://github.com/HFrost0/bilix" [tool.hatch.version] path = "bilix/__init__.py" [tool.hatch.build.targets.sdist] include = [ "/bilix", ]