Repository: z-mio/parse_hub_bot Branch: main Commit: 252be67dc4ec Files: 29 Total size: 107.9 KB Directory structure: gitextract_bn3fr2tv/ ├── .dockerignore ├── .github/ │ └── workflows/ │ └── docker-image.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── bot.py ├── core/ │ ├── __init__.py │ ├── config.py │ ├── platform_config.py │ └── watchdog.py ├── log.py ├── plugins/ │ ├── __init__.py │ ├── filters.py │ ├── helpers.py │ ├── inline_parse.py │ ├── parse.py │ └── start.py ├── pyproject.toml ├── services/ │ ├── __init__.py │ ├── cache.py │ ├── parser.py │ └── pipeline.py └── utils/ ├── __init__.py ├── converter.py ├── event_loop.py ├── helpers.py ├── media_processing_unit.py └── ph.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .env .venv data downloads logs ================================================ FILE: .github/workflows/docker-image.yml ================================================ name: Docker Image CI on: release: types: [ published ] jobs: build: runs-on: ubuntu-latest permissions: contents: read packages: write steps: - uses: actions/checkout@v4 - uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: 构建&推送镜像 run: | # 获取release标签版本 VERSION=${GITHUB_REF#refs/tags/} # 构建并推送带版本号的镜像 docker build . --file Dockerfile \ --tag ghcr.io/z-mio/parse_hub_bot:${VERSION} \ --tag ghcr.io/z-mio/parse_hub_bot:latest docker push ghcr.io/z-mio/parse_hub_bot:${VERSION} docker push ghcr.io/z-mio/parse_hub_bot:latest ================================================ FILE: .gitignore ================================================ /.venv /logs /.idea /downloads .env *.session /data ================================================ FILE: Dockerfile ================================================ FROM python:3.12-slim AS build COPY --from=ghcr.io/astral-sh/uv:0.10.11 /uv /uvx /bin/ WORKDIR /app ENV UV_COMPILE_BYTECODE=1 \ UV_LINK_MODE=copy COPY pyproject.toml uv.lock ./ RUN apt-get update && apt-get install -y --no-install-recommends \ gcc python3-dev \ && rm -rf /var/lib/apt/lists/* RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --no-install-project --frozen COPY . . RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --frozen FROM python:3.12-slim AS runtime RUN apt-get update && apt-get install -y --no-install-recommends \ libglib2.0-0 \ ffmpeg \ media-types \ curl unzip ca-certificates \ && curl -fsSL https://deno.land/install.sh | sh \ && rm -rf /var/lib/apt/lists/* ENV DENO_INSTALL="/root/.deno" ENV PATH="/app/.venv/bin:$DENO_INSTALL/bin:$PATH" WORKDIR /app COPY --from=build /app /app ENV PATH="/app/.venv/bin:$PATH" CMD ["python", "bot.py"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 梓澪 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================
# 🔗 ParseHubBot **Telegram 多平台聚合解析机器人**

License Python Telegram Bot uv

[**🤖 实例演示**](https://t.me/ParseHubot) · [**📚 相关项目**](https://github.com/z-mio/ParseHub) · [**🐛 问题反馈**](https://github.com/z-mio/Parse_Hub_Bot/issues)
--- > 官方实例:[@ParseHubot](https://t.me/ParseHubot) ## ✨ 功能特性 - 🎬 **多平台解析** — 抖音、B站、YouTube、小红书、Twitter 等 16+ 主流平台一站搞定 - ⚡ **内联模式** — 在任意聊天窗口输入 `@BotUsername <链接>` 即可解析 - 🖼️ **Tg 兼容** — 自动转码、长图切割、大视频分段 - 📦 **多种模式** — 在线预览, 原始文件, 打包下载 - 🐳 **Docker 部署** — 开箱即用 ## 📦 支持平台一览 | 平台 | 视频 | 图文 | 其他 | |:----------------|:--:|:--:|:-----:| | **Twitter / X** | ✅ | ✅ | 📝 文章 | | **Instagram** | ✅ | ✅ | | | **YouTube** | ✅ | | 🎵 音乐 | | **Facebook** | ✅ | | | | **Threads** | ✅ | ✅ | | | **Bilibili** | ✅ | | 📝 动态 | | **抖音** | ✅ | ✅ | | | **TikTok** | ✅ | ✅ | | | **微博** | ✅ | ✅ | | | **小红书** | ✅ | ✅ | | | **贴吧** | ✅ | ✅ | | | **微信公众号** | | ✅ | | | **快手** | ✅ | | | | **酷安** | ✅ | ✅ | | | **皮皮虾** | ✅ | ✅ | | | **最右** | ✅ | ✅ | | | **小黑盒** | ✅ | ✅ | | > 🔧 更多平台持续接入中... ## 🚀 快速开始 ### 🐳 Docker 运行 (推荐) ```bash mkdir parse_hub_bot && cd parse_hub_bot docker run -d \ --restart=always \ -e API_ID=你的API_ID \ -e API_HASH=你的API_HASH \ -e BOT_TOKEN=你的BOT_TOKEN \ -v ./logs:/app/logs \ -v ./data:/app/data \ --name parse-hub-bot \ ghcr.io/z-mio/parse_hub_bot:latest ``` ### 💻 源码运行 ```bash uv sync uv run bot.py ``` --- ## ⚙️ 配置说明 - **环境变量:** 基础配置 - **平台配置 (可选):** 平台代理和 Cookie ### 📝 环境变量 ```dotenv # ✅ 必填 API_ID= # Telegram API ID,登录 https://my.telegram.org 获取 API_HASH= # Telegram API Hash,同上获取 BOT_TOKEN= # 机器人 Token,向 @BotFather 申请 # 🔲 可选 BOT_PROXY= # Bot 连接 TG 使用的代理,例:http://127.0.0.1:7890 ``` ### 🌐 平台配置 用于为各解析平台单独配置**代理**和 **Cookie**,位于 `data/config/platform_config.yaml` ```yaml # ═══════════════════════ 全局默认代理 ═══════════════════════ # 当某平台未单独配置代理时,会使用全局默认代理 # 支持填写单个地址(字符串)或多个地址(列表,随机选取) default_parser_proxies: http://127.0.0.1:7890 # 解析代理(单个) default_downloader_proxies: # 下载代理(代理池) - http://127.0.0.1:7890 - http://127.0.0.1:7891 # ═══════════════════════ 平台独立配置 ═══════════════════════ platforms: : # 平台 ID,见下方支持列表 disable_parser_proxy: false # 是否禁用解析代理(直连) disable_downloader_proxy: false # 是否禁用下载代理(直连) parser_proxies: # 该平台专用解析代理池 - http://proxy1:port downloader_proxies: # 该平台专用下载代理池 - http://proxy2:port cookies: # 该平台 Cookie 列表(随机选取) - "cookie_string_1" - "cookie_string_2" ``` ### 🔀 代理优先级 解析代理和下载代理各自遵循相同的优先级逻辑: ``` 禁用代理 (disable_*_proxy: true) ↓ 未禁用 平台专用代理 (parser_proxies / downloader_proxies) ↓ 未配置 全局默认代理 (default_parser_proxies / default_downloader_proxies) ↓ 未配置 直连(不使用代理) ``` > 💡 当代理池中有多个地址时,每次请求会**随机选取**一个 ### 🔑 支持的平台 ID `` 必须是以下合法的平台 ID: | 平台 ID | 对应平台 | |:------------|:------------| | `twitter` | Twitter / X | | `instagram` | Instagram | | `youtube` | YouTube | | `facebook` | Facebook | | `threads` | Threads | | `bilibili` | 哔哩哔哩 | | `douyin` | 抖音 | | `tiktok` | TikTok | | `weibo` | 微博 | | `xhs` | 小红书 | | `tieba` | 百度贴吧 | | `wechat` | 微信公众号 | | `kuaishou` | 快手 | | `coolapk` | 酷安 | | `pipixia` | 皮皮虾 | | `zuiyou` | 最右 | | `xiaoheihe` | 小黑盒 | ### 🍪 支持 Cookie 的平台 - `Twitter / X` - `Instagram` - `YouTube` - `Bilibili` - `抖音` - `TikTok` - `快手` - `小红书` ### 📌 配置示例 ##### 示例 1:国内平台直连,海外平台走代理 ```yaml default_parser_proxies: http://127.0.0.1:7890 default_downloader_proxies: http://127.0.0.1:7890 platforms: bilibili: disable_parser_proxy: true disable_downloader_proxy: true douyin: disable_parser_proxy: true disable_downloader_proxy: true xhs: disable_parser_proxy: true disable_downloader_proxy: true ``` #### 示例 2:Twitter 配置 Cookie + 使用全局代理 ```yaml default_parser_proxies: http://127.0.0.1:7890 default_downloader_proxies: http://127.0.0.1:7890 platforms: twitter: cookies: - "auth_token=your_token_here; ct0=your_ct0_here" ``` #### 示例 3:YouTube 使用独立代理池 ```yaml platforms: youtube: parser_proxies: - http://proxy-us-1:8080 - http://proxy-us-2:8080 - http://proxy-eu-1:8080 downloader_proxies: - http://proxy-us-1:8080 - http://proxy-eu-1:8080 ``` #### 示例 4:B站指定 Cookie 轮换 + 解析直连 + 下载走代理 ```yaml platforms: bilibili: disable_parser_proxy: true downloader_proxies: - http://127.0.0.1:7890 cookies: - "SESSDATA=xxx; bili_jct=xxx; buvid3=xxx" - "SESSDATA=yyy; bili_jct=yyy; buvid3=yyy" ``` ## 🌟 Star History [![Star History Chart](https://api.star-history.com/svg?repos=z-mio/Parse_Hub_Bot&type=Date)](https://star-history.com/#z-mio/Parse_Hub_Bot&Date) ## 🤝 参与贡献 欢迎提交 Pull Request 或 Issue! - 核心解析相关请前往 [ParseHub](https://github.com/z-mio/ParseHub)。 - Bug 反馈请附上相关 URL 和日志信息。 ## 📄 开源协议 本项目基于 [MIT License](LICENSE) 协议开源。 ---
**如果这个项目对你有帮助,欢迎点个 ⭐ Star!**
================================================ FILE: bot.py ================================================ import asyncio import shutil from typing import Any import pillow_heif from pyrogram import Client from pyrogram.handlers import ConnectHandler, DisconnectHandler from pyrogram.types import BotCommand from core import bs, on_connect, on_disconnect, ws from log import logger, setup_logging from services import parse_cache, persistent_cache from utils.event_loop import setup_optimized_event_loop pillow_heif.register_heif_opener() setup_logging(debug=bs.debug) loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) setup_optimized_event_loop() class Bot(Client): def __init__(self) -> None: self.cfg = bs super().__init__( f"{self.cfg.bot_token.split(':')[0]}_bot", api_id=self.cfg.api_id, api_hash=self.cfg.api_hash, bot_token=self.cfg.bot_token, plugins={"root": "plugins"}, proxy=self.cfg.bot_proxy, loop=loop, workdir=self.cfg.sessions_path, ) async def start(self, *args: Any, **kwargs: Any) -> "Bot": self.init_watchdog() parse_cache.start_cleanup() persistent_cache.start_cleanup() await super().start() await self.set_menu() return self async def stop(self, *args: Any, **kwargs: Any) -> None: ws.exit_flag = True await persistent_cache.close() await super().stop() # 结束时清理下载残留 if self.cfg.download_dir.exists(): shutil.rmtree(self.cfg.download_dir) def init_watchdog(self) -> None: self.add_handler(ConnectHandler(on_connect)) self.add_handler(DisconnectHandler(on_disconnect)) async def set_menu(self) -> None: commands = { "start": "开始", "jx": "解析", "raw": "不处理媒体, 发送原始文件", "zip": "不处理媒体, 保存解析结果, 发送压缩包", } await self.set_bot_commands([BotCommand(command=k, description=v) for k, v in commands.items()]) logger.debug(f"菜单已设置: {commands}") if __name__ == "__main__": bot = Bot() bot.run() ================================================ FILE: core/__init__.py ================================================ from .config import bs, ws from .platform_config import pl_cfg from .watchdog import on_connect, on_disconnect __all__ = [ "bs", "ws", "pl_cfg", "on_connect", "on_disconnect", ] ================================================ FILE: core/config.py ================================================ import os from pathlib import Path from typing import Any from urllib.parse import urlparse from dotenv import load_dotenv from pydantic import Field, field_validator, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict load_dotenv() class BotSettings(BaseSettings): model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", extra="ignore", ) bot_token: str = Field(...) api_id: str = Field(...) api_hash: str = Field(...) bot_proxy: dict | None = Field(default=None) data_path: Path = Path("data") cache_time: int = Field(default=14 * 24 * 60, ge=0, description="缓存时间, 单位分钟, 0 为禁用") cache_max_entries: int = Field(default=30000, ge=0, description="缓存最大条数, 0 为不限制") cache_save_interval: int = Field(default=5, gt=0, description="缓存保存间隔, 单位分钟") cache_cleanup_interval: int = Field(default=60, gt=0, description="缓存过期清理间隔, 单位分钟") download_dir: Path = Path("downloads") debug: bool = Field(default=False) debug_skip_cleanup: bool = Field(default=False, description="跳过资源清理") @model_validator(mode="after") def cache_config_validate(self) -> "BotSettings": if self.cache_time and self.cache_cleanup_interval > self.cache_time: raise ValueError("CACHE_CLEANUP_INTERVAL 不能大于 CACHE_TIME") return self def model_post_init(self, __context: Any) -> None: """模型初始化后的操作""" self.sessions_path.mkdir(parents=True, exist_ok=True) self.cache_path.mkdir(parents=True, exist_ok=True) self.config_path.mkdir(parents=True, exist_ok=True) @property def sessions_path(self) -> Path: return self.data_path / "sessions" @property def cache_path(self) -> Path: return self.data_path / "cache" @property def config_path(self) -> Path: return self.data_path / "config" @field_validator("bot_proxy", mode="before") @classmethod def proxy_config(cls, v: str | None = None) -> dict | None: url = urlparse(v) if v else None if not url: return None return { "scheme": url.scheme, "hostname": url.hostname, "port": url.port, "username": url.username, "password": url.password, } @property def bot_session_name(self) -> str: return f"bot_{self.bot_token.split(':')[0]}" @field_validator("data_path", mode="before") @classmethod def data_path_init(cls, v: str | Path) -> Path: p = Path(v) if isinstance(v, str) else v p.mkdir(exist_ok=True, parents=True) return p class WatchdogSettings(BaseSettings): model_config = SettingsConfigDict( env_file=None, extra="ignore", env_prefix="WD_", ) is_running: bool = Field(default=False) """运行中""" restart_count: int = Field(default=0) """重启次数""" disconnect_count: int = Field(default=0) """断开连接次数""" max_disconnect_count: int = Field(default=3) """最大断开连接次数, 超过后重启""" remove_session_after_restart: int = Field(default=3) """重启失败几次后删除会话文件""" max_restart_count: int = Field(default=6) """意外断开连接时,最大重启次数""" exit_flag: bool = Field(default=False) """退出标志""" def update_bot_restart_count(self) -> None: self.restart_count += 1 os.environ["WD_RESTART_COUNT"] = str(self.restart_count) def reset_bot_restart_count(self) -> None: self.restart_count = 0 os.environ["WD_RESTART_COUNT"] = "0" def update_bot_disconnect_count(self) -> None: self.disconnect_count += 1 os.environ["WD_DISCONNECT_COUNT"] = str(self.disconnect_count) def reset_bot_disconnect_count(self) -> None: self.disconnect_count = 0 os.environ["WD_DISCONNECT_COUNT"] = "0" bs = BotSettings() # type: ignore[call-arg] ws = WatchdogSettings() ================================================ FILE: core/platform_config.py ================================================ import random from pathlib import Path from parsehub.types import Platform as PPlatform from pydantic import BaseModel, ConfigDict, HttpUrl from yaml import safe_load from log import logger from .config import bs logger = logger.bind(name="PlatformConfig") class Platform(BaseModel): model_config = ConfigDict(extra="forbid") disable_parser_proxy: bool = False disable_downloader_proxy: bool = False parser_proxies: list[HttpUrl] | None = None downloader_proxies: list[HttpUrl] | None = None cookies: list[str] | None = None def roll_cookie(self) -> str | None: if not self.cookies: return None return random.choice(self.cookies) def roll_parser_proxy(self) -> str | None: if not self.parser_proxies: return None return str(random.choice(self.parser_proxies)) def roll_downloader_proxy(self) -> str | None: if not self.downloader_proxies: return None return str(random.choice(self.downloader_proxies)) class PlatformsConfig(BaseModel): model_config = ConfigDict(extra="forbid") default_parser_proxies: list[HttpUrl] | None = None default_downloader_proxies: list[HttpUrl] | None = None platforms: dict[str, Platform] = {} @classmethod def load_config(cls, file: Path) -> "PlatformsConfig": if not file.exists(): logger.info("未找到 platform_config.yaml, 跳过加载") return cls() with open(file, encoding="utf-8") as f: data = safe_load(f) if not data: logger.info("platform_config.yaml 为空, 跳过加载") return cls() platforms = {} if data.get("platforms"): pid_list = [p.id for p in PPlatform] for name, pdata in data["platforms"].items(): if name not in pid_list: logger.error(f"平台 [{name}] 不存在, 支持的平台id: {pid_list}") exit(1) if not pdata: continue try: platforms[name] = Platform(**pdata) except Exception as e: logger.error(f"平台 [{name}] 配置错误:\n{e}") raise SystemExit(1) from e pc = cls( default_parser_proxies=cls._2l(data.get("default_parser_proxies", None)), default_downloader_proxies=cls._2l(data.get("default_downloader_proxies", None)), platforms=platforms, ) logger.debug(f"已载入平台配置: {pc.model_dump_json(indent=4)}") return pc @staticmethod def _2l[T](v: T | list[T] | None) -> list[T] | None: if v is None: return None if isinstance(v, list): return v return [v] def get(self, platform_id: str) -> Platform | None: return self.platforms.get(platform_id) def roll_cookie(self, platform_id: str) -> str | None: if not (pc := self.get(platform_id)): return None return pc.roll_cookie() def roll_parser_proxy(self, platform_id: str) -> str | None: if not (pc := self.get(platform_id)): pc = Platform() if pc.disable_parser_proxy: return None if platform_proxy := pc.roll_parser_proxy(): return platform_proxy if self.default_parser_proxies: return str(random.choice(self.default_parser_proxies)) return None def roll_downloader_proxy(self, platform_id: str) -> str | None: if not (pc := self.get(platform_id)): pc = Platform() if pc.disable_downloader_proxy: return None if platform_proxy := pc.roll_downloader_proxy(): return platform_proxy if self.default_downloader_proxies: return str(random.choice(self.default_downloader_proxies)) return None pl_cfg = PlatformsConfig.load_config(bs.config_path / "platform_config.yaml") ================================================ FILE: core/watchdog.py ================================================ import asyncio import os import sys from pyrogram import Client from pyrogram.session import Session from core.config import bs, ws from log import logger logger = logger.bind(name="Watchdog") async def reset_count_task() -> None: """重置重启次数任务""" if ws.restart_count: logger.info(f"第 {ws.restart_count} 次重启成功, 稳定运行 10 分钟后重置重启次数") elif ws.disconnect_count: logger.info("Bot 重连成功, 稳定运行 10 分钟后重置断开连接次数") await asyncio.sleep(600) ws.reset_bot_disconnect_count() ws.reset_bot_restart_count() logger.info("已稳定运行 10 分钟, 次数已重置") async def on_connect(_: Client, session: Session) -> None: """Bot 连接成功回调函数""" if session.is_media: return ws.is_running = True logger.success("Bot 开始运行...") if ws.restart_count or ws.disconnect_count: asyncio.create_task(reset_count_task()) async def on_disconnect(cli: Client, session: Session) -> None: """Bot 断开连接回调函数""" if session.is_media: return if ws.exit_flag: ws.is_running = False # 正常退出 if ws.exit_flag and not ws.is_running: logger.info("Bot 已结束运行") return # 启动失败 if not ws.is_running and not ws.restart_count: exit("Bot 连接失败, 请检查设备网络和代理配置") # 断开连接 if ws.restart_count >= ws.max_restart_count: exit(f"重启次数已达上限 ({ws.max_restart_count} 次), 结束进程") if ws.disconnect_count < ws.max_disconnect_count: ws.update_bot_disconnect_count() logger.warning(f"Bot 已断开连接... | {ws.disconnect_count}/{ws.max_disconnect_count}") return if bs.debug: exit("Bot 已断开连接, 目前处于调试模式, 已跳过重启") try: ws.update_bot_restart_count() logger.warning(f"Bot 已断开连接, 尝试重启... | {ws.restart_count}/{ws.max_restart_count}") if ws.restart_count == ws.remove_session_after_restart and not cli.in_memory: await remove_session_file(cli) python = sys.executable os.execv(python, [python] + sys.argv) except Exception as e: logger.exception(e) exit("重启失败, 结束进程, 以上为错误信息") async def remove_session_file(cli: Client) -> None: """删除会话文件""" logger.warning("尝试删除会话文件...") try: if cli.session is not None: await cli.session.stop() await cli.storage.close() if (session := cli.workdir / f"{cli.name}.session") and session.exists(): os.remove(session) logger.warning(f"会话文件已移除: {session}") except Exception as e: logger.error(f"移除会话文件失败: {e}") ================================================ FILE: log.py ================================================ import inspect import logging import sys from typing import TYPE_CHECKING, Any import loguru if TYPE_CHECKING: from loguru import Logger logger: "Logger" = loguru.logger.bind(name="Main") def formatter(record: Any) -> str: rid = record["extra"].get("req_id") if rid: return ( "{time:HH:mm:ss} | " "{level: <8} | " "{name}:{function}:{line} | " "[{extra[name]}][{extra[req_id]}] {message}\n" ) else: return ( "{time:HH:mm:ss} | " "{level: <8} | " "{name}:{function}:{line} | " "[{extra[name]}] {message}\n" ) def setup_logging(debug: bool = False) -> None: logger.remove() level = "DEBUG" if debug else "INFO" logger.add(sys.stderr, level=level, format=formatter) logger.add( "logs/bot.log", rotation="10 MB", level="INFO", format=formatter, enqueue=True, ) if debug: logger.debug("调试模式已启用") class InterceptHandler(logging.Handler): def emit(self, record: logging.LogRecord) -> None: try: level: str | int = logger.level(record.levelname).name except ValueError: level = record.levelno frame, depth = inspect.currentframe(), 0 while frame: filename = frame.f_code.co_filename is_logging = filename == logging.__file__ is_frozen = "importlib" in filename and "_bootstrap" in filename if depth > 0 and not (is_logging or is_frozen): break frame = frame.f_back depth += 1 logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage()) logging.basicConfig(handlers=[InterceptHandler()], level="ERROR", force=True) ================================================ FILE: plugins/__init__.py ================================================ ================================================ FILE: plugins/filters.py ================================================ from typing import Any from pyrogram import filters from pyrogram.types import InlineQuery, Message from services import ParseService async def _platform_filter(_: Any, __: Any, update: Message | InlineQuery) -> bool: t: str | None = None match update: case Message(): t = update.caption or update.text case InlineQuery(): t = update.query try: return bool(t and ParseService().parser.get_platform(t)) except Exception: return False platform_filter = filters.create(_platform_filter) ================================================ FILE: plugins/helpers.py ================================================ """plugins 共用的工具函数和数据类""" from dataclasses import dataclass from pathlib import Path from markdown import markdown from parsehub import ParseHub, Platform from parsehub.types import AnyMediaFile, AnyParseResult, DownloadResult, RichTextParseResult from parsehub.utils.media_info import MediaInfoReader from pyrogram import Client from log import logger from utils.converter import clean_article_html from utils.helpers import to_list from utils.media_processing_unit import MediaProcessingUnit from utils.ph import Telegraph logger = logger.bind(name="Helpers") @dataclass class ProcessedMedia: source: AnyMediaFile output_paths: list[Path] | None = None output_dir: Path | None = None def resolve_media_info(processed: "ProcessedMedia", file_path: str) -> tuple[int, int, int]: """获取媒体的宽、高、时长。若经过转码则从文件读取,否则使用源信息。""" if processed.output_paths: info = MediaInfoReader.read(file_path) return info.width, info.height, info.duration return processed.source.width, processed.source.height, getattr(processed.source, "duration", 0) def build_caption(parse_result: AnyParseResult, telegraph_url: str | None = None) -> str: return build_caption_by_str(parse_result.title, parse_result.content, parse_result.raw_url, telegraph_url) def build_caption_by_str(title: str | None, content: str | None, raw_url: str, telegraph_url: str | None = None) -> str: """构建消息正文:标题 + 内容 + 来源链接""" title, content = title or "", content or "" if telegraph_url: label = (title or content[:15]).replace("\n", " ") or "无标题" body = f"**[{label}]({telegraph_url})**" else: parts = [] if title: parts.append(f"**{title}**") if content: parts.append(content) body = format_text("\n\n".join(parts) or "**无标题**") return f"{body}\n\nSource" def format_text(text: str) -> str: """格式化输出内容, 限制长度, 添加折叠块样式""" text = text.strip() if len(text) > 500 or len(text.splitlines()) > 10: if len(text) > 1000: text = text[:900] + "......" return f"
{text}
" else: return text def progress(current: int, total: int, unit: str) -> str | None: if unit == "bytes": if total <= 0: return None text = f"下 载 中... | {current * 100 / total:.0f}%" if round(current * 100 / total, 1) % 25 == 0: return text else: text = f"下 载 中... | {current}/{total}" if (current + 1) % 3 == 0 or (current + 1) == total: return text return None async def create_telegraph_page(html_content: str, cli: Client, parse_result: AnyParseResult) -> str: """创建 Telegraph 页面,返回页面 URL""" logger.debug(f"创建 Telegraph 页面: title={parse_result.title}") me = await cli.get_me() page = await Telegraph().create_page( parse_result.title or "无标题", html_content=html_content, author_name=me.full_name, author_url=parse_result.raw_url, ) logger.debug(f"Telegraph 页面已创建: {page.url}") return page.url async def create_richtext_telegraph(cli: Client, parse_result: RichTextParseResult) -> str: """将富文本解析结果转换为 Telegraph 页面,返回页面 URL""" logger.debug(f"富文本转 Telegraph: platform={parse_result.platform}, md_len={len(parse_result.markdown_content)}") md = parse_result.markdown_content match parse_result.platform: case Platform.WEIXIN: md = md.replace("mmbiz.qpic.cn", "qpic.cn.in/mmbiz.qpic.cn") case Platform.COOLAPK: md = md.replace("image.coolapk.com", "qpic.cn.in/image.coolapk.com") html = clean_article_html(markdown(md)) return await create_telegraph_page(html, cli, parse_result) async def process_media_files(download_result: DownloadResult) -> list[ProcessedMedia]: """对下载结果中的媒体文件进行格式转换,返回 ProcessedMedia 列表""" processed_dir = download_result.output_dir.joinpath("processed") processor = MediaProcessingUnit(processed_dir, segment_height=1920, logger=logger.bind(name="MediaProcessor").debug) media_files = to_list(download_result.media) logger.debug(f"开始媒体格式转换: 文件数={len(media_files)}, output_dir={processed_dir}") processed_list: list[ProcessedMedia] = [] for media_file in media_files: # 对于实况图片只处理图片, 不处理视频 logger.debug(f"处理文件: {media_file.path}") result = await processor.process(media_file.path) logger.debug(f"处理结果: output_paths={result.output_paths}") processed_list.append(ProcessedMedia(media_file, result.output_paths, result.temp_dir)) logger.debug(f"媒体格式转换完成: 处理数={len(processed_list)}") return processed_list def get_supported_platforms() -> str: text: list[str] = [] for i in ParseHub().get_platforms(): text.append(f"**{i['name']}** __({'__, __'.join(i['supported_types'])})__") text.sort(reverse=True) return "\n".join(text) def build_start_text() -> str: return ( f"**发送分享链接以进行解析**\n\n" f"**支持的平台:**\n" f"
{get_supported_platforms()}
\n\n" f"**命令列表:**\n" f"`/jx <链接>` - 解析并发送媒体\n" f"`/raw <链接>` - 不处理媒体, 发送原始文件\n" f"`/zip <链接>` - 不处理媒体, 保存解析结果, 发送压缩包\n\n" f"**开源地址: [GitHub](https://github.com/z-mio/parse_hub_bot)**" ) ================================================ FILE: plugins/inline_parse.py ================================================ import asyncio from parsehub import AnyParseResult from parsehub.types import ( AniRef, ImageRef, PostType, VideoRef, ) from pyrogram import Client from pyrogram.errors import FloodWait from pyrogram.types import ( ChosenInlineResult, InlineQuery, InlineQueryResult, InlineQueryResultAnimation, InlineQueryResultArticle, InlineQueryResultCachedAnimation, InlineQueryResultCachedDocument, InlineQueryResultCachedPhoto, InlineQueryResultCachedVideo, InlineQueryResultPhoto, InlineQueryResultVideo, InputMediaVideo, InputTextMessageContent, LinkPreviewOptions, ) from pyrogram.types import ( InlineKeyboardButton as Ikb, ) from pyrogram.types import ( InlineKeyboardMarkup as Ikm, ) from log import logger from plugins.filters import platform_filter from plugins.helpers import ( build_caption, build_caption_by_str, build_start_text, create_richtext_telegraph, resolve_media_info, ) from services import ParseService from services.cache import CacheEntry, CacheMediaType, parse_cache, persistent_cache from services.pipeline import ParsePipeline, StatusReporter from utils.helpers import to_list, with_request_id logger = logger.bind(name="InlineParse") DEFAULT_THUMB_URL = "https://telegra.ph/file/cdfdb65b83a4b7b2b6078.png" class InlineStatusReporter(StatusReporter): """基于 inline_message_id 的状态报告器""" def __init__(self, cli: Client, inline_message_id: str, caption: str = ""): self._cli = cli self._mid = inline_message_id self._caption = caption self._last_text: str | None = None async def report(self, text: str) -> None: text = f"**▎{text}**" full = f"{self._caption}\n{text}" if self._caption else text if full == self._last_text: return self._last_text = full try: await self._cli.edit_inline_text(self._mid, full) except FloodWait: pass async def report_error(self, stage: str, error: Exception) -> None: await self._cli.edit_inline_text( self._mid, f"**▎{stage}错误:** \n```\n{error}```", link_preview_options=LinkPreviewOptions(is_disabled=True), ) async def fn() -> None: await asyncio.sleep(15) await self._cli.edit_inline_text( self._mid, self._caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ) loop = asyncio.get_running_loop() loop.create_task(fn()) async def dismiss(self) -> None: pass def build_cached_inline_results(entry: CacheEntry, raw_url: str) -> list[InlineQueryResult]: """有 file_id 缓存时,构建 cached 类型的 inline 结果(Telegram 服务端直发)""" if entry.parse_result is None: return [] content = entry.parse_result.content caption = build_caption_by_str(entry.parse_result.title, content, raw_url, entry.telegraph_url) title = entry.parse_result.title or "无标题" # 富文本 if entry.telegraph_url: return [ InlineQueryResultArticle( title=title, input_message_content=InputTextMessageContent( caption, link_preview_options=LinkPreviewOptions(show_above_text=True), ), ) ] results: list[InlineQueryResult] = [] if not entry.media: results.append( InlineQueryResultArticle( title=title, description=content, input_message_content=InputTextMessageContent( caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ), ) ) return results for m in entry.media: match m.type: case CacheMediaType.PHOTO: results.append( InlineQueryResultCachedPhoto( photo_file_id=m.file_id, title=title, caption=caption, description=content, ) ) case CacheMediaType.VIDEO: results.append( InlineQueryResultCachedVideo( video_file_id=m.file_id, title=title, caption=caption, description=content, ) ) case CacheMediaType.ANIMATION: results.append( InlineQueryResultCachedAnimation( animation_file_id=m.file_id, title=title, caption=caption, ) ) case CacheMediaType.DOCUMENT: results.append( InlineQueryResultCachedDocument( document_file_id=m.file_id, title=title, caption=caption, description=content, ) ) return results async def build_inline_results(parse_result: AnyParseResult, cli: Client) -> list[InlineQueryResult]: """根据解析结果构建内联查询结果列表""" logger.debug(f"构建 inline 结果: type={parse_result.type}, title={parse_result.title}") title = parse_result.title or "无标题" media_list = to_list(parse_result.media) reply_markup = Ikm([[Ikb("原链接", url=parse_result.raw_url)]]) results: list[InlineQueryResult] = [] # ── 富文本直接 telegraph 发送 ── if parse_result.type == PostType.RICHTEXT: url = await create_richtext_telegraph(cli, parse_result) caption = build_caption(parse_result, url) results.append( InlineQueryResultArticle( title=title, description=parse_result.content, input_message_content=InputTextMessageContent( caption, link_preview_options=LinkPreviewOptions(show_above_text=True), ), ) ) return results caption = build_caption(parse_result) if not media_list: results.append( InlineQueryResultArticle( title=title, description=parse_result.content, input_message_content=InputTextMessageContent( caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ), ) ) return results for index, media_ref in enumerate(media_list): if isinstance(media_ref, ImageRef): results.append( InlineQueryResultPhoto( media_ref.url, thumb_url=media_ref.thumb_url, photo_width=media_ref.width, photo_height=media_ref.height, caption=caption, title=title, description=parse_result.content, ) ) elif isinstance(media_ref, VideoRef): results.append( InlineQueryResultPhoto( media_ref.thumb_url or DEFAULT_THUMB_URL, photo_width=media_ref.width, photo_height=media_ref.height, id=f"download_{index}", title=caption, caption=caption, reply_markup=reply_markup, ) ) elif isinstance(media_ref, AniRef): if media_ref.ext != "gif": results.append( InlineQueryResultVideo( media_ref.url, media_ref.thumb_url or DEFAULT_THUMB_URL, caption=caption, title=title, description=parse_result.content, ) ) else: results.append( InlineQueryResultAnimation( media_ref.url, thumb_url=media_ref.thumb_url, caption=caption, title=title, description=parse_result.content, ) ) logger.debug(f"inline 结果构建完成: count={len(results)}") return results @Client.on_inline_query(~platform_filter) async def inline_parse_tip(_: Client, inline_query: InlineQuery) -> None: results: list[InlineQueryResult] = [ InlineQueryResultArticle( title="聚合解析", description="请在聊天框输入链接", input_message_content=InputTextMessageContent( build_start_text(), link_preview_options=LinkPreviewOptions(is_disabled=True) ), thumb_url="https://i.imgloc.com/2023/06/15/Vbfazk.png", ) ] await inline_query.answer(results=results, cache_time=1) @Client.on_inline_query(platform_filter) @with_request_id async def call_inline_parse(cli: Client, inline_query: InlineQuery) -> None: logger.info(f"收到内联解析请求: query={inline_query.query}, from_user={inline_query.from_user.id}") raw_url = await ParseService().get_raw_url(inline_query.query) if cached := await persistent_cache.get(raw_url): logger.debug("inline: 缓存命中, 构建 cached 结果") results = build_cached_inline_results(cached, raw_url) await inline_query.answer(results[:50], cache_time=60) return parse_result = await parse_cache.get(raw_url) if parse_result is None: parse_result = await ParseService().parse(inline_query.query) await parse_cache.set(raw_url, parse_result) results = await build_inline_results(parse_result, cli) logger.debug(f"inline 查询完成, 返回 {len(results)} 个结果") await inline_query.answer(results[:50], cache_time=0) @Client.on_chosen_inline_result() @with_request_id async def inline_result_download(cli: Client, chosen_result: ChosenInlineResult) -> None: if not chosen_result.result_id.startswith("download_"): return media_index = int(chosen_result.result_id.split("_")[1]) inline_message_id = chosen_result.inline_message_id if inline_message_id is None: return query = chosen_result.query logger.debug(f"inline 下载触发: media_index={media_index}, query={query}") raw_url = await ParseService().get_raw_url(query) cached_result = await parse_cache.get(raw_url) logger.debug(f"缓存命中: {cached_result is not None}") caption = build_caption(cached_result) if cached_result else "" reporter = InlineStatusReporter(cli, inline_message_id, caption) pipeline = ParsePipeline(query, reporter, parse_result=cached_result, singleflight=False) if (result := await pipeline.run()) is None: return parse_result = result.parse_result caption = build_caption(parse_result) # ── 上传 ── await reporter.report("上 传 中...") processed = result.processed_list[media_index] video_ref = parse_result.media[media_index] if isinstance(parse_result.media, list) else parse_result.media try: file_paths = processed.output_paths or [processed.source.path] file_path_str = str(file_paths[0]) logger.debug(f"inline 上传文件: {file_path_str}") width, height, duration = resolve_media_info(processed, file_path_str) video_cover = str(video_ref.thumb_url) if video_ref and video_ref.thumb_url else None media = ( InputMediaVideo( file_path_str, caption=caption, video_cover=video_cover, duration=duration or 0, width=width or 0, height=height or 0, supports_streaming=True, ) if video_cover else InputMediaVideo( file_path_str, caption=caption, duration=duration or 0, width=width or 0, height=height or 0, supports_streaming=True, ) ) await cli.edit_inline_media(inline_message_id, media=media) except Exception as e: logger.opt(exception=e).debug("详细堆栈") logger.error(f"inline 上传失败: {e}") await reporter.report_error("上传", e) finally: logger.debug("inline 下载任务完成") result.cleanup() ================================================ FILE: plugins/parse.py ================================================ import asyncio import os from collections.abc import Awaitable, Callable from itertools import batched from typing import Any, Literal from parsehub.types import ( AniFile, AnyMediaRef, AnyParseResult, ImageFile, LivePhotoFile, PostType, VideoFile, ) from pyrogram import Client, enums, filters from pyrogram.errors import FloodWait, SlowmodeWait from pyrogram.types import ( InputMediaAnimation, InputMediaDocument, InputMediaPhoto, InputMediaVideo, LinkPreviewOptions, Message, ) from core import bs from log import logger from plugins.filters import platform_filter from plugins.helpers import ( ProcessedMedia, build_caption, build_caption_by_str, create_richtext_telegraph, resolve_media_info, ) from services import ParseService from services.cache import CacheEntry, CacheMedia, CacheMediaType, CacheParseResult, parse_cache, persistent_cache from services.pipeline import ParsePipeline, PipelineResult, StatusReporter from utils.helpers import pack_dir_to_tar_gz, to_list, with_request_id logger = logger.bind(name="Parse") SKIP_DOWNLOAD_THRESHOLD = 0 MAX_RETRIES = 5 async def _send_with_rate_limit[T]( send_coro_fn: Callable[[], Awaitable[T]], ) -> T: """带自动重试的发送包装器。 Args: send_coro_fn: 返回协程的可调用对象(lambda 或函数),每次重试会重新调用 """ for attempt in range(MAX_RETRIES): try: return await send_coro_fn() except (FloodWait, SlowmodeWait) as e: if attempt < MAX_RETRIES - 1: logger.warning(f"{e.ID} 重试 ({attempt + 1}/{MAX_RETRIES}),等待 {e.value}s") await asyncio.sleep(e.value) else: raise e from e raise RuntimeError("发送重试失败") class MessageStatusReporter(StatusReporter): """基于 Telegram Message 的状态报告器""" def __init__(self, user_msg: Message): self._user_msg = user_msg self._msg: Message | None = None async def report(self, text: str) -> None: await self._edit_text(f"**▎{text}**") async def report_error(self, stage: str, error: Exception) -> None: await self._edit_text( f"**▎{stage}错误:** \n```\n{error}```", link_preview_options=LinkPreviewOptions(is_disabled=True), ) async def fn() -> None: await asyncio.sleep(15) if self._msg: await self._msg.delete() loop = asyncio.get_running_loop() loop.create_task(fn()) async def dismiss(self) -> None: if self._msg: await self._msg.delete() async def _edit_text(self, text: str, **kwargs: Any) -> None: try: if self._msg is None: self._msg = await self._user_msg.reply_text(text, **kwargs) else: if self._msg.text != text: await self._msg.edit_text(text, **kwargs) except (FloodWait, SlowmodeWait): pass # ── Handler ────────────────────────────────────────────────────────── @Client.on_message(filters.command(["jx", "raw", "zip"]) | ((filters.text | filters.caption) & platform_filter)) async def jx(cli: Client, msg: Message) -> None: mode = "preview" if msg.command: match msg.command[0]: case "raw": mode = "raw" case "jx": mode = "preview" case "zip": mode = "zip" text = " ".join(msg.command[1:]) if msg.command[1:] else "" if not text and msg.reply_to_message: text = msg.reply_to_message.text or msg.reply_to_message.caption or "" if not text: await msg.reply_text("**▎请加上链接或回复一条消息**") return else: text = msg.text or msg.caption or "" tokens = text.strip().split() urls = list({i for i in tokens if ParseService().parser.get_platform(i)})[:10] if not urls: await msg.reply_text("**▎不支持的平台**") return tasks = [handle_parse(cli, msg, url, mode) for url in urls] await asyncio.gather(*tasks) # ── 主流程 ─────────────────────────────────────────────────────────── @with_request_id async def handle_parse( cli: Client, msg: Message, url: str, mode: Literal["raw", "preview", "zip"] | str = "preview" ) -> None: chat_id = msg.chat.id if msg.chat else None logger.info(f"收到解析请求: url={url}, chat_id={chat_id}, msg_id={msg.id}, mode={mode}") reporter = MessageStatusReporter(msg) match mode: case "raw": use_caching = False skip_media_processing = True singleflight = False save_metadata = False case "zip": use_caching = False skip_media_processing = True singleflight = False save_metadata = True case _: use_caching = True skip_media_processing = False singleflight = True save_metadata = False try: raw_url = await ParseService().get_raw_url(url) except Exception as e: await reporter.report_error("获取原始链接", e) return if use_caching and (cached := await persistent_cache.get(raw_url)): logger.debug("file_id 缓存命中, 直接发送") await _send_cached(msg, cached, raw_url) return cached_parse_result = await parse_cache.get(raw_url) pipeline = ParsePipeline( url, reporter, parse_result=cached_parse_result, singleflight=singleflight, skip_media_processing=skip_media_processing, skip_download_threshold=SKIP_DOWNLOAD_THRESHOLD, save_metadata=save_metadata, ) if (result := await pipeline.run()) is None: if pipeline.waited: logger.debug("Singleflight 等待完成, 重新检查缓存") if cached := await persistent_cache.get(raw_url): await _send_cached(msg, cached, raw_url) else: await handle_parse(cli, msg, url, mode=mode) return else: logger.debug("Pipeline 返回 None, 跳过后续处理") return parse_result = result.parse_result await parse_cache.set(raw_url, parse_result) # ── 富文本 → Telegraph ── if parse_result.type == PostType.RICHTEXT: logger.debug(f"富文本类型, 创建 Telegraph 页面: title={parse_result.title}") try: await msg.reply_chat_action(enums.ChatAction.TYPING) ph_url = await create_richtext_telegraph(cli, parse_result) logger.debug(f"Telegraph 页面创建完成: {ph_url}") caption = build_caption(parse_result, ph_url) await msg.reply_text( caption, link_preview_options=LinkPreviewOptions(show_above_text=True), ) await persistent_cache.set( raw_url, CacheEntry( parse_result=CacheParseResult(title=parse_result.title, content=parse_result.content), telegraph_url=ph_url, ), ) await reporter.dismiss() return finally: pipeline.finish() caption = build_caption(parse_result) if not result.processed_list: logger.debug("无媒体文件, 仅发送文本") await msg.reply_chat_action(enums.ChatAction.TYPING) await msg.reply_text( caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ) cache_entry = CacheEntry(parse_result=CacheParseResult(title=parse_result.title, content=parse_result.content)) await persistent_cache.set(raw_url, cache_entry) await reporter.dismiss() pipeline.finish() return if mode == "raw": await _send_raw(msg, result, reporter) return if mode == "zip": await _send_zip(msg, result, reporter) return # ── 上传媒体 ── logger.debug(f"开始上传媒体: media_count={len(result.processed_list)}") await reporter.report("上 传 中...") try: media_cache_entry = await _send_media(msg, parse_result, result.processed_list, caption) if media_cache_entry: await persistent_cache.set(raw_url, media_cache_entry) await reporter.dismiss() except Exception as e: logger.opt(exception=e).debug("详细堆栈") logger.error(f"上传失败: {e}") await reporter.report_error("上传", e) return finally: result.cleanup() pipeline.finish() # ── 构建 InputMedia ────────────────────────────────────────────────── def _build_input_media( media_refs: list[AnyMediaRef], processed_list: list[ProcessedMedia], ) -> tuple[list[InputMediaPhoto | InputMediaVideo], list[InputMediaAnimation]]: """根据处理结果和媒体引用构建 Telegram InputMedia 列表。 Returns: (photos_videos, animations) 两类媒体列表 """ photos_videos: list[InputMediaPhoto | InputMediaVideo] = [] animations: list[InputMediaAnimation] = [] for media_ref, processed in zip(media_refs, processed_list, strict=False): file_paths = processed.output_paths or [processed.source.path] for file_path in file_paths: file_path_str = str(file_path) width, height, duration = resolve_media_info(processed, file_path_str) match processed.source: case ImageFile(): photos_videos.append(InputMediaPhoto(media=file_path_str)) case AniFile(): animations.append(InputMediaAnimation(media=file_path_str)) case VideoFile(): photos_videos.append( InputMediaVideo( media=file_path_str, video_cover=media_ref.thumb_url, duration=duration, width=width, height=height, supports_streaming=True, ) ) case LivePhotoFile(): photos_videos.append( InputMediaVideo( media=processed.source.video_path, video_cover=file_path_str, duration=duration, width=width, height=height, supports_streaming=True, ) ) return photos_videos, animations # ── 缓存条目构建 ───────────────────────────────────────────────────── def _cache_media_from_message(m: Message) -> CacheMedia | None: """从已发送的 Telegram Message 提取 CacheMedia。""" if m.photo: return CacheMedia(type=CacheMediaType.PHOTO, file_id=m.photo.file_id) if m.video: return CacheMedia( type=CacheMediaType.VIDEO, file_id=m.video.file_id, cover_file_id=m.video.video_cover.file_id if m.video.video_cover else None, ) if m.animation: return CacheMedia(type=CacheMediaType.ANIMATION, file_id=m.animation.file_id) if m.document: return CacheMedia(type=CacheMediaType.DOCUMENT, file_id=m.document.file_id) return None def _make_cache_entry(parse_result: AnyParseResult, media_list: list[CacheMedia]) -> CacheEntry: return CacheEntry( parse_result=CacheParseResult(title=parse_result.title, content=parse_result.content), media=media_list, ) # ── Raw 模式上传 ────────────────────────────────────────────────────── async def _send_raw( msg: Message, result: PipelineResult, reporter: MessageStatusReporter, ) -> None: """Raw 模式:将文件以原始文档形式上传。""" logger.debug("Raw 模式, 直接上传文件") await reporter.report("上 传 中...") try: caption = build_caption(result.parse_result) all_docs: list[InputMediaDocument] = [] livephoto_videos: dict[int, InputMediaDocument] = {} for idx, processed in enumerate(result.processed_list): file_paths = processed.output_paths or [processed.source.path] file_path = file_paths[0] all_docs.append(InputMediaDocument(media=str(file_path))) if isinstance(processed.source, LivePhotoFile): livephoto_videos[idx] = InputMediaDocument(media=str(processed.source.video_path)) if len(all_docs) == 1: await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) sent_msg = await _send_with_rate_limit( lambda: msg.reply_document(all_docs[0].media, caption=caption, force_document=True) ) if livephoto_videos and sent_msg: await _send_with_rate_limit( lambda: sent_msg.reply_document(livephoto_videos[0].media, force_document=True) ) else: msgs: list[Message] = [] for batch in batched(all_docs, 10): await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) # noinspection PyDefaultArgument mg = await _send_with_rate_limit(lambda b=list(batch): msg.reply_media_group(b)) # type: ignore msgs.extend(mg) if livephoto_videos: for idx, media_doc in livephoto_videos.items(): await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) await _send_with_rate_limit( lambda m_=media_doc, idx_=idx: msgs[idx_].reply_document(m_.media, force_document=True) # type: ignore[misc] ) await _send_with_rate_limit( lambda: msg.reply_text( caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ) ) except Exception as e: logger.opt(exception=e).debug("详细堆栈") logger.error(f"Raw 模式上传失败: {e}") await reporter.report_error("上传", e) return finally: result.cleanup() await reporter.dismiss() async def _send_zip( msg: Message, result: PipelineResult, reporter: MessageStatusReporter, ) -> None: logger.debug("Zip 模式, 开始打包") await reporter.report("打 包 中...") try: caption = build_caption(result.parse_result) if result.output_dir is None: raise ValueError("缺少打包目录") pack_path = await asyncio.to_thread(pack_dir_to_tar_gz, result.output_dir) except Exception as e: logger.opt(exception=e).debug("详细堆栈") logger.error(f"打包失败: {e}") await reporter.report_error("打包", Exception("...")) return finally: result.cleanup() await reporter.report("上 传 中...") try: await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) await _send_with_rate_limit(lambda: msg.reply_document(str(pack_path), caption=caption)) except Exception as e: logger.opt(exception=e).debug("详细堆栈") logger.error(f"上传失败: {e}") await reporter.report_error("上传", e) return finally: if not bs.debug_skip_cleanup: logger.debug("清理压缩包") os.remove(pack_path) await reporter.dismiss() # ── 发送媒体 ───────────────────────────────────────────────────────── async def _send_single( msg: Message, photos_videos: list[InputMediaPhoto | InputMediaVideo], animations: list[InputMediaAnimation], caption: str, ) -> list[CacheMedia] | None: """发送单个媒体,返回 CacheMedia 列表。上传失败时降级为 document。 返回 None 表示不缓存 """ media_list: list[CacheMedia] = [] all_media = animations + photos_videos try: sent: Message | None = None if animations: await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) sent = await _send_with_rate_limit(lambda: msg.reply_animation(animations[0].media, caption=caption)) else: single = photos_videos[0] match single: case InputMediaPhoto(): await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) sent = await _send_with_rate_limit(lambda: msg.reply_photo(single.media, caption=caption)) case InputMediaVideo(): await msg.reply_chat_action(enums.ChatAction.UPLOAD_VIDEO) sent = await _send_with_rate_limit( lambda: msg.reply_video( single.media, caption=caption, video_cover=single.video_cover, duration=single.duration, width=single.width, height=single.height, supports_streaming=True, ) ) if sent and (cm := _cache_media_from_message(sent)): media_list.append(cm) except Exception as e: logger.warning(f"上传失败 {e}, 使用兼容模式上传") await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) await _send_with_rate_limit( lambda: msg.reply_document(all_media[0].media, caption=caption, force_document=True) ) return None return media_list async def _send_multi( msg: Message, photos_videos: list[InputMediaPhoto | InputMediaVideo], animations: list[InputMediaAnimation], caption: str, ) -> list[CacheMedia] | None: """发送多个媒体(动图逐条、图片视频分批),返回 CacheMedia 列表。 返回 None 表示不缓存 """ media_list: list[CacheMedia] = [] not_cache = False for ani in animations: await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) caption_ = caption if ani == animations[-1] and not photos_videos else "" try: sent = await _send_with_rate_limit( lambda a=ani, c=caption_: msg.reply_animation( # type: ignore[misc] a.media, caption=c, ) ) except Exception as e: logger.warning(f"上传失败 {e}, 使用兼容模式上传") not_cache = True await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) await _send_with_rate_limit( lambda a=ani, c=caption_: msg.reply_document(a.media, caption=c, force_document=True) # type: ignore[misc] ) else: # 过大的 GIF 会返回 document if sent and sent.document: media_list.append(CacheMedia(type=CacheMediaType.DOCUMENT, file_id=sent.document.file_id)) elif sent and sent.animation: media_list.append(CacheMedia(type=CacheMediaType.ANIMATION, file_id=sent.animation.file_id)) try: for batch in batched(photos_videos, 10): if batch[-1] == photos_videos[-1]: batch[0].caption = caption await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) # noinspection PyDefaultArgument sent_msgs = await _send_with_rate_limit(lambda b=list(batch): msg.reply_media_group(media=b)) # type: ignore[misc] for m in sent_msgs: if cm := _cache_media_from_message(m): media_list.append(cm) except Exception as e: logger.warning(f"上传失败 {e}, 使用兼容模式上传") input_documents: list[InputMediaDocument] = [InputMediaDocument(media=item.media) for item in photos_videos] for document_batch in batched(input_documents, 10): if document_batch[-1] == input_documents[-1]: document_batch[0].caption = caption await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) # noinspection PyDefaultArgument await _send_with_rate_limit(lambda b=list(document_batch): msg.reply_media_group(media=b)) # type: ignore return None return None if not_cache else media_list async def _send_media( msg: Message, parse_result: AnyParseResult, processed_list: list[ProcessedMedia], caption: str ) -> CacheEntry | None: """构建、发送媒体,并返回缓存条目。 返回 None 表示不缓存 """ media_refs: list[AnyMediaRef] = to_list(parse_result.media) photos_videos, animations = _build_input_media(media_refs, processed_list) all_count = len(photos_videos) + len(animations) logger.debug(f"媒体分类完成: animations={len(animations)}, photos_videos={len(photos_videos)}") if all_count == 1: logger.debug("单媒体模式发送") media_list = await _send_single(msg, photos_videos, animations, caption) else: logger.debug(f"多媒体模式发送: total={all_count}") media_list = await _send_multi(msg, photos_videos, animations, caption) if media_list is None: return None return _make_cache_entry(parse_result, media_list) # ── 缓存发送 ───────────────────────────────────────────────────────── async def _send_cached(msg: Message, entry: CacheEntry, url: str) -> None: """从 file_id 缓存直接发送,跳过解析/下载/转码""" logger.debug(f"缓存发送: media={entry.media}") if entry.parse_result is None: await persistent_cache.remove(url) return caption = build_caption_by_str(entry.parse_result.title, entry.parse_result.content, url, entry.telegraph_url) # 富文本类型 if entry.telegraph_url: await msg.reply_text( caption, link_preview_options=LinkPreviewOptions(show_above_text=True), ) return if not entry.media: await msg.reply_text( caption, link_preview_options=LinkPreviewOptions(is_disabled=True), ) return if len(entry.media) == 1: await _send_cached_single(msg, entry.media[0], caption) else: await _send_cached_multi(msg, entry.media, caption) async def _send_cached_single(msg: Message, m: CacheMedia, caption: str) -> None: """从缓存发送单个媒体。""" match m.type: case CacheMediaType.PHOTO: await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) await _send_with_rate_limit(lambda: msg.reply_photo(m.file_id, caption=caption)) case CacheMediaType.VIDEO: await msg.reply_chat_action(enums.ChatAction.UPLOAD_VIDEO) await _send_with_rate_limit( lambda: msg.reply_video( m.file_id, caption=caption, supports_streaming=True, video_cover=m.cover_file_id ) ) case CacheMediaType.ANIMATION: await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) await _send_with_rate_limit(lambda: msg.reply_animation(m.file_id, caption=caption)) case CacheMediaType.DOCUMENT: await msg.reply_chat_action(enums.ChatAction.UPLOAD_DOCUMENT) await _send_with_rate_limit(lambda: msg.reply_document(m.file_id, caption=caption, force_document=True)) async def _send_cached_multi(msg: Message, media: list[CacheMedia], caption: str) -> None: """从缓存发送多个媒体。""" animations = [m for m in media if m.type == CacheMediaType.ANIMATION] others = [m for m in media if m.type != CacheMediaType.ANIMATION] for ani in animations: await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) await _send_with_rate_limit( lambda a=ani: msg.reply_animation( # type: ignore[misc] a.file_id, caption=caption if a == animations[-1] and not others else "", ) ) media_group = _build_cached_media_group(others) for batch in batched(media_group, 10): if batch[-1] == media_group[-1]: batch[0].caption = caption await msg.reply_chat_action(enums.ChatAction.UPLOAD_PHOTO) # noinspection PyDefaultArgument await _send_with_rate_limit(lambda m=list(batch): msg.reply_media_group(m)) # type: ignore[misc] def _build_cached_media_group( media: list[CacheMedia], ) -> list[InputMediaPhoto | InputMediaVideo | InputMediaDocument]: """从 CacheMedia 列表构建 Telegram media group。""" group: list[InputMediaPhoto | InputMediaVideo | InputMediaDocument] = [] for m in media: match m.type: case CacheMediaType.PHOTO: group.append(InputMediaPhoto(media=m.file_id)) case CacheMediaType.VIDEO: if m.cover_file_id: group.append(InputMediaVideo(media=m.file_id, supports_streaming=True, video_cover=m.cover_file_id)) else: group.append(InputMediaVideo(media=m.file_id, supports_streaming=True)) case CacheMediaType.DOCUMENT: group.append(InputMediaDocument(media=m.file_id)) return group ================================================ FILE: plugins/start.py ================================================ from pyrogram import Client, filters from pyrogram.types import LinkPreviewOptions, Message from plugins.helpers import build_start_text @Client.on_message(filters.command(["start", "help"])) async def start(_: Client, msg: Message) -> None: await msg.reply( build_start_text(), link_preview_options=LinkPreviewOptions(is_disabled=True), ) ================================================ FILE: pyproject.toml ================================================ [project] name = "parsehubbot" version = "0.1.0" description = "Add your description here" readme = "README.md" requires-python = ">=3.12" dependencies = [ "haishoku>=1.1.8", "httpx>=0.28.1", "kurigram>=2.2.7", "loguru>=0.6.0", "lxml-html-clean>=0.4.1", "markdown>=3.7", "parsehub>=2.0.17", "pickledb>=1.6", "pillow>=12.1.1", "pillow-heif>=1.1.1", "pydantic>=2.12.5", "pydantic-settings>=2.11.0", "python-dotenv>=1.0.1", "pyyaml>=6.0.3", "telegraph>=2.2.0", "tgcrypto>=1.2.5", "uvloop>=0.22.1 ; sys_platform != 'win32'", "winloop>=0.3.1 ; sys_platform == 'win32'", ] [tool.ruff] line-length = 120 [tool.ruff.lint] select = [ "E", # pycodestyle 错误检查 "W", # pycodestyle 警告检查 "F", # pyflakes 错误检查 "I", # isort 导入排序 "B", # flake8-bugbear 常见错误检查 "C4", # flake8-comprehensions 列表/字典推导式检查 "UP", # pyupgrade 自动升级语法 ] ignore = [ "B008", # 不在参数默认值中执行函数调用 "C901", # 函数复杂度过高 ] [dependency-groups] dev = [ "mypy>=2.1.0", ] [tool.mypy] python_version = "3.12" files = ["./"] ignore_missing_imports = true warn_return_any = true warn_unused_ignores = true check_untyped_defs = true disallow_untyped_defs = true no_implicit_optional = true ================================================ FILE: services/__init__.py ================================================ from .cache import CacheEntry, CacheMedia, CacheMediaType, CacheParseResult, parse_cache, persistent_cache from .parser import ParseService from .pipeline import ParsePipeline, PipelineProgressCallback, PipelineResult, StatusReporter __all__ = [ "ParseService", "parse_cache", "persistent_cache", "CacheEntry", "CacheMedia", "CacheMediaType", "CacheParseResult", "ParsePipeline", "PipelineResult", "PipelineProgressCallback", "StatusReporter", ] ================================================ FILE: services/cache.py ================================================ import asyncio import time from enum import StrEnum from typing import Any from pickledb import PickleDB from pydantic import BaseModel from core import bs from log import logger class TTLCache: def __init__(self, ttl: float = 300, cleanup_interval: float = 60, maxsize: int = 0): self._ttl = ttl self._store: dict[str, tuple[Any, float]] = {} self._lock = asyncio.Lock() self.logger = logger.bind(name="TTLCache") self._cleanup_interval = cleanup_interval self._cleanup_task: asyncio.Task | None = None self._maxsize = maxsize async def get(self, key: str) -> Any | None: async with self._lock: entry = self._store.get(key) if entry is None: self.logger.debug(f"缓存未命中: key={key}") return None value, expire_at = entry if time.monotonic() > expire_at: self.logger.debug(f"缓存已过期: key={key}") del self._store[key] return None self.logger.debug(f"缓存命中: key={key}") return value async def set(self, key: str, value: Any, ttl: float | None = None) -> None: async with self._lock: effective_ttl = ttl or self._ttl self.logger.debug(f"缓存写入: key={key}, ttl={effective_ttl}s") if key in self._store: del self._store[key] self._store[key] = (value, time.monotonic() + effective_ttl) await self._evict_overflow_locked() async def _evict_overflow_locked(self) -> None: if self._maxsize <= 0: return overflow = len(self._store) - self._maxsize if overflow <= 0: return for key in list(self._store)[:overflow]: del self._store[key] self.logger.debug(f"缓存数量超限, 淘汰最旧缓存: {overflow} 条") async def pop(self, key: str) -> Any | None: async with self._lock: entry = self._store.pop(key, None) if entry is None: self.logger.debug(f"缓存 pop 未命中: key={key}") return None value, expire_at = entry if time.monotonic() > expire_at: self.logger.debug(f"缓存 pop 已过期: key={key}") return None self.logger.debug(f"缓存 pop 命中: key={key}") return value def start_cleanup(self) -> None: """启动后台清理任务(需在事件循环运行后调用)""" if self._cleanup_task is None: self._cleanup_task = asyncio.create_task(self._periodic_cleanup()) self.logger.debug(f"后台清理任务已启动, interval={self._cleanup_interval}s") async def _periodic_cleanup(self) -> None: while True: await asyncio.sleep(self._cleanup_interval) async with self._lock: now = time.monotonic() expired_keys = [k for k, (_, exp) in self._store.items() if now > exp] for k in expired_keys: del self._store[k] if expired_keys: self.logger.debug(f"定时清理过期缓存: {len(expired_keys)} 条") class CacheMediaType(StrEnum): PHOTO = "photo" VIDEO = "video" ANIMATION = "animation" DOCUMENT = "document" class CacheParseResult(BaseModel): title: str = "" content: str = "" class CacheMedia(BaseModel): type: CacheMediaType file_id: str cover_file_id: str | None = None class CacheEntry(BaseModel): parse_result: CacheParseResult | None = None media: list[CacheMedia] | None = None telegraph_url: str | None = None class _StorageWrapper(BaseModel): entry: CacheEntry exp: int = 0 class PersistentCache: def __init__( self, db_path: str, ttl: int, save_interval: float = 5 * 60, cleanup_interval: float = 60 * 60, max_entries: int = 30000, ): self._db = PickleDB(db_path) self._ttl = ttl self.logger = logger.bind(name="PersistentCache") self.logger.debug(f"缓存已初始化: {db_path}") self._save_interval = save_interval self._cleanup_interval = cleanup_interval self._max_entries = max_entries self._cleanup_task: asyncio.Task | None = None self._lock = asyncio.Lock() self._loaded = False self._dirty = False self._last_cleanup_at = 0.0 @property def enabled(self) -> bool: return self._ttl > 0 async def _ensure_loaded_locked(self) -> None: if self._loaded: return await self._db.load() self._loaded = True self._last_cleanup_at = time.monotonic() removed = await self._evict_overflow_locked() if removed: self._dirty = True self.logger.debug(f"缓存已加载: {self._db.location}, evicted={removed}") async def _save_locked(self) -> None: if not self._loaded or not self._dirty: return await self._db.save() self._dirty = False self.logger.debug("缓存已保存") async def get(self, url: str) -> CacheEntry | None: if not self.enabled: return None async with self._lock: await self._ensure_loaded_locked() data = await self._db.get(url) if data is None: return None if data.get("exp", 0) <= time.time(): self.logger.debug(f"缓存过期: key={url}") if await self._db.remove(url): self._dirty = True return None self.logger.debug(f"缓存命中: key={url}") return _StorageWrapper.model_validate(data).entry async def set(self, url: str, entry: CacheEntry) -> None: if not self.enabled: return sw = _StorageWrapper(entry=entry, exp=int(time.time() + self._ttl)) async with self._lock: await self._ensure_loaded_locked() await self._db.remove(url) await self._db.set(url, sw.model_dump()) removed = await self._evict_overflow_locked() self._dirty = True self.logger.debug(f"缓存写入: key={url}, evicted={removed}") async def remove(self, url: str) -> None: if not self.enabled: return async with self._lock: await self._ensure_loaded_locked() if await self._db.remove(url): self._dirty = True def start_cleanup(self) -> None: """启动后台清理任务""" if not self.enabled: self.logger.debug("持久缓存已禁用, 跳过后台任务") return if self._cleanup_task is None: self._cleanup_task = asyncio.create_task(self._periodic_cleanup()) self.logger.debug( f"后台缓存任务已启动, save_interval={self._save_interval}s, cleanup_interval={self._cleanup_interval}s" ) async def close(self) -> None: if self._cleanup_task: self._cleanup_task.cancel() try: await self._cleanup_task except asyncio.CancelledError: pass self._cleanup_task = None if not self.enabled: return async with self._lock: await self._save_locked() async def _periodic_cleanup(self) -> None: while True: await asyncio.sleep(self._save_interval) if not self._loaded: continue async with self._lock: now = time.monotonic() if now - self._last_cleanup_at >= self._cleanup_interval: expired = await self._remove_expired_locked() overflow = await self._evict_overflow_locked() if expired or overflow: self._dirty = True self.logger.debug(f"定时清理缓存: expired={expired}, overflow={overflow}") self._last_cleanup_at = now await self._save_locked() async def _remove_expired_locked(self) -> int: now = time.time() removed = 0 all_keys = await self._db.all() for key in all_keys: data = await self._db.get(key) if data and data.get("exp", 0) <= now: await self._db.remove(key) removed += 1 return removed async def _evict_overflow_locked(self) -> int: if self._max_entries <= 0: return 0 keys = await self._db.all() overflow = len(keys) - self._max_entries if overflow <= 0: return 0 for key in keys[:overflow]: await self._db.remove(key) return overflow parse_cache = TTLCache(ttl=30 * 60, maxsize=1000) # 解析结果缓存 30 分钟 persistent_cache = PersistentCache( str(bs.cache_path / "cache.json"), ttl=bs.cache_time * 60, save_interval=bs.cache_save_interval * 60, cleanup_interval=bs.cache_cleanup_interval * 60, max_entries=bs.cache_max_entries, ) ================================================ FILE: services/parser.py ================================================ from typing import Self from parsehub import ParseHub, Platform from parsehub.types import ( AnyParseResult, ) from core import pl_cfg from log import logger logger = logger.bind(name="ParseService") class ParseService: _instance: Self | None = None def __new__(cls) -> Self: if cls._instance is None: cls._instance = super().__new__(cls) return cls._instance def __init__(self) -> None: self.parser = ParseHub() def get_platform(self, url: str) -> Platform: p = self.parser.get_platform(url) if not p: raise ValueError("不支持的平台") return p async def parse(self, url: str) -> AnyParseResult: logger.debug(f"开始解析 {url}") p = self.get_platform(url) max_retries = 3 for attempt in range(1, max_retries + 1): try: cookie = pl_cfg.roll_cookie(p.id) proxy = pl_cfg.roll_parser_proxy(p.id) logger.debug(f"使用配置: proxy={proxy}, cookie={cookie}, attempt={attempt}/{max_retries}") pr = await self.parser.parse(url, cookie=cookie, proxy=proxy) logger.debug(f"解析完成: {pr}") return pr except Exception as e: logger.warning(f"解析失败, attempt={attempt}/{max_retries}, err={e}") if attempt >= max_retries: raise Exception(e) from e raise async def get_raw_url(self, url: str, clean_all: bool = True) -> str: p = self.get_platform(url) max_retries = 3 for attempt in range(1, max_retries + 1): try: proxy = pl_cfg.roll_parser_proxy(p.id) logger.debug(f"使用配置: proxy={proxy}, attempt={attempt}/{max_retries}") raw_url = await self.parser.get_raw_url(url, proxy=proxy, clean_all=clean_all) logger.debug(f"原始 URL: {raw_url}") return str(raw_url) except Exception as e: logger.warning(f"获取原始 URL 失败, attempt={attempt}/{max_retries}, err={e}") if attempt >= max_retries: raise Exception(e) from e raise ================================================ FILE: services/pipeline.py ================================================ import asyncio import shutil from collections.abc import Awaitable, Callable from dataclasses import dataclass, field from pathlib import Path from typing import Any, Protocol from parsehub import DownloadResult from parsehub.types import AnyParseResult, PostType, ProgressUnit from core import bs, pl_cfg from log import logger from plugins.helpers import ProcessedMedia, process_media_files from services import ParseService from utils.helpers import to_list logger = logger.bind(name="Pipeline") _inflight: dict[str, asyncio.Event] = {} class StatusReporter(Protocol): """抽象状态通知,由调用方实现""" async def report(self, text: str) -> None: ... async def report_error(self, stage: str, error: Exception) -> None: ... async def dismiss(self) -> None: ... @dataclass class PipelineResult: parse_result: AnyParseResult processed_list: list[ProcessedMedia] = field(default_factory=list) output_dir: Path | None = None def cleanup(self) -> None: if bs.debug_skip_cleanup: logger.debug("debug_skip_cleanup=True 跳过清理") return if self.output_dir: logger.debug("清理资源") shutil.rmtree(self.output_dir, ignore_errors=True) class PipelineProgressCallback: """统一的下载进度回调,依赖 StatusReporter""" def __init__(self, reporter: StatusReporter): self._reporter = reporter self._last_text: str | None = None async def __call__(self, current: int, total: int, unit: ProgressUnit, *args: Any, **kwargs: Any) -> None: from plugins.helpers import progress as fmt_progress text = fmt_progress(current, total, unit) if not text or text == self._last_text: return self._last_text = text await self._reporter.report(text) class ParsePipeline: """ 将 解析 → 下载 → 格式转换 封装为一条流水线。 上传逻辑仍由调用方负责。 内置 Singleflight 机制:对同一 URL 的并发调用只会执行一次流水线, 其余调用等待 Event 完成后返回 None(调用方应重新检查缓存)。 首个调用方在完成上传+缓存后必须调用 finish() 以释放等待者。 """ def __init__( self, url: str, reporter: StatusReporter, parse_result: AnyParseResult | None = None, *, singleflight: bool = True, skip_media_processing: bool = False, skip_download_threshold: int = 0, richtext_skip_download: bool = True, save_metadata: bool = False, ): self._url = url self._reporter = reporter self._parse_result = parse_result self._waited = False self._singleflight = singleflight self._skip_media_processing = skip_media_processing self._skip_download_threshold = skip_download_threshold self._richtext_skip_download = richtext_skip_download self._save_metadata = save_metadata @property def waited(self) -> bool: """是否因 singleflight 而等待了其他流水线""" return self._waited def finish(self) -> None: """首个调用方完成上传+缓存后调用,释放所有等待者""" event = _inflight.pop(self._url, None) if event is not None: event.set() async def run(self) -> PipelineResult | None: """执行流水线,返回 PipelineResult 或 None(失败时已通知)""" if self._singleflight: key = self._url existing = _inflight.get(key) if existing is not None: self._waited = True logger.debug(f"Singleflight 命中, 等待已有流水线: url={key}") await self._reporter.report("已有相同任务正在解析, 等待解析完成...") await existing.wait() await self._reporter.dismiss() return None event = asyncio.Event() _inflight[key] = event try: result = await self._execute() if result is None: self.finish() # 流水线失败,立即释放等待者 return result except BaseException: self.finish() # 流水线异常,立即释放等待者 raise async def _execute(self) -> PipelineResult | None: """实际执行流水线逻辑""" logger.debug(f"流水线启动: url={self._url}, has_cached_result={self._parse_result is not None}") ps = ParseService() # ── 1. 解析 ── if self._parse_result is not None: logger.debug("使用缓存的解析结果") parse_result = self._parse_result else: await self._reporter.report("解 析 中...") parse_result = await self._step("解析", lambda: ps.parse(self._url)) if parse_result is None: return None if self._richtext_skip_download and parse_result.type == PostType.RICHTEXT: logger.debug("富文本跳过下载") return PipelineResult(parse_result=parse_result) if self._skip_download_threshold and len(to_list(parse_result.media)) > self._skip_download_threshold: logger.debug( f"媒体数量({len(to_list(parse_result.media))})大于设定值({self._skip_download_threshold}), 跳过下载" ) return PipelineResult(parse_result=parse_result) # ── 2. 下载 ── await self._reporter.report("下 载 中...") p = ps.parser.get_platform(self._url) proxy = pl_cfg.roll_downloader_proxy(p.id) logger.debug(f"使用配置: proxy={proxy}") progress_cb = PipelineProgressCallback(self._reporter) download_result: DownloadResult = await self._step( "下载", lambda: parse_result.download( bs.download_dir, callback=progress_cb, callback_args=(), proxy=proxy, save_metadata=self._save_metadata ), timeout=60 * 30, # 30分钟 ) if download_result is None: return None logger.debug(f"下载完成: output_dir={download_result.output_dir}") # ── 3. 格式转换 ── if self._skip_media_processing: logger.debug(f"流水线完成: download_result={download_result}") processed_list = [ProcessedMedia(i, [i.path]) for i in to_list(download_result.media)] return PipelineResult( parse_result=parse_result, processed_list=processed_list, output_dir=download_result.output_dir ) await self._reporter.report("处 理 中...") maybe_processed_list = await self._step( "格式转换", lambda: process_media_files(download_result), cleanup=lambda: shutil.rmtree(download_result.output_dir, ignore_errors=True), ) if maybe_processed_list is None: return None processed_list = maybe_processed_list logger.debug(f"流水线完成: processed_count={len(processed_list)}") return PipelineResult( parse_result=parse_result, processed_list=processed_list, output_dir=download_result.output_dir, ) async def _step[T]( self, stage: str, action: Callable[[], Awaitable[T]], cleanup: Callable[[], None] | None = None, timeout: float | None = None, ) -> T | None: """执行单个步骤,失败时统一处理""" logger.debug(f"执行步骤: {stage}") try: coro = action() if timeout is not None: return await asyncio.wait_for(coro, timeout=timeout) return await coro except TimeoutError: logger.error(f"{stage}超时 (>{timeout}s)") await self._reporter.report_error(stage, TimeoutError(f"{stage}超时 (>{timeout}s)")) if cleanup: cleanup() return None except Exception as e: logger.exception(e) logger.error(f"{stage}失败, 以上为错误信息") await self._reporter.report_error(stage, e) if cleanup: cleanup() return None ================================================ FILE: utils/__init__.py ================================================ ================================================ FILE: utils/converter.py ================================================ # FROM https://github.com/mercuree/html-telegraph-poster/blob/7212225e28a0206803c32e67d1185bbfbd1fc181/html_telegraph_poster/converter.py import re from lxml.html.clean import Cleaner allowed_tags = ( "a", "aside", "b", "blockquote", "br", "code", "em", "figcaption", "figure", "h3", "h4", "hr", "i", "iframe", "img", "li", "ol", "p", "pre", "s", "strong", "u", "ul", "video", ) telegram_embed_script_re = re.compile( r"""]+\sdata-telegram-post=['"]([^'"]+))[^<]+""", re.IGNORECASE, ) pre_content_re = re.compile(r"<(pre|code)(>|\s[^>]*>)[\s\S]*?") line_breaks_inside_pre = re.compile(r"|\s[^<>]*>)") line_breaks_and_empty_strings = re.compile(r"(\s{2,}|\s*\r?\n\s*)") header_re = re.compile(r"") def clean_article_html(html_string: str) -> str: html_string = html_string.replace("", "") # telegram will convert anyway html_string = re.sub(r"<(/?)b(?=\s|>)", r"<\1strong", html_string) html_string = re.sub(r"<(/?)(h2|h5|h6)", r"<\1h4", html_string) # convert telegram embed posts before cleaner html_string = re.sub( telegram_embed_script_re, r'', html_string, ) # remove if present (can't do this with Cleaner) html_string = header_re.sub("", html_string) c = Cleaner( allow_tags=allowed_tags, style=True, remove_unknown_tags=False, embedded=False, safe_attrs_only=True, safe_attrs=("src", "href", "class"), ) # wrap with div to be sure it is there # (otherwise lxml will add parent element in some cases html_string = f"
{html_string}
" cleaned = c.clean_html(html_string) # remove wrapped div cleaned = cleaned[5:-6] # remove all line breaks and empty strings html_string = replace_line_breaks_except_pre(cleaned) # but replace multiple br tags with one line break, telegraph will convert it to
html_string = re.sub(r"(|\s[^<>]*>)\s*)+", "\n", html_string) return html_string.strip(" \t") def replace_line_breaks_except_pre(html_string: str, replace_by: str = " ") -> str: # Remove all line breaks and empty strings, except pre tag # how to make it in one string? :\ pre_ranges = [0] out = "" # replace non-breaking space with usual space html_string = html_string.replace("\u00a0", " ") # get
 start/end postion
    for x in pre_content_re.finditer(html_string):
        start, end = x.start(), x.end()
        pre_ranges.extend((start, end))
    pre_ranges.append(len(html_string))

    # all odd elements are 
, leave them untouched
    for k in range(1, len(pre_ranges)):
        part = html_string[pre_ranges[k - 1] : pre_ranges[k]]
        if k % 2 == 0:
            out += line_breaks_inside_pre.sub("\n", part)
        else:
            out += line_breaks_and_empty_strings.sub(replace_by, part)
    return out


================================================
FILE: utils/event_loop.py
================================================
import importlib
import sys

from log import logger


def setup_optimized_event_loop() -> bool:
    """配置优化的事件循环,自动选择winloop或uvloop"""
    is_windows = sys.platform == "win32"
    loop_module = "winloop" if is_windows else "uvloop"

    try:
        # 动态导入并安装事件循环
        module = importlib.import_module(loop_module)
        module.install()
        logger.debug(f"{loop_module} 已启用")
        return True
    except ImportError:
        logger.debug(f"{loop_module} 未安装")
        logger.debug("使用标准 asyncio 事件循环")
        return False
    except Exception as e:
        logger.debug(f"启用 {loop_module} 时出错: {e}")
        logger.debug("使用标准 asyncio 事件循环")
        return False


================================================
FILE: utils/helpers.py
================================================
import asyncio
import functools
import tarfile
import uuid
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, overload

from log import logger


async def run_cmd(*cmd: str, timeout: float = 30) -> str:
    """运行外部命令并异步读取输出"""
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.DEVNULL,
    )
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except TimeoutError:
        proc.kill()
        await proc.wait()
        return ""
    return stdout.decode().strip()


@overload
def to_list[T](v: list[T]) -> list[T]: ...


@overload
def to_list[T](v: T) -> list[T]: ...


def to_list[T](v: T | list[T]) -> list[T]:
    return v if isinstance(v, list) else [v]


def pack_dir_to_tar_gz(dir_path: str | Path, output_path: str | Path | None = None) -> Path:
    """
    将目录打包为 tar.gz,返回压缩包路径。

    Args:
        dir_path: 要打包的目录
        output_path: 输出压缩包路径;不传则默认生成同名 .tar.gz

    Returns:
        生成的 tar.gz 文件路径
    """
    source_dir = Path(dir_path).resolve()
    if not source_dir.is_dir():
        raise ValueError(f"不是有效目录: {source_dir}")

    if output_path is None:
        output_path = source_dir.with_suffix(".tar.gz")
    else:
        output_path = Path(output_path).resolve()

    with tarfile.open(output_path, "w:gz") as tar:
        tar.add(source_dir, arcname=source_dir.name)

    return output_path


def with_request_id[T](func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> T:
        request_id = str(uuid.uuid4())[:8]
        with logger.contextualize(req_id=request_id):
            return await func(*args, **kwargs)

    return wrapper


================================================
FILE: utils/media_processing_unit.py
================================================
"""媒体处理器 — 将图片/视频转换为 Telegram 兼容格式"""

import asyncio
import math
import mimetypes
import os
import time
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path

from haishoku.haishoku import Haishoku
from loguru import logger
from PIL import Image, ImageOps
from PIL.Image import Resampling

from utils.helpers import run_cmd


@dataclass
class MediaProcessResult:
    """统一处理结果"""

    output_paths: list[Path]
    temp_dir: Path | None = None


class MediaProcessingUnit:
    """媒体处理器,将媒体转换为 Telegram 兼容的格式

    Telegram 限制:
    - 图片宽高比 / 高宽比不能超过 20:1
    - 单次最多发送 10 张图片

    用法:
        mpu = MediaProcessingUnit(output_dir=Path("./output"))
        result = await mpu.process("media.mp4")
    """

    def __init__(
        self,
        output_dir: str | Path,
        segment_height: int = 1400,
        medium_threshold: int = 2,
        overlap: int = 100,
        logger: Callable = logger.info,
    ):
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.segment_height = segment_height
        self.medium_threshold = medium_threshold
        self.overlap = overlap
        self.logger = logger

    # ------------------------------------------------------------------ #
    #  公共入口
    # ------------------------------------------------------------------ #

    async def process(self, file_path: str | Path) -> MediaProcessResult:
        media_type = self.get_media_type_by_mime(file_path)
        self.logger(f"开始处理媒体: path={file_path}, type={media_type}")
        if media_type == "image":
            return await self.process_image(Path(file_path))
        elif media_type == "video":
            return await self.process_video(Path(file_path))
        else:
            raise ValueError(f"Unsupported media type: {file_path}")

    # ------------------------------------------------------------------ #
    #  图片处理
    # ------------------------------------------------------------------ #

    async def process_image(self, file_path: Path) -> MediaProcessResult:
        ext = file_path.suffix.lower()
        needs_convert = ext in {".heif", ".heic", ".avif"}
        intermediates: list[Path] = []  # 统一收集中间文件

        try:
            if needs_convert:
                self.logger(f"图片格式需转换: {ext} -> webp")
                source = await asyncio.to_thread(self._img2webp, file_path)
                intermediates.append(source)
            else:
                source = file_path

            if result := await asyncio.to_thread(self._adapt_image, source):
                return result

            # _adapt_image 无需处理,尝试 downscale
            if downscaled := await asyncio.to_thread(self._downscale_image, source):
                intermediates.append(downscaled)
                source = downscaled

            intermediates = [p for p in intermediates if p != source]
            return MediaProcessResult(output_paths=[source])
        finally:
            for p in intermediates:
                if p.exists():
                    self.logger(f"删除中间文件: {p}")
                    os.remove(p)

    def _adapt_image(self, file_path: Path) -> MediaProcessResult | None:
        """分析图片尺寸并做填充 / 切割,返回 None 表示无需处理"""
        with Image.open(file_path) as img:
            w, h = img.width, img.height

        wh_ratio = w / h
        hw_ratio = h / w
        self.logger(f"图片尺寸: {w}x{h}, wh_ratio={wh_ratio:.2f}, hw_ratio={hw_ratio:.2f}")

        if w >= h:
            # 横图
            if wh_ratio <= 20:
                self.logger("横图比例正常,跳过处理")
                return None
            self.logger("横图比例超限,需要填充")
            padding = self._calc_padding_horizontal(w, h)
            with Image.open(file_path) as img:
                return self._pad_image(file_path, img, padding)
        else:
            # 竖图
            if hw_ratio <= 5 or (w < 200 and hw_ratio < 20):
                self.logger("竖图比例正常,跳过处理")
                return None
            if w < 200 and hw_ratio > 20:
                self.logger("窄竖图比例超限,需要填充")
                padding = self._calc_padding_vertical(w, h)
                with Image.open(file_path) as img:
                    return self._pad_image(file_path, img, padding)
            # 长图切割
            segments = h // self.segment_height
            seg_h = h // 2 if segments < self.medium_threshold else self.segment_height
            self.logger(f"长图切割: segments={segments}, seg_h={seg_h}")
            return self._split_image(file_path, seg_h)

    def _img2webp(self, file_path: Path) -> Path:
        with Image.open(file_path) as pil_img:
            img = pil_img.convert("RGBA") if pil_img.mode != "RGBA" else pil_img
            output = self.output_dir / file_path.with_suffix(".webp").name
            img.save(output, format="WEBP")
        self.logger(f"webp 转换完成: {output}")
        return output

    def _downscale_image(self, file_path: Path, max_side: int = 2560) -> Path | None:
        """若图片任一边超过 max_side,等比缩放至长边为 max_side,返回新文件路径;无需缩放返回 None"""
        with Image.open(file_path) as img:
            w, h = img.size
            if max(w, h) <= max_side:
                return None
            scale = max_side / max(w, h)
            new_w, new_h = int(w * scale), int(h * scale)
            self.logger(f"图片长边超限({max(w, h)}px > {max_side}px),缩放: {w}x{h} -> {new_w}x{new_h}")
            resized = img.resize((new_w, new_h), Resampling.LANCZOS)
            ext = (img.format and f".{img.format.lower()}") or file_path.suffix
            out_path = self.output_dir / f"downscaled_{time.time_ns()}{ext}"
            resized.save(out_path)
        return out_path

    # -- 图片辅助 --------------------------------------------------------- #

    @staticmethod
    def _calc_padding_horizontal(w: int, h: int) -> tuple[int, int, int, int]:
        h_padding = w // 20 - h // 2
        return 0, h_padding, 0, h_padding

    @staticmethod
    def _calc_padding_vertical(w: int, h: int) -> tuple[int, int, int, int]:
        w_padding = h // 20 - w // 2
        return w_padding, 0, w_padding, 0

    @staticmethod
    def _get_dominant_color(file_path: Path) -> tuple[int, ...]:
        haishoku = Haishoku.loadHaishoku(str(file_path))
        return tuple(int(v * 0.8) for v in haishoku.palette[0][1])

    def _pad_image(
        self,
        file_path: Path,
        img: Image.Image,
        padding: tuple[int, int, int, int],
    ) -> MediaProcessResult:
        fill_color = self._get_dominant_color(file_path)
        padded = ImageOps.expand(img, padding, fill=fill_color)
        out_path = self.output_dir / f"padded_{time.time_ns()}.png"
        padded.save(out_path)
        self.logger(f"填充完成: padding={padding}, color={fill_color}, output={out_path}")
        return MediaProcessResult(output_paths=[out_path])

    def _split_image(self, file_path: Path, segment_height: int) -> MediaProcessResult:
        temp_dir = self.output_dir / f"split_{time.time_ns()}"
        temp_dir.mkdir(parents=True, exist_ok=True)
        segments = self._do_split(file_path, temp_dir, segment_height)
        self.logger(f"图片切割完成: {len(segments)} 段, output_dir={temp_dir}")
        return MediaProcessResult(output_paths=segments, temp_dir=temp_dir)

    def _do_split(
        self,
        input_path: Path,
        output_dir: Path,
        segment_height: int,
    ) -> list[Path]:
        with Image.open(input_path) as img:
            width, height = img.size
            num_segments = math.ceil(height / segment_height)
            self.logger(f"切割参数: size={width}x{height}, segment_h={segment_height}, num={num_segments}")
            result: list[Path] = []
            for i in range(num_segments):
                top = i * segment_height - (self.overlap if i != 0 else 0)
                bottom = min((i + 1) * segment_height, height)
                segment = img.crop((0, top, width, bottom))
                out_path = output_dir / f"segment_{i + 1:03d}.png"
                segment.save(out_path)
                result.append(out_path)
        return result

    # ------------------------------------------------------------------ #
    #  视频处理
    # ------------------------------------------------------------------ #

    async def process_video(self, file_path: Path) -> MediaProcessResult:
        codec = await self.get_video_codec(file_path)
        self.logger(f"视频编码: codec={codec}, path={file_path}")

        converted: Path | None = None
        if codec != "h264":
            self.logger("编码非 h264,开始转码")
            converted = await self.ensure_h264(file_path)
            self.logger(f"转码完成: {converted}")

        source = converted or file_path
        video_size = source.stat().st_size
        self.logger(f"视频大小: {video_size / 1024 / 1024:.1f} MB")

        if video_size > 2 * 1024**3:  # 2 GiB
            self.logger("视频超过 2 GiB,开始分割")
            output_paths, output_dir = await self.split_video(source, self.output_dir)
            if converted:
                os.remove(converted)
            return MediaProcessResult(output_paths=output_paths, temp_dir=output_dir)

        return MediaProcessResult(output_paths=[source])

    @staticmethod
    async def get_video_codec(file_path: Path) -> str:
        out = await run_cmd(
            "ffprobe",
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=codec_name",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        )
        return out.strip().lower() if out else ""

    @staticmethod
    async def get_duration(file_path: Path) -> float:
        out = await run_cmd(
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        )
        return float(out.strip()) if out else 0.0

    async def ensure_h264(self, file_path: Path) -> Path:
        out = self.output_dir / (file_path.stem + "_h264" + file_path.suffix)
        duration = await self.get_duration(file_path)
        height = await self._get_video_height(file_path)

        cmd = self._build_sw_transcode_cmd(file_path, out, duration, height)

        self.logger(f"h264 转码: {file_path.name} -> {out.name}, duration={duration:.0f}s, encoder=SW:libx264")

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        await proc.wait()

        if out.exists() and out.stat().st_size > 0:
            self.logger(f"h264 转码成功: size={out.stat().st_size / 1024 / 1024:.1f}MB")
            return out

        self.logger(f"h264 转码失败,返回原文件: {file_path}")
        return file_path

    @staticmethod
    async def _get_video_height(file_path: Path) -> int:
        out = await run_cmd(
            "ffprobe",
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=height",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        )
        return int(out.strip()) if out and out.strip().isdigit() else 0

    def _build_sw_transcode_cmd(self, file_path: Path, out: Path, duration: float, height: int) -> list[str]:
        if duration <= 30:
            preset, crf = "slow", "18"
        elif duration <= 60:
            preset, crf = "medium", "20"
        elif duration <= 600:
            preset, crf = "fast", "23"
        elif duration <= 1800:
            preset, crf = "veryfast", "26"
        else:
            preset, crf = "ultrafast", "28"

        scale = ["-vf", "scale=-2:720"] if duration > 1800 and height > 720 else []
        self.logger(f"SW 转码策略: preset={preset}, crf={crf}, scale={'720p' if scale else 'original'}")

        return [
            "ffmpeg",
            "-i",
            str(file_path),
            "-c:v",
            "libx264",
            "-preset",
            preset,
            "-crf",
            crf,
            *scale,
            "-c:a",
            "aac",
            "-y",
            str(out),
        ]

    async def split_video(
        self,
        file_path: Path,
        output_dir: Path,
        size_limit: int = 2_000_000_000,
        ffmpeg_args: list[str] | None = None,
        keep_sec: float = 1.0,
    ) -> tuple[list[Path], Path]:
        if ffmpeg_args is None:
            ffmpeg_args = ["-c", "copy"]

        base = file_path.stem
        split_dir = output_dir / f"{base}_split"
        split_dir.mkdir(parents=True, exist_ok=True)
        ext = file_path.suffix.lstrip(".")
        total_duration = int(await self.get_duration(file_path))
        self.logger(f"视频分割: duration={total_duration}s, size_limit={size_limit}")

        cur, part, output_paths = 0, 1, []
        while cur < total_duration:
            out_file = split_dir / f"{base}_part_{part:03d}.{ext}"
            output_paths.append(out_file)
            cmd = [
                "ffmpeg",
                "-ss",
                str(cur),
                "-i",
                str(file_path),
                "-fs",
                str(size_limit),
                *ffmpeg_args,
                "-y",
                str(out_file),
            ]
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )
            await proc.wait()

            new_dur = int(await self.get_duration(out_file))
            self.logger(f"分割 part {part}: offset={cur}s, duration={new_dur}s, file={out_file}")
            if new_dur <= 0:
                break
            cur += new_dur
            if cur < total_duration:
                cur = max(cur - int(keep_sec), 0)
            part += 1

        self.logger(f"视频分割完成: {len(output_paths)} 段")
        return output_paths, split_dir

    # ------------------------------------------------------------------ #
    #  工具方法
    # ------------------------------------------------------------------ #

    @staticmethod
    def get_media_type_by_mime(file_path: str | Path) -> str:
        mime, _ = mimetypes.guess_type(str(file_path))
        if mime:
            if mime.startswith("image/"):
                return "image"
            if mime.startswith("video/"):
                return "video"
        return "unknown"


async def main() -> None:
    mpu = MediaProcessingUnit(output_dir=Path(r"D:\Downloads\新建文件夹"))
    result = await mpu.process(r"D:\Downloads\36751083810-1-30066.mp4")
    print(result.output_paths)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: utils/ph.py
================================================
import random
from dataclasses import dataclass
from typing import Any

from telegraph.aio import Telegraph as TelegraphAPI


class Telegraph:
    """Telegraph API 封装"""

    def __init__(self, token: str | None = None, domain: str = "telegra.ph"):
        self.token = token
        self.domain = domain
        self.telegraph = TelegraphAPI(access_token=token, domain=domain)

    async def create_account(
        self, short_name: str, author_name: str | None = None, author_url: str | None = None
    ) -> "TelegraphAccount":
        """创建 Telegraph 账户"""
        account = await self.telegraph.create_account(short_name, author_name, author_url)
        acc_info = await self.get_account_info(account)
        self.token = acc_info.access_token
        return acc_info

    async def get_account_info(self, account_info: dict[str, str] | None = None) -> "TelegraphAccount":
        """获取 Telegraph 账户信息"""
        account_info = account_info or await self.telegraph.get_account_info(
            [
                "short_name",
                "author_name",
                "author_url",
                "auth_url",
            ]
        )
        return TelegraphAccount(
            self.telegraph.get_access_token(),
            account_info["short_name"],
            account_info["author_name"],
            account_info["author_url"],
            account_info["auth_url"],
        )

    async def create_page(
        self,
        title: str,
        content: list[dict[str, Any]] | None = None,
        html_content: str | None = None,
        author_name: str | None = None,
        author_url: str | None = None,
        return_content: bool = False,
        auto_create_account: bool = True,
    ) -> "TelegraphPage":
        """创建 Telegraph 页面"""
        if auto_create_account and not self.token:
            # 随机用户名
            short_name = "tg_" + str(random.randint(100000, 999999))
            await self.create_account(short_name)
        response = await self.telegraph.create_page(
            title,
            content,
            html_content,
            author_name,
            author_url,
            return_content,
        )
        return TelegraphPage(
            response["path"],
            response["url"],
            response["title"],
            response["description"],
            response["views"],
            response["can_edit"],
            await self.get_account_info(),
        )


@dataclass
class TelegraphAccount:
    access_token: str
    short_name: str
    author_name: str
    author_url: str
    auth_url: str


@dataclass
class TelegraphPage:
    path: str
    url: str
    title: str
    description: str
    views: int
    can_edit: bool
    account: TelegraphAccount