[
  {
    "path": ".dockerignore",
    "content": "Dockerfile*\ndocker-compose*\nLICENSE\ntest_books\nREADME*\n.dockerignore\n.git\n.github\n.gitignore\n.vscode"
  },
  {
    "path": ".github/workflows/docs.yaml",
    "content": "name: Publish docs\non:\n  push:\n    branches:\n      - main\n\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/setup-python@v2\n        with:\n          python-version: '3.10'\n      - run: pip install mkdocs mkdocs-material\n      - run: mkdocs gh-deploy --force\n"
  },
  {
    "path": ".github/workflows/make_test_ebook.yaml",
    "content": "name: CI\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n    branches: [ main ]\n  workflow_dispatch:\n  \nenv:\n  ACTIONS_ALLOW_UNSECURE_COMMANDS: true\n  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n  BBM_CAIYUN_API_KEY: ${{ secrets.BBM_CAIYUN_API_KEY }}\n\njobs:\n  typos-check:\n    name: Spell Check with Typos\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout Actions Repository\n        uses: actions/checkout@v3\n      - name: Check spelling with custom config file\n        uses: crate-ci/typos@v1.16.6\n        with:\n          config: ./typos.toml\n  testing:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v3\n      - name: install python 3.10\n        uses: actions/setup-python@v4\n        with:\n          python-version: '3.10'\n          cache: 'pip' # caching pip dependencies\n      - name: Check formatting (black)\n        run: |\n            pip install black\n            black . --check\n      - name: install python requirements\n        run: pip install -r requirements.txt\n\n      - name: Test install\n        run: |\n            pip install .\n\n      - name: make normal ebook test using google translate and cli\n        run: |\n            bbook_maker --book_name \"test_books/Liber_Esther.epub\" --test --test_num 10 --model google --translate-tags div,p\n            bbook_maker --book_name \"test_books/Liber_Esther.epub\" --test --test_num 20 --model google\n\n      - name: make txt book test using google translate\n        run: |\n          python3 make_book.py --book_name \"test_books/the_little_prince.txt\" --test --test_num 20 --model google\n\n      - name: make txt book test with batch_size\n        run: |\n          python3 make_book.py --book_name \"test_books/the_little_prince.txt\" --test --batch_size 30 --test_num 20 --model google\n  \n      - name: make caiyun translator test\n        if: env.BBM_CAIYUN_API_KEY != null\n        run: |\n          python3 make_book.py 
--book_name \"test_books/the_little_prince.txt\" --test --batch_size 30 --test_num 100 --model caiyun\n\n      - name: make openai key ebook test\n        if: env.OPENAI_API_KEY != null\n        run: |\n            python3 make_book.py --book_name \"test_books/lemo.epub\" --test --test_num 5 --language zh-hans\n            python3 make_book.py --book_name \"test_books/animal_farm.epub\" --test --test_num 5 --language ja --model gpt3 --prompt prompt_template_sample.txt\n            python3 make_book.py --book_name \"test_books/animal_farm.epub\" --test --test_num 5 --language ja --prompt prompt_template_sample.json\n            python3 make_book.py --book_name test_books/Lex_Fridman_episode_322.srt --test --test_num 20\n            \n      - name: Rename and Upload ePub\n        if: env.OPENAI_API_KEY != null\n        uses: actions/upload-artifact@v4\n        with:\n          name: epub_output\n          path: \"test_books/lemo_bilingual.epub\"\n"
  },
  {
    "path": ".github/workflows/release.yaml",
    "content": "name: Release and Build Docker Image\n\npermissions:\n  contents: write\n\non:\n  push:\n    tags:\n      - \"*\"\n\njobs:\n  release-pypi:\n    name: Build and Release PyPI\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v3\n        with:\n          fetch-depth: 0\n\n      - uses: actions/setup-python@v4\n        with:\n          python-version: \"3.10\"\n\n      - uses: actions/setup-node@v3\n        with:\n          node-version: 16\n\n      - name: Build artifacts\n        run: |\n          pip install build\n          python -m build\n\n      - uses: pypa/gh-action-pypi-publish@release/v1\n        with:\n          password: ${{ secrets.PYPI_API_TOKEN }}\n\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\npip-wheel-metadata/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n.idea/\n.DS_Store\ntest_books/\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n.python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n/test_books/*.epub\nlog/\n.chatgpt_cache.json\n# for user do not want to push\n*.srt\n*.txt\n*.bin\n*.epub\n\n# For markdown files in user directories\n.cursorrules\nbooks/\nprompts/\n.pdm-python\n"
  },
  {
    "path": "Dockerfile",
    "content": "FROM python:3.10-slim\n\nRUN apt-get update\n\nWORKDIR /app\n\nCOPY requirements.txt .\n\nRUN pip install -r /app/requirements.txt\n\nCOPY . .\n\nENTRYPOINT [\"python3\", \"make_book.py\"]\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2023 yihong\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "Makefile",
    "content": "SHELL := /bin/bash\n\nfmt:\n\t@echo \"Running formatter ...\"\n\tvenv/bin/black .\n\n.PHONY:tests\ntests:\n\t@echo \"Running tests ...\"\n\tvenv/bin/pytest tests/test_integration.py\n\nserve-docs:\n\tmkdocs serve\n"
  },
  {
    "path": "README-CN.md",
    "content": "# bilingual_book_maker\n\nbilingual_book_maker 是一个 AI 翻译工具，使用 ChatGPT 帮助用户制作多语言版本的 epub/txt/srt 文件和图书。该工具仅适用于翻译进入公共版权领域的 epub/txt 图书，不适用于有版权的书籍。请在使用之前阅读项目的 **[免责声明](./disclaimer.md)**。\n\n![image](https://user-images.githubusercontent.com/15976103/222317531-a05317c5-4eee-49de-95cd-04063d9539d9.png)\n\n## 准备\n\n1. ChatGPT or OpenAI token [^token]\n2. epub/txt books\n3. 能正常联网的环境或 proxy\n4. python3.8+\n\n## 快速开始\n\n本地放了一个 `test_books/animal_farm.epub` 给大家测试\n\n```shell\npip install -r requirements.txt\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test\n或\npip install -U bbook_maker\nbbook --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test\n```\n\n## 翻译服务\n\n- 使用 `--openai_key` 指定 OpenAI API key，如果有多个可以用英文逗号分隔(xxx,xxx,xxx)，可以减少接口调用次数限制带来的错误。\n  或者，指定环境变量 `BBM_OPENAI_API_KEY` 来略过这个选项。\n- 默认用了 [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis) 模型，也就是 ChatGPT 正在使用的模型。\n\n* DeepL\n\n  使用 DeepL 封装的 api 进行翻译，需要付费。[DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) 来获得 token\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key}\n  ```\n\n* DeepL free\n\n  使用 DeepL free\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model deeplfree\n  ```\n\n* Claude\n\n  使用 [Claude](https://console.anthropic.com/docs) 模型进行翻译\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key}\n  ```\n\n* 谷歌翻译\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model google\n  ```\n\n* 彩云小译\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model caiyun --caiyun_key ${caiyun_key}\n  ```\n\n* Gemini\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model gemini --gemini_key ${gemini_key}\n  ```\n\n* Qwen\n\n  使用 
[Qwen](https://www.aliyun.com/product/dashscope) 模型进行翻译，支持 qwen-mt-turbo 和 qwen-mt-plus 模型。\n\n  使用 `--source_lang` 指定源语言，留空为自动检测。\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --qwen_key ${qwen_key} --model qwen-mt-turbo --language \"Simplified Chinese\"\n  python3 make_book.py --book_name test_books/animal_farm.epub --qwen_key ${qwen_key} --model qwen-mt-plus --language \"Japanese\" --source_lang \"English\"\n  ```\n\n* 腾讯交互翻译\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model tencentransmart\n  ```\n\n* [xAI](https://x.ai)\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model xai --xai_key ${xai_key}\n  ```\n\n* [Ollama](https://github.com/ollama/ollama)\n\n  使用 [Ollama](https://github.com/ollama/ollama) 自托管模型进行翻译。\n  如果 ollama server 不运行在本地，使用 `--api_base http://x.x.x.x:port/v1` 指向 ollama server 地址\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --ollama_model ${ollama_model_name}\n  ```\n\n* [Groq](https://console.groq.com/keys)\n\n  GroqCloud 当前支持的模型可以查看[Supported Models](https://console.groq.com/docs/models)\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --groq_key [your_key] --model groq --model_list llama3-8b-8192\n  ```\n\n## 使用说明\n\n- 翻译完会生成一本 `{book_name}_bilingual.epub` 的双语书\n- 如果出现了错误或使用 `CTRL+C` 中断命令，不想接下来继续翻译了，会生成一本 `{book_name}_bilingual_temp.epub` 的书，直接改成你想要的名字就可以了\n\n## 参数说明\n\n- `--test`:\n\n  如果大家没付费可以加上这个先看看效果（有 limit 稍微有些慢）\n\n- `--language`: 指定目标语言\n\n  - 例如： `--language \"Simplified Chinese\"`，预设值为 `\"Simplified Chinese\"`.\n  - 请阅读 helper message 来查找可用的目标语言： `python make_book.py --help`\n\n- `--proxy`\n\n  方便中国大陆的用户在本地测试时使用代理，传入类似 `http://127.0.0.1:7890` 的字符串\n\n- `--resume`\n\n  手动中断后，加入命令可以从之前中断的位置继续执行。\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model google --resume\n  ```\n\n- `--translate-tags`\n\n  指定需要翻译的标签，使用逗号分隔多个标签。epub 由 html 
文件组成，默认情况下，只翻译 `<p>` 中的内容。例如: `--translate-tags h1,h2,h3,p,div`\n\n- `--book_from`\n\n  选项指定电子阅读器类型（现在只有 kobo 可用），并使用 `--device_path` 指定挂载点。\n\n- `--api_base ${url}`\n\n  如果你遇到了墙需要用 Cloudflare Workers 替换 api_base 请使用 `--api_base ${url}` 来替换。\n  **请注意，此处你输入的 api 应该是'`https://xxxx/v1`'的字样，域名需要用引号包裹**\n\n- `--allow_navigable_strings`\n\n  如果你想要翻译电子书中的无标签字符串，可以使用 `--allow_navigable_strings` 参数，会将可遍历字符串加入翻译队列，**注意，在条件允许情况下，请寻找更规范的电子书**\n\n- `--prompt`\n\n  如果你想调整 prompt，你可以使用 `--prompt` 参数。有效的占位符包括 `{text}` 和 `{language}`。你可以用以下方式配置 prompt:\n\n  - 如果您不需要设置 `system` 角色，可以这样：`--prompt \"Translate {text} to {language}\"` 或者 `--prompt prompt_template_sample.txt`（示例文本文件可以在 [./prompt_template_sample.txt](./prompt_template_sample.txt) 找到）。\n\n  - 如果您需要设置 `system` 角色，可以使用以下方式配置：`--prompt '{\"user\":\"Translate {text} to {language}\", \"system\": \"You are a professional translator.\"}'`，或者 `--prompt prompt_template_sample.json`（示例 JSON 文件可以在 [./prompt_template_sample.json](./prompt_template_sample.json) 找到）。\n\n  - 你也可以用环境以下环境变量来配置 `system` 和 `user` 角色 prompt：`BBM_CHATGPTAPI_USER_MSG_TEMPLATE` 和 `BBM_CHATGPTAPI_SYS_MSG`。\n  该参数可以是提示模板字符串，也可以是模板 `.txt` 文件的路径。\n\n- `--batch_size`\n\n  指定批量翻译的行数(默认行数为 10，目前只对 txt 生效)\n\n- `--accumulated_num`:\n\n  达到累计token数开始进行翻译。gpt3.5将total_token限制为4090。\n  例如，如果您使用`--accumulation_num 1600`，则可能会输出2200个令牌，另外200个令牌用于系统指令（system_message）和用户指令（user_message），1600+2200+200 = 4000，所以token接近极限。你必须选择一个自己合适的值，我们无法在发送之前判断是否达到限制\n\n- `--use_context`:\n\n  prompts the model to create a three-paragraph summary. If it's the beginning of the translation, it will summarize the entire passage sent (the size depending on `--accumulated_num`).\n  For subsequent passages, it will amend the summary to include details from the most recent passage, creating a running one-paragraph context payload of the important details of the entire translated work. This improves consistency of flow and tone throughout the translation. 
This option is available for all ChatGPT-compatible models and Gemini models.\n\n  模型提示词将创建三段摘要。如果是翻译的开始，它将总结发送的整个段落（大小取决于`--accumulated_num`）。\n  对于后续的段落，它将修改摘要，以包括最近段落的细节，创建一个完整的段落上下文负载，包含整个翻译作品的重要细节。 这提高了整个翻译过程中的流畅性和语气的一致性。 此选项适用于所有ChatGPT兼容型号和Gemini型号。\n\n  - `--context_paragraph_limit`:\n\n    使用`--use_context`选项时，使用`--context_paragraph_limit`设置上下文段落数限制。\n\n- `--temperature`:\n\n  使用 `--temperature` 设置 `chatgptapi`/`gpt4`/`claude`模型的temperature值.\n  如 `--temperature 0.7`.\n\n- `--block_size`:\n\n  使用`--block_size`将多个段落合并到一个块中。这可能会提高准确性并加快处理速度，但可能会干扰原始格式。必须与`--single_translate`一起使用。\n  例如：`--block_size 5 --single_translate`。\n\n- `--single_translate`:\n\n  使用`--single_translate`只输出翻译后的图书，不创建双语版本。\n\n- `--translation_style`:\n\n  如: `--translation_style \"color: #808080; font-style: italic;\"`\n\n- `--retranslate \"$translated_filepath\" \"file_name_in_epub\" \"start_str\" \"end_str\"(optional)`:\n\n  - 重新翻译，从 start_str 到 end_str 的标记:\n\n  ```shell\n  python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which' 'This kind of thing is not a good symptom. 
Obviously'\n  ```\n\n  - 重新翻译, 从start_str 的标记开始:\n\n  ```shell\n  python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'\n  ```\n\n### 示范用例\n\n**如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`**\n\n```shell\n# 如果你想快速测一下\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test\n\n# 或翻译完整本书\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language zh-hans\n\n# Or translate the whole book using Gemini\npython3 make_book.py --book_name test_books/animal_farm.epub --gemini_key ${gemini_key} --model gemini\n\n# 指定环境变量来略过 --openai_key\nexport OPENAI_API_KEY=${your_api_key}\n\n# Use the DeepL model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja\n\n# Use the Claude model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja\n\n# Use the CustomAPI model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model customapi --custom_api ${custom_api} --language ja\n\n# Translate contents in <div> and <p>\npython3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p\n\n# 修改prompt\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt\n# 或者\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt \"Please translate \\`{text}\\` to {language}\"\n# 翻译 kobo e-reader 中，來自 Rakuten Kobo 的书籍\npython3 make_book.py --book_from kobo --device_path /tmp/kobo\n\n# 翻译 txt 文件\npython3 make_book.py --book_name test_books/the_little_prince.txt --test\n# 聚合多行翻译 txt 文件\npython3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20\n\n\n# 使用彩云小译翻译(彩云api目前只支持: 简体中文 <-> 英文， 简体中文 <-> 日语)\n# 
彩云提供了测试token（3975l6lr5pcbvidl6jl2）\n# 你可以参考这个教程申请自己的token (https://bobtranslate.com/service/translate/caiyun.html)\npython3 make_book.py --model caiyun --caiyun_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub\n# 可以在环境变量中设置BBM_CAIYUN_API_KEY，略过--caiyun_key\nexport BBM_CAIYUN_API_KEY=${your_api_key}\n```\n\n更加小白的示例\n\n```shell\npython3 make_book.py --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_base 'https://xxxxx/v1'\n\n# 有可能你不需要 python3 而是python\npython make_book.py --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_base 'https://xxxxx/v1'\n```\n\n[演示视频](https://www.bilibili.com/video/BV1XX4y1d75D/?t=0h07m08s)\n[演示视频 2](https://www.bilibili.com/video/BV1T8411c7iU/)\n\n使用 Azure OpenAI service\n\n```shell\npython3 make_book.py --book_name 'animal_farm.epub' --openai_key XXXXX --api_base 'https://example-endpoint.openai.azure.com' --deployment_id 'deployment-name'\n\n# Or python3 is not in your PATH\npython make_book.py --book_name 'animal_farm.epub' --openai_key XXXXX --api_base 'https://example-endpoint.openai.azure.com' --deployment_id 'deployment-name'\n```\n\n## 注意\n\n1. Free trial 的 API token 有所限制，如果想要更快的速度，可以考虑付费方案\n2. 欢迎提交 PR\n\n# 感谢\n\n- @[yetone](https://github.com/yetone)\n\n# 贡献\n\n- 任何 issue PR 都欢迎\n- Issue 中有些 TODO 没做的都可以选\n- 提交代码前请先执行 `black make_book.py` [^black]\n\n# 其它推荐项目\n\n- 书译 BookTranslator -> [Book Translator](https://www.booktranslator.app)\n\n## 赞赏\n\n谢谢就够了\n\n![image](https://user-images.githubusercontent.com/15976103/222407199-1ed8930c-13a8-402b-9993-aaac8ee84744.png)\n\n[^token]: https://platform.openai.com/account/api-keys\n[^black]: https://github.com/psf/black\n"
  },
  {
    "path": "README.md",
    "content": "**[中文](./README-CN.md) | English**\n[![litellm](https://img.shields.io/badge/%20%F0%9F%9A%85%20liteLLM-OpenAI%7CAzure%7CAnthropic%7CPalm%7CCohere%7CReplicate%7CHugging%20Face-blue?color=green)](https://github.com/BerriAI/litellm)\n\n# bilingual_book_maker\n\nThe bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt/srt/pdf files and books. This tool is exclusively designed for translating epub and other public domain works and is not intended for copyrighted works. Before using this tool, please review the project's **[disclaimer](./disclaimer.md)**.\n\n![image](https://user-images.githubusercontent.com/15976103/222317531-a05317c5-4eee-49de-95cd-04063d9539d9.png)\n\n## Supported Models\n\ngpt-5-mini, gpt-4, gpt-3.5-turbo, claude-2, palm, llama-2, azure-openai, command-nightly, gemini, qwen-mt-turbo, qwen-mt-plus\nFor using Non-OpenAI models, use class `liteLLM()` - liteLLM supports all models above.\nFind more info here for using liteLLM: https://github.com/BerriAI/litellm/blob/main/setup.py\n\n## Preparation\n\n1. ChatGPT or OpenAI token [^token]\n2. epub/txt/pdf books\n3. Environment with internet access or proxy\n4. Python 3.8+\n\n## Quick Start\n\nA sample book, `test_books/animal_farm.epub`, is provided for testing purposes.\n\n```shell\npip install -r requirements.txt\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test\nOR\npip install -U bbook_maker\nbbook --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test\n```\n\n## Translate Service\n\n- Use `--openai_key` option to specify OpenAI API key. 
If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits.\n  Or, just set environment variable `BBM_OPENAI_API_KEY` instead.\n- A sample book, `test_books/animal_farm.epub`, is provided for testing purposes.\n- The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt4` to change the underlying model to `GPT4`. You can also use `GPT4omini`.\n- Important to note that `gpt-4` is significantly more expensive than `gpt-4-turbo`, but to avoid bumping into rate limits, we automatically balance queries across `gpt-4-1106-preview`, `gpt-4`, `gpt-4-32k`, `gpt-4-0613`,`gpt-4-32k-0613`.\n- If you want to use a specific model alias with OpenAI (eg `gpt-4-1106-preview` or `gpt-3.5-turbo-0125`), you can use `--model openai --model_list gpt-4-1106-preview,gpt-3.5-turbo-0125`. `--model_list` takes a comma-separated list of model aliases.\n- If using chatgptapi, you can add `--use_context` to add a context paragraph to each passage sent to the model for translation (see below).\n\n* DeepL\n  Support DeepL model [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator) need pay to get the token\n\n  ```\n  python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key}\n  ```\n\n* DeepL free\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model deeplfree\n  ```\n\n* [Claude](https://console.anthropic.com/docs)\n\n  Use [Claude](https://console.anthropic.com/docs) model to translate\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key}\n  ```\n\n* Google Translate\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model google\n  ```\n\n* Caiyun Translate\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model caiyun 
--caiyun_key ${caiyun_key}\n  ```\n\n* Gemini\n\n  Support Google [Gemini](https://aistudio.google.com/app/apikey) model, use `--model gemini` for Gemini Flash or `--model geminipro` for Gemini Pro.\n  If you want to use a specific model alias with Gemini (eg `gemini-1.5-flash-002` or `gemini-1.5-flash-8b-exp-0924`), you can use `--model gemini --model_list gemini-1.5-flash-002,gemini-1.5-flash-8b-exp-0924`. `--model_list` takes a comma-separated list of model aliases.\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model gemini --gemini_key ${gemini_key}\n  ```\n\n* Qwen\n\n  Support Alibaba Cloud [Qwen-MT](https://bailian.console.aliyun.com/) specialized translation model. Supports 92 languages with features like terminology intervention and translation memory.\n  Use `--model qwen-mt-turbo` for faster/cheaper translation, or `--model qwen-mt-plus` for higher quality.\n\n  Use `source_lang` to specify the source language explicitly, or leave it empty for auto-detection.\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --qwen_key ${qwen_key} --model qwen-mt-turbo --language \"Simplified Chinese\"\n  python3 make_book.py --book_name test_books/animal_farm.epub --qwen_key ${qwen_key} --model qwen-mt-plus --language \"Japanese\" --source_lang \"English\"\n  ```\n\n* [Tencent TranSmart](https://transmart.qq.com)\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model tencentransmart\n  ```\n\n* [xAI](https://x.ai)\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model xai --xai_key ${xai_key}\n  ```\n\n* [Ollama](https://github.com/ollama/ollama)\n\n  Support [Ollama](https://github.com/ollama/ollama) self-host models,\n  If ollama server is not running on localhost, use `--api_base http://x.x.x.x:port/v1` to point to the ollama server address\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --ollama_model 
${ollama_model_name}\n  ```\n\n* [groq](https://console.groq.com/keys)\n\n  GroqCloud currently supports models: you can find from [Supported Models](https://console.groq.com/docs/models)\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --groq_key [your_key] --model groq --model_list llama3-8b-8192\n  ```\n\n## Use\n\n- Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated for EPUB inputs; for TXT/MD/SRT inputs a bilingual text (or subtitle) file named `${book_name}_bilingual.txt` (or `_bilingual.srt`) will be generated. For **PDF inputs** the tool will produce a bilingual `.txt` fallback and will also attempt to create `${book_name}_bilingual.epub` — if EPUB creation fails, the TXT fallback remains so you do not need to retranslate.\n- If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`, a temporary bilingual file (for example `{book_name}_bilingual_temp.epub` or `{book_name}_bilingual_temp.txt`) would be generated. You can simply rename it to any desired name.\n\n## Params\n\n- `--test`:\n\n  Use `--test` option to preview the result if you haven't paid for the service. Note that there is a limit and it may take some time.\n\n- `--language`:\n\n  Set the target language like `--language \"Simplified Chinese\"`. Default target language is `\"Simplified Chinese\"`.\n  Read available languages by helper message: `python make_book.py --help`\n\n- `--proxy`:\n\n  Use `--proxy` option to specify proxy server for internet access. Enter a string such as `http://127.0.0.1:7890`.\n\n- `--resume`:\n\n  Use `--resume` option to manually resume the process after an interruption.\n\n  ```shell\n  python3 make_book.py --book_name test_books/animal_farm.epub --model google --resume\n  ```\n\n- `--translate-tags`:\n\n  epub is made of html files. By default, we only translate contents in `<p>`.\n  Use `--translate-tags` to specify tags need for translation. 
Use comma to separate multiple tags.\n  For example: `--translate-tags h1,h2,h3,p,div`\n\n- `--book_from`:\n\n  Use `--book_from` option to specify e-reader type (Now only `kobo` is available), and use `--device_path` to specify the mounting point.\n\n- `--api_base`:\n\n  If you want to change api_base like using Cloudflare Workers, use `--api_base <URL>` to support it.\n  **Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**\n\n- `--allow_navigable_strings`:\n\n  If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**\n\n- `--prompt`:\n\n  To tweak the prompt, use the `--prompt` parameter. Valid placeholders for the `user` role template include `{text}` and `{language}`. It supports a few ways to configure the prompt:\n\n  - If you don't need to set the `system` role content, you can simply set it up like this: `--prompt \"Translate {text} to {language}.\"` or `--prompt prompt_template_sample.txt` (example of a text file can be found at [./prompt_template_sample.txt](./prompt_template_sample.txt)).\n\n  - If you need to set the `system` role content, you can use the following format: `--prompt '{\"user\":\"Translate {text} to {language}\", \"system\": \"You are a professional translator.\"}'` or `--prompt prompt_template_sample.json` (example of a JSON file can be found at [./prompt_template_sample.json](./prompt_template_sample.json)).\n  \n  - You can now use [PromptDown](https://github.com/btfranklin/promptdown) format (`.md` files) for more structured prompts: `--prompt prompt_md.prompt.md`. PromptDown supports both traditional system messages and developer messages (used by newer AI models). 
Example:\n  \n      ```markdown\n      # Translation Prompt\n      \n      ## Developer Message\n      You are a professional translator who specializes in accurate translations.\n      \n      ## Conversation\n      \n      | Role | Content                                                        |\n      | ---- | -------------------------------------------------------------- |\n      | User | Please translate the following text into {language}:\\n\\n{text} |\n      ```\n\n  - You can also set the `user` and `system` role prompt by setting environment variables: `BBM_CHATGPTAPI_USER_MSG_TEMPLATE` and `BBM_CHATGPTAPI_SYS_MSG`.\n\n- `--batch_size`:\n\n  Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files).\n\n- `--accumulated_num`:\n\n  Wait for how many tokens have been accumulated before starting the translation. gpt3.5 limits the total_token to 4090. For example, if you use `--accumulated_num 1600`, maybe openai will output 2200 tokens and maybe 200 tokens for other messages in the system messages user messages, 1600+2200+200=4000, So you are close to reaching the limit. You have to choose your own\n  value, there is no way to know if the limit is reached before sending\n\n- `--use_context`:\n\n  prompts the model to create a three-paragraph summary. If it's the beginning of the translation, it will summarize the entire passage sent (the size depending on `--accumulated_num`).\n  For subsequent passages, it will amend the summary to include details from the most recent passage, creating a running one-paragraph context payload of the important details of the entire translated work. This improves consistency of flow and tone throughout the translation. 
This option is available for all ChatGPT-compatible models and Gemini models.\n\n- `--context_paragraph_limit`:\n\n  Use `--context_paragraph_limit` to set a limit on the number of context paragraphs when using the `--use_context` option.\n\n- `--parallel-workers`:\n\n  Use `--parallel-workers` to enable parallel EPUB chapter processing. Values greater than `1` spin up multiple workers (recommended: `2-4`) and automatically fall back to sequential mode for single-chapter books.\n\n- `--temperature`:\n\n  Use `--temperature` to set the temperature parameter for `chatgptapi`/`gpt4`/`claude` models.\n  For example: `--temperature 0.7`.\n\n- `--block_size`:\n\n  Use `--block_size` to merge multiple paragraphs into one block. This may increase accuracy and speed up the process but can disturb the original format. Must be used with `--single_translate`.\n  For example: `--block_size 5 --single_translate`.\n\n- `--single_translate`:\n\n  Use `--single_translate` to output only the translated book without creating a bilingual version.\n\n- `--translation_style`:\n\n  example: `--translation_style \"color: #808080; font-style: italic;\"`\n\n- `--retranslate \"$translated_filepath\" \"file_name_in_epub\" \"start_str\" \"end_str\"(optional)`:\n\n  Retranslate from start_str to end_str's tag:\n\n  ```shell\n  python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which' 'This kind of thing is not a good symptom. 
Obviously'\n  ```\n\n  Retranslate start_str's tag:\n\n  ```shell\n  python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'\n  ```\n\n### Examples\n\n**Note if use `pip install bbook_maker` all commands can change to `bbook_maker args`**\n\n```shell\n# Test quickly\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key}  --test --language zh-hans\n\n# Test quickly for src\npython3 make_book.py --book_name test_books/Lex_Fridman_episode_322.srt --openai_key ${openai_key}  --test\n\n# Or translate the whole book\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language zh-hans\n\n# Or translate the whole book using Gemini flash\npython3 make_book.py --book_name test_books/animal_farm.epub --gemini_key ${gemini_key} --model gemini\n\n# Translate an EPUB with parallel chapter processing\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --parallel-workers 4\n\n# Use a specific list of Gemini model aliases\npython3 make_book.py --book_name test_books/animal_farm.epub --gemini_key ${gemini_key} --model gemini --model_list gemini-1.5-flash-002,gemini-1.5-flash-8b-exp-0924\n\n# Set env OPENAI_API_KEY to ignore option --openai_key\nexport OPENAI_API_KEY=${your_api_key}\n\n# Use the GPT-4 model with context to Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model gpt4 --use_context --language ja\n\n# Use a specific OpenAI model alias\npython3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview --openai_key ${openai_key}\n\n**Note** you can use other `openai like` model in this way\npython3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list yi-34b-chat-0205 --openai_key ${openai_key} --api_base 
\"https://api.lingyiwanwu.com/v1\"\n\n# Use a specific list of OpenAI model aliases\npython3 make_book.py --book_name test_books/animal_farm.epub --model openai --model_list gpt-4-1106-preview,gpt-4-0125-preview,gpt-3.5-turbo-0125 --openai_key ${openai_key}\n\n# Use the DeepL model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja\n\n# Use the Claude model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key} --language ja\n\n# Use the CustomAPI model with Japanese\npython3 make_book.py --book_name test_books/animal_farm.epub --model customapi --custom_api ${custom_api} --language ja\n\n# Translate contents in <div> and <p>\npython3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p\n\n# Tweaking the prompt\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt\n# or\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.json\n# or\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt \"Please translate \\`{text}\\` to {language}\"\n\n# Translate books download from Rakuten Kobo on kobo e-reader\npython3 make_book.py --book_from kobo --device_path /tmp/kobo\n\n# translate txt file\npython3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans\n# aggregated translation txt file\npython3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20\n\n# Using Caiyun model to translate\n# (the api currently only support: simplified chinese <-> english, simplified chinese <-> japanese)\n# the official Caiyun has provided a test token (3975l6lr5pcbvidl6jl2)\n# you can apply your own token by following this tutorial(https://bobtranslate.com/service/translate/caiyun.html)\npython3 make_book.py --model caiyun --caiyun_key 3975l6lr5pcbvidl6jl2 --book_name 
test_books/animal_farm.epub\n\n\n# Set env BBM_CAIYUN_API_KEY to ignore option --caiyun_key\nexport BBM_CAIYUN_API_KEY=${your_api_key}\n\n```\n\nMore understandable example\n\n```shell\npython3 make_book.py --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_base 'https://xxxxx/v1'\n\n# Or python3 is not in your PATH\npython make_book.py --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_base 'https://xxxxx/v1'\n```\n\nMicrosoft Azure Endpoints\n\n```shell\npython3 make_book.py --book_name 'animal_farm.epub' --openai_key XXXXX --api_base 'https://example-endpoint.openai.azure.com' --deployment_id 'deployment-name'\n\n# Or python3 is not in your PATH\npython make_book.py --book_name 'animal_farm.epub' --openai_key XXXXX --api_base 'https://example-endpoint.openai.azure.com' --deployment_id 'deployment-name'\n```\n\n## Docker\n\nYou can use [Docker](https://www.docker.com/) if you don't want to deal with setting up the environment.\n\n```shell\n# Build image\ndocker build --tag bilingual_book_maker .\n\n# Run container\n# \"$folder_path\" represents the folder where your book file is located. 
Also, it is where the processed file will be stored.\n\n# Windows PowerShell\n$folder_path=your_folder_path # $folder_path=\"C:\\Users\\user\\mybook\\\"\n$book_name=your_book_name # $book_name=\"animal_farm.epub\"\n$openai_key=your_api_key # $openai_key=\"sk-xxx\"\n$language=your_language # see utils.py\n\ndocker run --rm --name bilingual_book_maker --mount type=bind,source=$folder_path,target='/app/test_books' bilingual_book_maker --book_name \"/app/test_books/$book_name\" --openai_key $openai_key --language $language\n\n# Linux\nexport folder_path=${your_folder_path}\nexport book_name=${your_book_name}\nexport openai_key=${your_api_key}\nexport language=${your_language}\n\ndocker run --rm --name bilingual_book_maker --mount type=bind,source=${folder_path},target='/app/test_books' bilingual_book_maker --book_name \"/app/test_books/${book_name}\" --openai_key ${openai_key} --language \"${language}\"\n```\n\nFor example:\n\n```shell\n# Linux\ndocker run --rm --name bilingual_book_maker --mount type=bind,source=/home/user/my_books,target='/app/test_books' bilingual_book_maker --book_name /app/test_books/animal_farm.epub --openai_key sk-XXX --test --test_num 1 --language zh-hant\n```\n\n## Notes\n\n1. API tokens from the free trial are limited. If you want to speed up the process, consider paying for the service or using multiple OpenAI tokens.\n2. PRs are welcome.\n\n# Thanks\n\n- @[yetone](https://github.com/yetone)\n\n# Contribution\n\n- Any issues or PRs are welcome.\n- TODOs in the issue can also be selected.\n- Please run `black make_book.py`[^black] before submitting the code.\n\n# Others better\n\n- 书译 BookTranslator -> [Book Translator](https://www.booktranslator.app)\n\n## Appreciation\n\nThank you, that's enough.\n\n![image](https://user-images.githubusercontent.com/15976103/222407199-1ed8930c-13a8-402b-9993-aaac8ee84744.png)\n\n[^token]: https://platform.openai.com/account/api-keys\n[^black]: https://github.com/psf/black\n"
  },
  {
    "path": "book_maker/__init__.py",
    "content": ""
  },
  {
    "path": "book_maker/__main__.py",
    "content": "from cli import main\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "book_maker/cli.py",
    "content": "import argparse\nimport json\nimport os\nfrom os import environ as env\n\nfrom book_maker.loader import BOOK_LOADER_DICT\nfrom book_maker.translator import MODEL_DICT\nfrom book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE\n\n\ndef parse_prompt_arg(prompt_arg):\n    prompt = None\n    if prompt_arg is None:\n        return prompt\n\n    # Check if it's a path to a markdown file (PromptDown format)\n    if prompt_arg.endswith(\".md\") and os.path.exists(prompt_arg):\n        try:\n            from promptdown import StructuredPrompt\n\n            structured_prompt = StructuredPrompt.from_promptdown_file(prompt_arg)\n\n            # Initialize our prompt structure\n            prompt = {}\n\n            # Handle developer_message or system_message\n            # Developer message takes precedence if both are present\n            if (\n                hasattr(structured_prompt, \"developer_message\")\n                and structured_prompt.developer_message\n            ):\n                prompt[\"system\"] = structured_prompt.developer_message\n            elif (\n                hasattr(structured_prompt, \"system_message\")\n                and structured_prompt.system_message\n            ):\n                prompt[\"system\"] = structured_prompt.system_message\n\n            # Extract user message from conversation\n            if (\n                hasattr(structured_prompt, \"conversation\")\n                and structured_prompt.conversation\n            ):\n                for message in structured_prompt.conversation:\n                    if message.role.lower() == \"user\":\n                        prompt[\"user\"] = message.content\n                        break\n\n            # Ensure we found a user message\n            if \"user\" not in prompt or not prompt[\"user\"]:\n                raise ValueError(\n                    \"PromptDown file must contain at least one user message\"\n                )\n\n            
print(f\"Successfully loaded PromptDown file: {prompt_arg}\")\n\n            # Validate required placeholders\n            if any(c not in prompt[\"user\"] for c in [\"{text}\"]):\n                raise ValueError(\n                    \"User message in PromptDown must contain `{text}` placeholder\"\n                )\n\n            return prompt\n        except Exception as e:\n            print(f\"Error parsing PromptDown file: {e}\")\n            # Fall through to other parsing methods\n\n    # Existing parsing logic for JSON strings and other formats\n    if not any(prompt_arg.endswith(ext) for ext in [\".json\", \".txt\", \".md\"]):\n        try:\n            # user can define prompt by passing a json string\n            # eg: --prompt '{\"system\": \"You are a professional translator who translates computer technology books\", \"user\": \"Translate \\`{text}\\` to {language}\"}'\n            prompt = json.loads(prompt_arg)\n        except json.JSONDecodeError:\n            # if not a json string, treat it as a template string\n            prompt = {\"user\": prompt_arg}\n\n    elif os.path.exists(prompt_arg):\n        if prompt_arg.endswith(\".txt\"):\n            # if it's a txt file, treat it as a template string\n            with open(prompt_arg, encoding=\"utf-8\") as f:\n                prompt = {\"user\": f.read()}\n        elif prompt_arg.endswith(\".json\"):\n            # if it's a json file, treat it as a json object\n            # eg: --prompt prompt_template_sample.json\n            with open(prompt_arg, encoding=\"utf-8\") as f:\n                prompt = json.load(f)\n    else:\n        raise FileNotFoundError(f\"{prompt_arg} not found\")\n\n    # if prompt is None or any(c not in prompt[\"user\"] for c in [\"{text}\", \"{language}\"]):\n    if prompt is None or any(c not in prompt[\"user\"] for c in [\"{text}\"]):\n        raise ValueError(\"prompt must contain `{text}`\")\n\n    if \"user\" not in prompt:\n        raise ValueError(\"prompt must 
contain the key of `user`\")\n\n    if (prompt.keys() - {\"user\", \"system\"}) != set():\n        raise ValueError(\"prompt can only contain the keys of `user` and `system`\")\n\n    print(\"prompt config:\", prompt)\n    return prompt\n\n\ndef main():\n    translate_model_list = list(MODEL_DICT.keys())\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        \"--book_name\",\n        dest=\"book_name\",\n        type=str,\n        help=\"path of the epub file to be translated\",\n    )\n    parser.add_argument(\n        \"--book_from\",\n        dest=\"book_from\",\n        type=str,\n        choices=[\"kobo\"],  # support kindle later\n        metavar=\"E-READER\",\n        help=\"e-reader type, available: {%(choices)s}\",\n    )\n    parser.add_argument(\n        \"--device_path\",\n        dest=\"device_path\",\n        type=str,\n        help=\"Path of e-reader device\",\n    )\n    ########## KEYS ##########\n    parser.add_argument(\n        \"--openai_key\",\n        dest=\"openai_key\",\n        type=str,\n        default=\"\",\n        help=\"OpenAI api key,if you have more than one key, please use comma\"\n        \" to split them to go beyond the rate limits\",\n    )\n    parser.add_argument(\n        \"--caiyun_key\",\n        dest=\"caiyun_key\",\n        type=str,\n        help=\"you can apply caiyun key from here (https://dashboard.caiyunapp.com/user/sign_in/)\",\n    )\n    parser.add_argument(\n        \"--deepl_key\",\n        dest=\"deepl_key\",\n        type=str,\n        help=\"you can apply deepl key from here (https://rapidapi.com/splintPRO/api/dpl-translator\",\n    )\n    parser.add_argument(\n        \"--claude_key\",\n        dest=\"claude_key\",\n        type=str,\n        help=\"you can find claude key from here (https://console.anthropic.com/account/keys)\",\n    )\n\n    parser.add_argument(\n        \"--custom_api\",\n        dest=\"custom_api\",\n        type=str,\n        help=\"you should build your own 
translation api\",\n    )\n\n    # for Google Gemini\n    parser.add_argument(\n        \"--gemini_key\",\n        dest=\"gemini_key\",\n        type=str,\n        help=\"You can get Gemini Key from  https://makersuite.google.com/app/apikey\",\n    )\n\n    # for Groq\n    parser.add_argument(\n        \"--groq_key\",\n        dest=\"groq_key\",\n        type=str,\n        help=\"You can get Groq Key from  https://console.groq.com/keys\",\n    )\n\n    # for xAI\n    parser.add_argument(\n        \"--xai_key\",\n        dest=\"xai_key\",\n        type=str,\n        help=\"You can get xAI Key from  https://console.x.ai/\",\n    )\n\n    # for Qwen\n    parser.add_argument(\n        \"--qwen_key\",\n        dest=\"qwen_key\",\n        type=str,\n        help=\"You can get Qwen Key from  https://bailian.console.aliyun.com/?tab=model#/api-key\",\n    )\n\n    parser.add_argument(\n        \"--test\",\n        dest=\"test\",\n        action=\"store_true\",\n        help=\"only the first 10 paragraphs will be translated, for testing\",\n    )\n    parser.add_argument(\n        \"--test_num\",\n        dest=\"test_num\",\n        type=int,\n        default=10,\n        help=\"how many paragraphs will be translated for testing\",\n    )\n    parser.add_argument(\n        \"-m\",\n        \"--model\",\n        dest=\"model\",\n        type=str,\n        default=\"chatgptapi\",\n        choices=translate_model_list,  # support DeepL later\n        metavar=\"MODEL\",\n        help=\"model to use, available: {%(choices)s}\",\n    )\n    parser.add_argument(\n        \"--ollama_model\",\n        dest=\"ollama_model\",\n        type=str,\n        default=\"\",\n        metavar=\"MODEL\",\n        help=\"use ollama\",\n    )\n    parser.add_argument(\n        \"--language\",\n        type=str,\n        choices=sorted(LANGUAGES.keys())\n        + sorted([k.title() for k in TO_LANGUAGE_CODE]),\n        default=\"zh-hans\",\n        metavar=\"LANGUAGE\",\n        help=\"language to 
translate to, available: {%(choices)s}\",\n    )\n    parser.add_argument(\n        \"--resume\",\n        dest=\"resume\",\n        action=\"store_true\",\n        help=\"if program stop unexpected you can use this to resume\",\n    )\n    parser.add_argument(\n        \"-p\",\n        \"--proxy\",\n        dest=\"proxy\",\n        type=str,\n        default=\"\",\n        help=\"use proxy like http://127.0.0.1:7890\",\n    )\n    parser.add_argument(\n        \"--deployment_id\",\n        dest=\"deployment_id\",\n        type=str,\n        help=\"the deployment name you chose when you deployed the model\",\n    )\n    # args to change api_base\n    parser.add_argument(\n        \"--api_base\",\n        metavar=\"API_BASE_URL\",\n        dest=\"api_base\",\n        type=str,\n        help=\"specify base url other than the OpenAI's official API address\",\n    )\n    parser.add_argument(\n        \"--exclude_filelist\",\n        dest=\"exclude_filelist\",\n        type=str,\n        default=\"\",\n        help=\"if you have more than one file to exclude, please use comma to split them, example: --exclude_filelist 'nav.xhtml,cover.xhtml'\",\n    )\n    parser.add_argument(\n        \"--only_filelist\",\n        dest=\"only_filelist\",\n        type=str,\n        default=\"\",\n        help=\"if you only have a few files with translations, please use comma to split them, example: --only_filelist 'nav.xhtml,cover.xhtml'\",\n    )\n    parser.add_argument(\n        \"--translate-tags\",\n        dest=\"translate_tags\",\n        type=str,\n        default=\"p\",\n        help=\"example --translate-tags p,blockquote\",\n    )\n    parser.add_argument(\n        \"--exclude_translate-tags\",\n        dest=\"exclude_translate_tags\",\n        type=str,\n        default=\"sup\",\n        help=\"example --exclude_translate-tags table,sup\",\n    )\n    parser.add_argument(\n        \"--allow_navigable_strings\",\n        dest=\"allow_navigable_strings\",\n        
action=\"store_true\",\n        default=False,\n        help=\"allow NavigableStrings to be translated\",\n    )\n    parser.add_argument(\n        \"--prompt\",\n        dest=\"prompt_arg\",\n        type=str,\n        metavar=\"PROMPT_ARG\",\n        help=\"used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.\",\n    )\n    parser.add_argument(\n        \"--accumulated_num\",\n        dest=\"accumulated_num\",\n        type=int,\n        default=1,\n        help=\"\"\"Wait for how many tokens have been accumulated before starting the translation.\ngpt3.5 limits the total_token to 4090.\nFor example, if you use --accumulated_num 1600, maybe openai will output 2200 tokens\nand maybe 200 tokens for other messages in the system messages user messages, 1600+2200+200=4000,\nSo you are close to reaching the limit. You have to choose your own value, there is no way to know if the limit is reached before sending\n\"\"\",\n    )\n    parser.add_argument(\n        \"--translation_style\",\n        dest=\"translation_style\",\n        type=str,\n        help=\"\"\"ex: --translation_style \"color: #808080; font-style: italic;\" \"\"\",\n    )\n    parser.add_argument(\n        \"--batch_size\",\n        dest=\"batch_size\",\n        type=int,\n        help=\"how many lines will be translated by aggregated translation(This options currently only applies to txt files)\",\n    )\n    parser.add_argument(\n        \"--retranslate\",\n        dest=\"retranslate\",\n        nargs=4,\n        type=str,\n        help=\"\"\"--retranslate \"$translated_filepath\" \"file_name_in_epub\" \"start_str\" \"end_str\"(optional)\n        Retranslate from start_str to end_str's tag:\n        python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which' 'This kind of 
thing is not a good symptom. Obviously'\n        Retranslate start_str's tag:\n        python3 \"make_book.py\" --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'\n\"\"\",\n    )\n    parser.add_argument(\n        \"--single_translate\",\n        action=\"store_true\",\n        help=\"output translated book, no bilingual\",\n    )\n    parser.add_argument(\n        \"--use_context\",\n        dest=\"context_flag\",\n        action=\"store_true\",\n        help=\"adds an additional paragraph for global, updating historical context of the story to the model's input, improving the narrative consistency for the AI model (this uses ~200 more tokens each time)\",\n    )\n    parser.add_argument(\n        \"--context_paragraph_limit\",\n        dest=\"context_paragraph_limit\",\n        type=int,\n        default=0,\n        help=\"if use --use_context, set context paragraph limit\",\n    )\n    parser.add_argument(\n        \"--temperature\",\n        type=float,\n        default=1.0,\n        help=\"temperature parameter for `chatgptapi`/`gpt4`/`gpt4omini`/`gpt4o`/`gpt5mini`/`claude`/`gemini`\",\n    )\n    parser.add_argument(\n        \"--source_lang\",\n        type=str,\n        default=\"auto\",\n        help=\"source language for translation models like `qwen` (default: auto-detect)\",\n    )\n    parser.add_argument(\n        \"--block_size\",\n        type=int,\n        default=-1,\n        help=\"merge multiple paragraphs into one block, may increase accuracy and speed up the process, but disturb the original format, must be used with `--single_translate`\",\n    )\n    parser.add_argument(\n        \"--model_list\",\n        type=str,\n        dest=\"model_list\",\n        help=\"Rather than using our preset lists of models, specify exactly the models you want as a comma separated list `gpt-4-32k,gpt-3.5-turbo-0125` (Currently only supports: 
`openai`)\",\n    )\n    parser.add_argument(\n        \"--batch\",\n        dest=\"batch_flag\",\n        action=\"store_true\",\n        help=\"Enable batch translation using ChatGPT's batch API for improved efficiency\",\n    )\n    parser.add_argument(\n        \"--batch-use\",\n        dest=\"batch_use_flag\",\n        action=\"store_true\",\n        help=\"Use pre-generated batch translations to create files. Run with --batch first before using this option\",\n    )\n    parser.add_argument(\n        \"--interval\",\n        type=float,\n        default=0.01,\n        help=\"Request interval in seconds (e.g., 0.1 for 100ms). Currently only supported for Gemini models. Default: 0.01\",\n    )\n    parser.add_argument(\n        \"--parallel-workers\",\n        dest=\"parallel_workers\",\n        type=int,\n        default=1,\n        help=\"Number of parallel workers for EPUB chapter processing. Use 2-4 for better performance. Default: 1\",\n    )\n\n    options = parser.parse_args()\n\n    if not options.book_name:\n        print(\"Error: please provide the path of your book using --book_name <path>\")\n        exit(1)\n    if not os.path.isfile(options.book_name):\n        print(f\"Error: the book {options.book_name!r} does not exist.\")\n        exit(1)\n\n    PROXY = options.proxy\n    if PROXY != \"\":\n        os.environ[\"http_proxy\"] = PROXY\n        os.environ[\"https_proxy\"] = PROXY\n\n    translate_model = MODEL_DICT.get(options.model)\n    assert translate_model is not None, \"unsupported model\"\n    API_KEY = \"\"\n    if options.model in [\n        \"openai\",\n        \"chatgptapi\",\n        \"gpt4\",\n        \"gpt4omini\",\n        \"gpt4o\",\n        \"gpt5mini\",\n        \"o1preview\",\n        \"o1\",\n        \"o1mini\",\n        \"o3mini\",\n    ]:\n        if OPENAI_API_KEY := (\n            options.openai_key\n            or env.get(\n                \"OPENAI_API_KEY\",\n            )  # XXX: for backward compatibility, deprecate 
soon\n            or env.get(\n                \"BBM_OPENAI_API_KEY\",\n            )  # suggest adding `BBM_` prefix for all the bilingual_book_maker ENVs.\n        ):\n            API_KEY = OPENAI_API_KEY\n            # patch\n        elif options.ollama_model:\n            # any string is ok, can't be empty\n            API_KEY = \"ollama\"\n        else:\n            raise Exception(\n                \"OpenAI API key not provided, please google how to obtain it\",\n            )\n    elif options.model == \"caiyun\":\n        API_KEY = options.caiyun_key or env.get(\"BBM_CAIYUN_API_KEY\")\n        if not API_KEY:\n            raise Exception(\"Please provide caiyun key\")\n    elif options.model == \"deepl\":\n        API_KEY = options.deepl_key or env.get(\"BBM_DEEPL_API_KEY\")\n        if not API_KEY:\n            raise Exception(\"Please provide deepl key\")\n    elif options.model.startswith(\"claude\"):\n        API_KEY = options.claude_key or env.get(\"BBM_CLAUDE_API_KEY\")\n        if not API_KEY:\n            raise Exception(\"Please provide claude key\")\n    elif options.model == \"customapi\":\n        API_KEY = options.custom_api or env.get(\"BBM_CUSTOM_API\")\n        if not API_KEY:\n            raise Exception(\"Please provide custom translate api\")\n    elif options.model in [\"gemini\", \"geminipro\"]:\n        API_KEY = options.gemini_key or env.get(\"BBM_GOOGLE_GEMINI_KEY\")\n    elif options.model == \"groq\":\n        API_KEY = options.groq_key or env.get(\"BBM_GROQ_API_KEY\")\n    elif options.model == \"xai\":\n        API_KEY = options.xai_key or env.get(\"BBM_XAI_API_KEY\")\n    elif options.model.startswith(\"qwen-\"):\n        API_KEY = options.qwen_key or env.get(\"BBM_QWEN_API_KEY\")\n    else:\n        API_KEY = \"\"\n\n    if options.book_from == \"kobo\":\n        from book_maker import obok\n\n        device_path = options.device_path\n        if device_path is None:\n            raise Exception(\n                \"Device path 
is not given, please specify the path by --device_path <DEVICE_PATH>\",\n            )\n        options.book_name = obok.cli_main(device_path)\n\n    book_type = options.book_name.split(\".\")[-1]\n    support_type_list = list(BOOK_LOADER_DICT.keys())\n    if book_type not in support_type_list:\n        raise Exception(\n            f\"now only support files of these formats: {','.join(support_type_list)}\",\n        )\n\n    if options.block_size > 0 and not options.single_translate:\n        raise Exception(\n            \"block_size must be used with `--single_translate` because it disturbs the original format\",\n        )\n\n    book_loader = BOOK_LOADER_DICT.get(book_type)\n    assert book_loader is not None, \"unsupported loader\"\n    language = options.language\n    if options.language in LANGUAGES:\n        # use the value for prompt\n        language = LANGUAGES.get(language, language)\n\n    # change api_base for issue #42\n    model_api_base = options.api_base\n\n    if options.ollama_model and not model_api_base:\n        # ollama default api_base\n        model_api_base = \"http://localhost:11434/v1\"\n\n    e = book_loader(\n        options.book_name,\n        translate_model,\n        API_KEY,\n        options.resume,\n        language=language,\n        model_api_base=model_api_base,\n        is_test=options.test,\n        test_num=options.test_num,\n        prompt_config=parse_prompt_arg(options.prompt_arg),\n        single_translate=options.single_translate,\n        context_flag=options.context_flag,\n        context_paragraph_limit=options.context_paragraph_limit,\n        temperature=options.temperature,\n        source_lang=options.source_lang,\n        parallel_workers=options.parallel_workers,\n    )\n    # other options\n    if options.allow_navigable_strings:\n        e.allow_navigable_strings = True\n    if options.translate_tags:\n        e.translate_tags = options.translate_tags\n    if options.exclude_translate_tags:\n        
e.exclude_translate_tags = options.exclude_translate_tags\n    if options.exclude_filelist:\n        e.exclude_filelist = options.exclude_filelist\n    if options.only_filelist:\n        e.only_filelist = options.only_filelist\n    if options.accumulated_num > 1:\n        e.accumulated_num = options.accumulated_num\n    if options.translation_style:\n        e.translation_style = options.translation_style\n    if options.batch_size:\n        e.batch_size = options.batch_size\n    if options.retranslate:\n        e.retranslate = options.retranslate\n    if options.deployment_id:\n        # only work for ChatGPT api for now\n        # later maybe support others\n        assert options.model in [\n            \"chatgptapi\",\n            \"gpt4\",\n            \"gpt4omini\",\n            \"gpt4o\",\n            \"gpt5mini\",\n            \"o1\",\n            \"o1preview\",\n            \"o1mini\",\n            \"o3mini\",\n        ], \"only support chatgptapi for deployment_id\"\n        if not options.api_base:\n            raise ValueError(\"`api_base` must be provided when using `deployment_id`\")\n        e.translate_model.set_deployment_id(options.deployment_id)\n    if options.model in (\"openai\", \"groq\"):\n        # Currently only supports `openai` when you also have --model_list set\n        if options.model_list:\n            e.translate_model.set_model_list(options.model_list.split(\",\"))\n        else:\n            raise ValueError(\n                \"When using `openai` model, you must also provide `--model_list`. 
For default model sets use `--model chatgptapi` or `--model gpt4` or `--model gpt4omini` or `--model gpt5mini`\",\n            )\n    # TODO refactor, quick fix for gpt4 model\n    if options.model == \"chatgptapi\":\n        if options.ollama_model:\n            e.translate_model.set_gpt35_models(ollama_model=options.ollama_model)\n        else:\n            e.translate_model.set_gpt35_models()\n    if options.model == \"gpt4\":\n        e.translate_model.set_gpt4_models()\n    if options.model == \"gpt4omini\":\n        e.translate_model.set_gpt4omini_models()\n    if options.model == \"gpt4o\":\n        e.translate_model.set_gpt4o_models()\n    if options.model == \"gpt5mini\":\n        e.translate_model.set_gpt5mini_models()\n    if options.model == \"o1preview\":\n        e.translate_model.set_o1preview_models()\n    if options.model == \"o1\":\n        e.translate_model.set_o1_models()\n    if options.model == \"o1mini\":\n        e.translate_model.set_o1mini_models()\n    if options.model == \"o3mini\":\n        e.translate_model.set_o3mini_models()\n    if options.model.startswith(\"claude-\"):\n        e.translate_model.set_claude_model(options.model)\n    if options.model.startswith(\"qwen-\"):\n        e.translate_model.set_qwen_model(options.model)\n    if options.block_size > 0:\n        e.block_size = options.block_size\n    if options.batch_flag:\n        e.batch_flag = options.batch_flag\n    if options.batch_use_flag:\n        e.batch_use_flag = options.batch_use_flag\n\n    if options.model in (\"gemini\", \"geminipro\"):\n        e.translate_model.set_interval(options.interval)\n    if options.model == \"gemini\":\n        if options.model_list:\n            e.translate_model.set_model_list(options.model_list.split(\",\"))\n        else:\n            e.translate_model.set_geminiflash_models()\n    if options.model == \"geminipro\":\n        e.translate_model.set_geminipro_models()\n\n    e.make_bilingual_book()\n\n\nif __name__ == \"__main__\":\n 
   main()\n"
  },
  {
    "path": "book_maker/config.py",
    "content": "config = {\n    \"translator\": {\n        \"chatgptapi\": {\n            \"context_paragraph_limit\": 3,\n            \"batch_context_update_interval\": 50,\n        }\n    },\n}\n"
  },
  {
    "path": "book_maker/loader/__init__.py",
    "content": "from book_maker.loader.epub_loader import EPUBBookLoader\nfrom book_maker.loader.txt_loader import TXTBookLoader\nfrom book_maker.loader.srt_loader import SRTBookLoader\nfrom book_maker.loader.md_loader import MarkdownBookLoader\nfrom book_maker.loader.pdf_loader import PDFBookLoader\n\nBOOK_LOADER_DICT = {\n    \"epub\": EPUBBookLoader,\n    \"txt\": TXTBookLoader,\n    \"srt\": SRTBookLoader,\n    \"md\": MarkdownBookLoader,\n    \"pdf\": PDFBookLoader,\n    # TODO add more here\n}\n"
  },
  {
    "path": "book_maker/loader/base_loader.py",
    "content": "from abc import ABC, abstractmethod\n\n\nclass BaseBookLoader(ABC):\n    @staticmethod\n    def _is_special_text(text):\n        return text.isdigit() or text.isspace()\n\n    @abstractmethod\n    def _make_new_book(self, book):\n        pass\n\n    @abstractmethod\n    def make_bilingual_book(self):\n        pass\n\n    @abstractmethod\n    def load_state(self):\n        pass\n\n    @abstractmethod\n    def _save_temp_book(self):\n        pass\n\n    @abstractmethod\n    def _save_progress(self):\n        pass\n"
  },
  {
    "path": "book_maker/loader/epub_loader.py",
    "content": "import os\nimport pickle\nimport string\nimport sys\nimport time\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nfrom copy import copy\nfrom pathlib import Path\nimport traceback\nfrom threading import Lock\n\nfrom bs4 import BeautifulSoup as bs\nfrom bs4 import Tag\nfrom bs4.element import NavigableString\nfrom ebooklib import ITEM_DOCUMENT, epub\nfrom rich import print\nfrom tqdm import tqdm\n\nfrom book_maker.utils import num_tokens_from_text, prompt_config_to_kwargs\n\nfrom .base_loader import BaseBookLoader\nfrom .helper import EPUBBookLoaderHelper, is_text_link, not_trans\n\n\nclass EPUBBookLoader(BaseBookLoader):\n    def __init__(\n        self,\n        epub_name,\n        model,\n        key,\n        resume,\n        language,\n        model_api_base=None,\n        is_test=False,\n        test_num=5,\n        prompt_config=None,\n        single_translate=False,\n        context_flag=False,\n        context_paragraph_limit=0,\n        temperature=1.0,\n        source_lang=\"auto\",\n        parallel_workers=1,\n    ):\n        self.epub_name = epub_name\n        self.new_epub = epub.EpubBook()\n        self.translate_model = model(\n            key,\n            language,\n            api_base=model_api_base,\n            context_flag=context_flag,\n            context_paragraph_limit=context_paragraph_limit,\n            temperature=temperature,\n            source_lang=source_lang,\n            **prompt_config_to_kwargs(prompt_config),\n        )\n        self.is_test = is_test\n        self.test_num = test_num\n        self.translate_tags = \"p\"\n        self.exclude_translate_tags = \"sup\"\n        self.allow_navigable_strings = False\n        self.accumulated_num = 1\n        self.translation_style = \"\"\n        self.context_flag = context_flag\n        self.helper = EPUBBookLoaderHelper(\n            self.translate_model,\n            self.accumulated_num,\n            self.translation_style,\n            
self.context_flag,\n        )\n        self.retranslate = None\n        self.exclude_filelist = \"\"\n        self.only_filelist = \"\"\n        self.single_translate = single_translate\n        self.block_size = -1\n        self.batch_use_flag = False\n        self.batch_flag = False\n        self.parallel_workers = 1\n        self.enable_parallel = False\n        self._progress_lock = Lock()\n        self._translation_index = 0\n        self.set_parallel_workers(parallel_workers)\n\n        # monkey patch for # 173\n        def _write_items_patch(obj):\n            for item in obj.book.get_items():\n                if isinstance(item, epub.EpubNcx):\n                    obj.out.writestr(\n                        \"%s/%s\" % (obj.book.FOLDER_NAME, item.file_name), obj._get_ncx()\n                    )\n                elif isinstance(item, epub.EpubNav):\n                    obj.out.writestr(\n                        \"%s/%s\" % (obj.book.FOLDER_NAME, item.file_name),\n                        obj._get_nav(item),\n                    )\n                elif item.manifest:\n                    obj.out.writestr(\n                        \"%s/%s\" % (obj.book.FOLDER_NAME, item.file_name), item.content\n                    )\n                else:\n                    obj.out.writestr(\"%s\" % item.file_name, item.content)\n\n        def _check_deprecated(obj):\n            pass\n\n        epub.EpubWriter._write_items = _write_items_patch\n        epub.EpubReader._check_deprecated = _check_deprecated\n\n        try:\n            self.origin_book = epub.read_epub(self.epub_name)\n        except Exception:\n            # tricky monkey patch for #71 if you don't know why please check the issue and ignore this\n            # when upstream change will TODO fix this\n            def _load_spine(obj):\n                spine = obj.container.find(\"{%s}%s\" % (epub.NAMESPACES[\"OPF\"], \"spine\"))\n\n                obj.book.spine = [\n                    (t.get(\"idref\"), 
t.get(\"linear\", \"yes\")) for t in spine\n                ]\n                obj.book.set_direction(spine.get(\"page-progression-direction\", None))\n\n            epub.EpubReader._load_spine = _load_spine\n            self.origin_book = epub.read_epub(self.epub_name)\n\n        self.p_to_save = []\n        self.resume = resume\n        self.bin_path = f\"{Path(epub_name).parent}/.{Path(epub_name).stem}.temp.bin\"\n        if self.resume:\n            self.load_state()\n\n    @staticmethod\n    def _is_special_text(text):\n        return (\n            text.isdigit()\n            or text.isspace()\n            or is_text_link(text)\n            or all(char in string.punctuation for char in text)\n        )\n\n    def _make_new_book(self, book):\n        new_book = epub.EpubBook()\n        allowed_ns = set(epub.NAMESPACES.keys()) | set(epub.NAMESPACES.values())\n\n        for namespace, metas in book.metadata.items():\n            # Only keep namespaces recognized by ebooklib\n            if namespace not in allowed_ns:\n                continue\n\n            if isinstance(metas, dict):\n                entries = (\n                    (name, value, others)\n                    for name, values in metas.items()\n                    for value, others in (\n                        (item if isinstance(item, tuple) else (item, None))\n                        for item in values\n                    )\n                )\n            else:\n                entries = metas\n\n            for entry in entries:\n                if not entry:\n                    continue\n\n                if isinstance(entry, tuple):\n                    if len(entry) == 3:\n                        name, value, others = entry\n                    elif len(entry) == 2:\n                        name, value = entry\n                        others = None\n                    else:\n                        continue\n                else:\n                    # Unexpected metadata format; skip 
gracefully\n                    continue\n\n                # `others` can be {} or None\n                if others:\n                    new_book.add_metadata(namespace, name, value, others)\n                else:\n                    new_book.add_metadata(namespace, name, value)\n\n        new_book.spine = book.spine\n        new_book.toc = self._fix_toc_uids(book.toc)\n        return new_book\n\n    def _fix_toc_uids(self, toc, counter=None):\n        \"\"\"Fix TOC items that have uid=None to prevent TypeError when writing NCX.\"\"\"\n        if counter is None:\n            counter = [0]  # Use list to allow mutation in nested calls\n\n        fixed_toc = []\n        for item in toc:\n            if isinstance(item, tuple):\n                # Section with sub-items: (Section, [sub-items])\n                section, sub_items = item\n                if hasattr(section, \"uid\") and section.uid is None:\n                    section.uid = f\"navpoint-{counter[0]}\"\n                    counter[0] += 1\n                fixed_sub_items = self._fix_toc_uids(sub_items, counter)\n                fixed_toc.append((section, fixed_sub_items))\n            elif hasattr(item, \"uid\"):\n                # Link or EpubHtml item\n                if item.uid is None:\n                    item.uid = f\"navpoint-{counter[0]}\"\n                    counter[0] += 1\n                fixed_toc.append(item)\n            else:\n                fixed_toc.append(item)\n\n        return fixed_toc\n\n    def _extract_paragraph(self, p):\n        for p_exclude in self.exclude_translate_tags.split(\",\"):\n            # for issue #280\n            if type(p) is NavigableString:\n                continue\n            for pt in p.find_all(p_exclude):\n                pt.extract()\n        return p\n\n    def _process_paragraph(self, p, new_p, index, p_to_save_len, thread_safe=False):\n        if self.resume and index < p_to_save_len:\n            p.string = self.p_to_save[index]\n            
new_p.string = self.p_to_save[\n                index\n            ]  # Fix: also update new_p to cached translation\n        else:\n            t_text = \"\"\n            if self.batch_flag:\n                self.translate_model.add_to_batch_translate_queue(index, new_p.text)\n            elif self.batch_use_flag:\n                t_text = self.translate_model.batch_translate(index)\n            else:\n                t_text = self.translate_model.translate(new_p.text)\n            if t_text is None:\n                raise RuntimeError(\n                    \"`t_text` is None: your translation model is not working as expected. Please check your translation model configuration.\"\n                )\n            if type(p) is NavigableString:\n                new_p = t_text\n                self.p_to_save.append(new_p)\n            else:\n                new_p.string = t_text\n                self.p_to_save.append(new_p.text)\n\n        self.helper.insert_trans(\n            p, new_p.string, self.translation_style, self.single_translate\n        )\n        index += 1\n\n        if thread_safe:\n            with self._progress_lock:\n                if index % 20 == 0:\n                    self._save_progress()\n        else:\n            if index % 20 == 0:\n                self._save_progress()\n        return index\n\n    def _process_combined_paragraph(\n        self, p_block, index, p_to_save_len, thread_safe=False\n    ):\n        text = []\n\n        for p in p_block:\n            if self.resume and index < p_to_save_len:\n                p.string = self.p_to_save[index]\n            else:\n                p_text = p.text.rstrip()\n                text.append(p_text)\n\n            if self.is_test and index >= self.test_num:\n                break\n\n            index += 1\n\n        if len(text) > 0:\n            translated_text = self.translate_model.translate(\"\\n\".join(text))\n            translated_text = translated_text.split(\"\\n\")\n            
text_len = len(translated_text)\n\n            for i in range(text_len):\n                t = translated_text[i]\n\n                if i >= len(p_block):\n                    p = p_block[-1]\n                else:\n                    p = p_block[i]\n\n                if type(p) is NavigableString:\n                    p = t\n                else:\n                    p.string = t\n\n                self.helper.insert_trans(\n                    p, p.string, self.translation_style, self.single_translate\n                )\n\n        if thread_safe:\n            with self._progress_lock:\n                self._save_progress()\n        else:\n            self._save_progress()\n        return index\n\n    def translate_paragraphs_acc(self, p_list, send_num):\n        count = 0\n        wait_p_list = []\n        for i in range(len(p_list)):\n            p = p_list[i]\n            print(f\"translating {i}/{len(p_list)}\")\n            temp_p = copy(p)\n\n            for p_exclude in self.exclude_translate_tags.split(\",\"):\n                # for issue #280\n                if type(p) is NavigableString:\n                    continue\n                for pt in temp_p.find_all(p_exclude):\n                    pt.extract()\n\n            if any(\n                [not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]\n            ):\n                if i == len(p_list) - 1:\n                    self.helper.deal_old(wait_p_list, self.single_translate)\n                continue\n            length = num_tokens_from_text(temp_p.text)\n            if length > send_num:\n                self.helper.deal_new(p, wait_p_list, self.single_translate)\n                continue\n            if i == len(p_list) - 1:\n                if count + length < send_num:\n                    wait_p_list.append(p)\n                    self.helper.deal_old(wait_p_list, self.single_translate)\n                else:\n                    self.helper.deal_new(p, wait_p_list, 
self.single_translate)\n                break\n            if count + length < send_num:\n                count += length\n                wait_p_list.append(p)\n            else:\n                self.helper.deal_old(wait_p_list, self.single_translate)\n                wait_p_list.append(p)\n                count = length\n\n    def get_item(self, book, name):\n        for item in book.get_items():\n            if item.file_name == name:\n                return item\n\n    def find_items_containing_string(self, book, search_string):\n        matching_items = []\n\n        for item in book.get_items_of_type(ITEM_DOCUMENT):\n            content = item.get_content()\n            soup = bs(content, \"html.parser\")\n            if search_string in soup.get_text():\n                matching_items.append(item)\n\n        return matching_items\n\n    def retranslate_book(self, index, p_to_save_len, pbar, trans_taglist, retranslate):\n        complete_book_name = retranslate[0]\n        fixname = retranslate[1]\n        fixstart = retranslate[2]\n        fixend = retranslate[3]\n\n        if fixend == \"\":\n            fixend = fixstart\n\n        name_fix = complete_book_name\n\n        complete_book = epub.read_epub(complete_book_name)\n\n        if fixname == \"\":\n            fixname = self.find_items_containing_string(complete_book, fixstart)[\n                0\n            ].file_name\n            print(f\"auto find fixname: {fixname}\")\n\n        new_book = self._make_new_book(complete_book)\n\n        complete_item = self.get_item(complete_book, fixname)\n        if complete_item is None:\n            return\n\n        ori_item = self.get_item(self.origin_book, fixname)\n        if ori_item is None:\n            return\n\n        content_complete = complete_item.content\n        content_ori = ori_item.content\n        soup_complete = bs(content_complete, \"html.parser\")\n        soup_ori = bs(content_ori, \"html.parser\")\n\n        p_list_complete = 
soup_complete.findAll(trans_taglist)\n        p_list_ori = soup_ori.findAll(trans_taglist)\n\n        target = None\n        tagl = []\n\n        # extract from range\n        find_end = False\n        find_start = False\n        for tag in p_list_complete:\n            if find_end:\n                tagl.append(tag)\n                break\n\n            if fixend in tag.text:\n                find_end = True\n            if fixstart in tag.text:\n                find_start = True\n\n            if find_start:\n                if not target:\n                    target = tag.previous_sibling\n                tagl.append(tag)\n\n        for t in tagl:\n            t.extract()\n\n        flag = False\n        extract_p_list_ori = []\n        for p in p_list_ori:\n            if fixstart in p.text:\n                flag = True\n            if flag:\n                extract_p_list_ori.append(p)\n            if fixend in p.text:\n                break\n\n        for t in extract_p_list_ori:\n            if target:\n                target.insert_after(t)\n                target = t\n\n        for item in complete_book.get_items():\n            if item.file_name != fixname:\n                new_book.add_item(item)\n        if soup_complete:\n            complete_item.content = soup_complete.encode()\n\n        index = self.process_item(\n            complete_item,\n            index,\n            p_to_save_len,\n            pbar,\n            new_book,\n            trans_taglist,\n            fixstart,\n            fixend,\n        )\n        epub.write_epub(f\"{name_fix}\", new_book, {})\n\n    def has_nest_child(self, element, trans_taglist):\n        if isinstance(element, Tag):\n            for child in element.children:\n                if child.name in trans_taglist:\n                    return True\n                if self.has_nest_child(child, trans_taglist):\n                    return True\n        return False\n\n    def filter_nest_list(self, p_list, 
trans_taglist):\n        filtered_list = [p for p in p_list if not self.has_nest_child(p, trans_taglist)]\n        return filtered_list\n\n    def process_item(\n        self,\n        item,\n        index,\n        p_to_save_len,\n        pbar,\n        new_book,\n        trans_taglist,\n        fixstart=None,\n        fixend=None,\n    ):\n        if self.only_filelist != \"\" and item.file_name not in self.only_filelist.split(\n            \",\"\n        ):\n            return index\n        elif self.only_filelist == \"\" and item.file_name in self.exclude_filelist.split(\n            \",\"\n        ):\n            new_book.add_item(item)\n            return index\n\n        if not os.path.exists(\"log\"):\n            os.makedirs(\"log\")\n\n        content = item.content\n        soup = bs(content, \"html.parser\")\n        p_list = soup.findAll(trans_taglist)\n\n        p_list = self.filter_nest_list(p_list, trans_taglist)\n\n        if self.retranslate:\n            new_p_list = []\n\n            if fixstart is None or fixend is None:\n                return\n\n            start_append = False\n            for p in p_list:\n                text = p.get_text()\n                if fixstart in text or fixend in text or start_append:\n                    start_append = True\n                    new_p_list.append(p)\n                if fixend in text:\n                    p_list = new_p_list\n                    break\n\n        if self.allow_navigable_strings:\n            p_list.extend(soup.findAll(text=True))\n\n        send_num = self.accumulated_num\n        if send_num > 1:\n            with open(\"log/buglog.txt\", \"a\") as f:\n                print(f\"------------- {item.file_name} -------------\", file=f)\n\n            print(\"------------------------------------------------------\")\n            print(f\"dealing {item.file_name} ...\")\n            self.translate_paragraphs_acc(p_list, send_num)\n        else:\n            is_test_done = self.is_test 
and index > self.test_num\n            p_block = []\n            block_len = 0\n            for p in p_list:\n                if is_test_done:\n                    break\n                if not p.text or self._is_special_text(p.text):\n                    pbar.update(1)\n                    continue\n\n                new_p = self._extract_paragraph(copy(p))\n                if self.single_translate and self.block_size > 0:\n                    p_len = num_tokens_from_text(new_p.text)\n                    block_len += p_len\n                    if block_len > self.block_size:\n                        index = self._process_combined_paragraph(\n                            p_block, index, p_to_save_len, thread_safe=False\n                        )\n                        p_block = [p]\n                        block_len = p_len\n                        print()\n                    else:\n                        p_block.append(p)\n                else:\n                    index = self._process_paragraph(\n                        p, new_p, index, p_to_save_len, thread_safe=False\n                    )\n                    print()\n\n                # pbar.update(delta) not pbar.update(index)?\n                pbar.update(1)\n\n                if self.is_test and index >= self.test_num:\n                    break\n            if self.single_translate and self.block_size > 0 and len(p_block) > 0:\n                index = self._process_combined_paragraph(\n                    p_block, index, p_to_save_len, thread_safe=False\n                )\n\n        if soup:\n            item.content = soup.encode(encoding=\"utf-8\")\n        new_book.add_item(item)\n\n        return index\n\n    def set_parallel_workers(self, workers):\n        \"\"\"Set number of parallel workers for chapter processing.\n\n        Args:\n            workers (int): Number of parallel workers. 
Will be automatically\n                         optimized based on actual chapter count during processing.\n        \"\"\"\n        self.parallel_workers = max(1, workers)\n        self.enable_parallel = workers > 1\n\n        if workers > 8:\n            print(\n                f\"⚠️  Warning: {workers} workers is quite high. Consider using 2-8 workers for optimal performance.\"\n            )\n\n    def _get_next_translation_index(self):\n        \"\"\"Thread-safe method to get next translation index.\"\"\"\n        with self._progress_lock:\n            index = self._translation_index\n            self._translation_index += 1\n            return index\n\n    def _process_chapter_parallel(self, chapter_data):\n        \"\"\"Process a single chapter in parallel mode with proper accumulated_num handling.\"\"\"\n        item, trans_taglist, p_to_save_len = chapter_data\n        chapter_result = {\n            \"item\": item,\n            \"processed_content\": None,\n            \"success\": False,\n            \"error\": None,\n        }\n\n        try:\n            # Create a chapter-specific translator instance to avoid context conflicts\n            # This ensures each chapter has its own independent context\n            thread_translator = self._create_chapter_translator()\n\n            content = item.content\n            soup = bs(content, \"html.parser\")\n            p_list = soup.findAll(trans_taglist)\n            p_list = self.filter_nest_list(p_list, trans_taglist)\n\n            if self.allow_navigable_strings:\n                p_list.extend(soup.findAll(text=True))\n\n            # Initialize chapter-specific context lists\n            chapter_context_list = []\n            chapter_translated_list = []\n\n            # Apply accumulated_num logic for this chapter independently\n            send_num = self.accumulated_num\n            if send_num > 1:\n                # Use accumulated translation logic for this chapter\n                
self._translate_paragraphs_acc_parallel(\n                    p_list,\n                    send_num,\n                    thread_translator,\n                    chapter_context_list,\n                    chapter_translated_list,\n                )\n            else:\n                # Process paragraphs individually for this chapter\n                for p in p_list:\n                    if not p.text or self._is_special_text(p.text):\n                        continue\n\n                    new_p = self._extract_paragraph(copy(p))\n                    index = self._get_next_translation_index()\n\n                    if self.resume and index < p_to_save_len:\n                        t_text = self.p_to_save[index]\n                    else:\n                        # Use chapter-specific context for translation\n                        t_text = self._translate_with_chapter_context(\n                            thread_translator,\n                            new_p.text,\n                            chapter_context_list,\n                            chapter_translated_list,\n                        )\n                        t_text = \"\" if t_text is None else t_text\n                        with self._progress_lock:\n                            self.p_to_save.append(t_text)\n\n                    if isinstance(p, NavigableString):\n                        translated_node = NavigableString(t_text)\n                        p.insert_after(translated_node)\n                        if self.single_translate:\n                            p.extract()\n                    else:\n                        self.helper.insert_trans(\n                            p, t_text, self.translation_style, self.single_translate\n                        )\n\n                    with self._progress_lock:\n                        if index % 20 == 0:\n                            self._save_progress()\n\n            if soup:\n                chapter_result[\"processed_content\"] = 
soup.encode(encoding=\"utf-8\")\n            chapter_result[\"success\"] = True\n\n        except Exception as e:\n            chapter_result[\"error\"] = str(e)\n            print(f\"Error processing chapter {item.file_name}: {e}\")\n\n        return chapter_result\n\n    def _create_chapter_translator(self):\n        \"\"\"Create a translator instance for a specific chapter with independent context.\"\"\"\n        # Return the main translator - we'll handle context at the chapter level\n        return self.translate_model\n\n    def _translate_with_chapter_context(\n        self, translator, text, chapter_context_list, chapter_translated_list\n    ):\n        \"\"\"Translate text with chapter-specific context management.\"\"\"\n        if not translator.context_flag:\n            return translator.translate(text)\n\n        # Temporarily replace global context with chapter context\n        original_context = getattr(translator, \"context_list\", [])\n        original_translated = getattr(translator, \"context_translated_list\", [])\n\n        try:\n            # Use chapter-specific context\n            translator.context_list = chapter_context_list.copy()\n            translator.context_translated_list = chapter_translated_list.copy()\n\n            # Perform translation\n            result = translator.translate(text)\n\n            # Update chapter context\n            chapter_context_list[:] = translator.context_list\n            chapter_translated_list[:] = translator.context_translated_list\n\n            return result\n\n        finally:\n            # Restore original context\n            translator.context_list = original_context\n            translator.context_translated_list = original_translated\n\n    def _translate_paragraphs_acc_parallel(\n        self,\n        p_list,\n        send_num,\n        translator,\n        chapter_context_list,\n        chapter_translated_list,\n    ):\n        \"\"\"Apply accumulated_num logic for a single chapter in 
parallel mode with independent context.\"\"\"\n        from book_maker.utils import num_tokens_from_text\n        from .helper import not_trans\n\n        count = 0\n        wait_p_list = []\n\n        # Create chapter-specific helper instance with context-aware translation\n        class ChapterHelper:\n            def __init__(\n                self, parent_loader, translator, context_list, translated_list\n            ):\n                self.parent_loader = parent_loader\n                self.translator = translator\n                self.context_list = context_list\n                self.translated_list = translated_list\n\n            def translate_with_context(self, text):\n                return self.parent_loader._translate_with_chapter_context(\n                    self.translator, text, self.context_list, self.translated_list\n                )\n\n            def deal_old(self, wait_p_list, single_translate):\n                if not wait_p_list:\n                    return\n\n                # Use the same translate_list logic as sequential processing\n                # Create a temporary translator with chapter context\n                original_context = getattr(self.translator, \"context_list\", [])\n                original_translated = getattr(\n                    self.translator, \"context_translated_list\", []\n                )\n\n                try:\n                    # Set chapter context to the translator\n                    self.translator.context_list = self.context_list.copy()\n                    self.translator.context_translated_list = (\n                        self.translated_list.copy()\n                    )\n\n                    # Call translate_list for consistent batch translation logic\n                    result_txt_list = self.translator.translate_list(wait_p_list)\n\n                    # Update chapter context from translator\n                    self.context_list[:] = self.translator.context_list\n                    
self.translated_list[:] = self.translator.context_translated_list\n\n                    # Apply translations using the same logic as helper.deal_old\n                    for i in range(len(wait_p_list)):\n                        if i < len(result_txt_list):\n                            p = wait_p_list[i]\n                            from .helper import shorter_result_link\n\n                            self.parent_loader.helper.insert_trans(\n                                p,\n                                shorter_result_link(result_txt_list[i]),\n                                self.parent_loader.translation_style,\n                                single_translate,\n                            )\n\n                finally:\n                    # Restore original context\n                    self.translator.context_list = original_context\n                    self.translator.context_translated_list = original_translated\n\n                wait_p_list.clear()\n\n            def deal_new(self, p, wait_p_list, single_translate):\n                self.deal_old(wait_p_list, single_translate)\n                translation = self.translate_with_context(p.text)\n                self.parent_loader.helper.insert_trans(\n                    p,\n                    translation,\n                    self.parent_loader.translation_style,\n                    single_translate,\n                )\n\n        chapter_helper = ChapterHelper(\n            self, translator, chapter_context_list, chapter_translated_list\n        )\n\n        for i in range(len(p_list)):\n            p = p_list[i]\n            temp_p = copy(p)\n\n            for p_exclude in self.exclude_translate_tags.split(\",\"):\n                if type(p) == NavigableString:\n                    continue\n                for pt in temp_p.find_all(p_exclude):\n                    pt.extract()\n\n            if any(\n                [not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]\n          
  ):\n                if i == len(p_list) - 1:\n                    chapter_helper.deal_old(wait_p_list, self.single_translate)\n                continue\n\n            length = num_tokens_from_text(temp_p.text)\n            if length > send_num:\n                chapter_helper.deal_new(p, wait_p_list, self.single_translate)\n                continue\n\n            if i == len(p_list) - 1:\n                if count + length < send_num:\n                    wait_p_list.append(p)\n                    chapter_helper.deal_old(wait_p_list, self.single_translate)\n                else:\n                    chapter_helper.deal_new(p, wait_p_list, self.single_translate)\n                break\n\n            if count + length < send_num:\n                count += length\n                wait_p_list.append(p)\n            else:\n                chapter_helper.deal_old(wait_p_list, self.single_translate)\n                wait_p_list.append(p)\n                count = length\n\n    def batch_init_then_wait(self):\n        name, _ = os.path.splitext(self.epub_name)\n        if self.batch_flag or self.batch_use_flag:\n            self.translate_model.batch_init(name)\n            if self.batch_use_flag:\n                start_time = time.time()\n                while not self.translate_model.is_completed_batch():\n                    print(\"Batch translation is not completed yet\")\n                    time.sleep(2)\n                    if time.time() - start_time > 300:  # 5 minutes\n                        raise Exception(\"Batch translation timed out after 5 minutes\")\n\n    def make_bilingual_book(self):\n        self.helper = EPUBBookLoaderHelper(\n            self.translate_model,\n            self.accumulated_num,\n            self.translation_style,\n            self.context_flag,\n        )\n        self.batch_init_then_wait()\n        new_book = self._make_new_book(self.origin_book)\n        all_items = list(self.origin_book.get_items())\n        trans_taglist = 
self.translate_tags.split(\",\")\n        all_p_length = sum(\n            (\n                0\n                if (\n                    (i.get_type() != ITEM_DOCUMENT)\n                    or (i.file_name in self.exclude_filelist.split(\",\"))\n                    or (\n                        self.only_filelist\n                        and i.file_name not in self.only_filelist.split(\",\")\n                    )\n                )\n                else len(bs(i.content, \"html.parser\").findAll(trans_taglist))\n            )\n            for i in all_items\n        )\n        all_p_length += self.allow_navigable_strings * sum(\n            (\n                0\n                if (\n                    (i.get_type() != ITEM_DOCUMENT)\n                    or (i.file_name in self.exclude_filelist.split(\",\"))\n                    or (\n                        self.only_filelist\n                        and i.file_name not in self.only_filelist.split(\",\")\n                    )\n                )\n                else len(bs(i.content, \"html.parser\").findAll(text=True))\n            )\n            for i in all_items\n        )\n        pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)\n        print()\n        index = 0\n        p_to_save_len = len(self.p_to_save)\n        try:\n            if self.retranslate:\n                self.retranslate_book(\n                    index, p_to_save_len, pbar, trans_taglist, self.retranslate\n                )\n                exit(0)\n            # Add the things that don't need to be translated first, so that you can see the img after the interruption\n            for item in self.origin_book.get_items():\n                if item.get_type() != ITEM_DOCUMENT:\n                    new_book.add_item(item)\n\n            document_items = list(self.origin_book.get_items_of_type(ITEM_DOCUMENT))\n\n            if self.enable_parallel and len(document_items) > 1:\n                # Optimize worker 
count: no point having more workers than chapters\n                effective_workers = min(self.parallel_workers, len(document_items))\n\n                # Parallel processing with proper accumulated_num handling\n                print(f\"🚀 Parallel processing: {len(document_items)} chapters\")\n                if effective_workers < self.parallel_workers:\n                    print(\n                        f\"📊 Optimized workers: {effective_workers} (reduced from {self.parallel_workers})\"\n                    )\n                else:\n                    print(f\"📊 Using {effective_workers} workers\")\n\n                if self.accumulated_num > 1:\n                    print(\n                        f\"📝 Each chapter applies accumulated_num={self.accumulated_num} independently\"\n                    )\n\n                if self.context_flag:\n                    print(\n                        f\"🔗 Context enabled: each chapter maintains independent context (limit={self.translate_model.context_paragraph_limit})\"\n                    )\n                else:\n                    print(f\"🚫 Context disabled for this translation\")\n\n                # Create a simpler progress bar for parallel processing\n                pbar.close()  # Close the original progress bar\n                chapter_pbar = tqdm(\n                    total=len(document_items), desc=\"Chapters\", unit=\"ch\"\n                )\n\n                chapter_data_list = [\n                    (item, trans_taglist, p_to_save_len) for item in document_items\n                ]\n\n                with ThreadPoolExecutor(max_workers=effective_workers) as executor:\n                    future_to_item = {\n                        executor.submit(\n                            self._process_chapter_parallel, chapter_data\n                        ): chapter_data[0]\n                        for chapter_data in chapter_data_list\n                    }\n\n                    for future in 
as_completed(future_to_item):\n                        item = future_to_item[future]\n                        try:\n                            result = future.result()\n                            if result[\"success\"] and result[\"processed_content\"]:\n                                item.content = result[\"processed_content\"]\n                            new_book.add_item(item)\n                            chapter_pbar.update(1)\n                            chapter_pbar.set_postfix_str(\n                                f\"Latest: {item.file_name[:20]}...\"\n                            )\n\n                        except Exception as e:\n                            print(f\"❌ Error processing {item.file_name}: {e}\")\n                            new_book.add_item(item)\n                            chapter_pbar.update(1)\n\n                chapter_pbar.close()\n                print(f\"✅ Completed all {len(document_items)} chapters\")\n            else:\n                # Sequential processing (original behavior or single chapter)\n                if len(document_items) == 1 and self.enable_parallel:\n                    print(f\"📄 Single chapter detected - using sequential processing\")\n\n                for item in document_items:\n                    index = self.process_item(\n                        item, index, p_to_save_len, pbar, new_book, trans_taglist\n                    )\n\n                if self.accumulated_num > 1:\n                    name, _ = os.path.splitext(self.epub_name)\n                    epub.write_epub(f\"{name}_bilingual.epub\", new_book, {})\n            name, _ = os.path.splitext(self.epub_name)\n            if self.batch_flag:\n                self.translate_model.batch()\n            else:\n                epub.write_epub(f\"{name}_bilingual.epub\", new_book, {})\n            if self.accumulated_num == 1:\n                pbar.close()\n        except KeyboardInterrupt as e:\n            print(e)\n            if 
self.accumulated_num == 1:\n                print(\"you can resume it next time\")\n                self._save_progress()\n                self._save_temp_book()\n            sys.exit(0)\n        except Exception:\n            traceback.print_exc()\n            sys.exit(0)\n\n    def load_state(self):\n        try:\n            with open(self.bin_path, \"rb\") as f:\n                self.p_to_save = pickle.load(f)\n        except Exception:\n            raise Exception(\"can not load resume file\")\n\n    def _save_temp_book(self):\n        # TODO refactor this logic\n        origin_book_temp = epub.read_epub(self.epub_name)\n        new_temp_book = self._make_new_book(origin_book_temp)\n        p_to_save_len = len(self.p_to_save)\n        trans_taglist = self.translate_tags.split(\",\")\n        index = 0\n        try:\n            for item in origin_book_temp.get_items():\n                if item.get_type() == ITEM_DOCUMENT:\n                    content = item.content\n                    soup = bs(content, \"html.parser\")\n                    p_list = soup.findAll(trans_taglist)\n                    if self.allow_navigable_strings:\n                        p_list.extend(soup.findAll(text=True))\n                    for p in p_list:\n                        if not p.text or self._is_special_text(p.text):\n                            continue\n                        # TODO banch of p to translate then combine\n                        # PR welcome here\n                        if index < p_to_save_len:\n                            new_p = copy(p)\n                            if type(p) is NavigableString:\n                                new_p = self.p_to_save[index]\n                            else:\n                                new_p.string = self.p_to_save[index]\n                            self.helper.insert_trans(\n                                p,\n                                new_p.string,\n                                self.translation_style,\n 
                               self.single_translate,\n                            )\n                            index += 1\n                        else:\n                            break\n                    # for save temp book\n                    if soup:\n                        item.content = soup.encode()\n                new_temp_book.add_item(item)\n            name, _ = os.path.splitext(self.epub_name)\n            epub.write_epub(f\"{name}_bilingual_temp.epub\", new_temp_book, {})\n        except Exception as e:\n            # TODO handle it\n            print(e)\n\n    def _save_progress(self):\n        try:\n            with open(self.bin_path, \"wb\") as f:\n                pickle.dump(self.p_to_save, f)\n        except Exception:\n            raise Exception(\"can not save resume file\")\n"
  },
  {
    "path": "book_maker/loader/helper.py",
    "content": "import re\nimport backoff\nimport logging\nfrom copy import copy\n\nlogging.basicConfig(level=logging.WARNING)\nlogger = logging.getLogger(__name__)\n\n\nclass EPUBBookLoaderHelper:\n    def __init__(\n        self, translate_model, accumulated_num, translation_style, context_flag\n    ):\n        self.translate_model = translate_model\n        self.accumulated_num = accumulated_num\n        self.translation_style = translation_style\n        self.context_flag = context_flag\n\n    def insert_trans(self, p, text, translation_style=\"\", single_translate=False):\n        if text is None:\n            text = \"\"\n        if (\n            p.string is not None\n            and p.string.replace(\" \", \"\").strip() == text.replace(\" \", \"\").strip()\n        ):\n            return\n        new_p = copy(p)\n        new_p.string = text\n        if translation_style != \"\":\n            new_p[\"style\"] = translation_style\n        p.insert_after(new_p)\n        if single_translate:\n            p.extract()\n\n    @backoff.on_exception(\n        backoff.expo,\n        Exception,\n        on_backoff=lambda details: logger.warning(f\"retry backoff: {details}\"),\n        on_giveup=lambda details: logger.warning(f\"retry abort: {details}\"),\n        jitter=None,\n    )\n    def translate_with_backoff(self, text, context_flag=False):\n        return self.translate_model.translate(text, context_flag)\n\n    def deal_new(self, p, wait_p_list, single_translate=False):\n        self.deal_old(wait_p_list, single_translate, self.context_flag)\n        self.insert_trans(\n            p,\n            shorter_result_link(self.translate_with_backoff(p.text, self.context_flag)),\n            self.translation_style,\n            single_translate,\n        )\n\n    def deal_old(self, wait_p_list, single_translate=False, context_flag=False):\n        if not wait_p_list:\n            return\n\n        result_txt_list = 
self.translate_model.translate_list(wait_p_list)\n\n        for i in range(len(wait_p_list)):\n            if i < len(result_txt_list):\n                p = wait_p_list[i]\n                self.insert_trans(\n                    p,\n                    shorter_result_link(result_txt_list[i]),\n                    self.translation_style,\n                    single_translate,\n                )\n\n        wait_p_list.clear()\n\n\nurl_pattern = r\"(http[s]?://|www\\.)+(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\"\n\n\ndef is_text_link(text):\n    return bool(re.compile(url_pattern).match(text.strip()))\n\n\ndef is_text_tail_link(text, num=80):\n    text = text.strip()\n    pattern = r\".*\" + url_pattern + r\"$\"\n    return bool(re.compile(pattern).match(text)) and len(text) < num\n\n\ndef shorter_result_link(text, num=20):\n    match = re.search(url_pattern, text)\n\n    if not match or len(match.group()) < num:\n        return text\n\n    return re.compile(url_pattern).sub(\"...\", text)\n\n\ndef is_text_source(text):\n    return text.strip().startswith(\"Source: \")\n\n\ndef is_text_list(text, num=80):\n    text = text.strip()\n    return re.match(r\"^Listing\\s*\\d+\", text) and len(text) < num\n\n\ndef is_text_figure(text, num=80):\n    text = text.strip()\n    return re.match(r\"^Figure\\s*\\d+\", text) and len(text) < num\n\n\ndef is_text_digit_and_space(s):\n    for c in s:\n        if not c.isdigit() and not c.isspace():\n            return False\n    return True\n\n\ndef is_text_isbn(s):\n    pattern = r\"^[Ee]?ISBN\\s*\\d[\\d\\s]*$\"\n    return bool(re.match(pattern, s))\n\n\ndef not_trans(s):\n    return any(\n        [\n            is_text_link(s),\n            is_text_tail_link(s),\n            is_text_source(s),\n            is_text_list(s),\n            is_text_figure(s),\n            is_text_digit_and_space(s),\n            is_text_isbn(s),\n        ]\n    )\n"
  },
  {
    "path": "book_maker/loader/md_loader.py",
    "content": "import sys\nfrom pathlib import Path\n\nfrom book_maker.utils import prompt_config_to_kwargs\n\nfrom .base_loader import BaseBookLoader\n\n\nclass MarkdownBookLoader(BaseBookLoader):\n    def __init__(\n        self,\n        md_name,\n        model,\n        key,\n        resume,\n        language,\n        model_api_base=None,\n        is_test=False,\n        test_num=5,\n        prompt_config=None,\n        single_translate=False,\n        context_flag=False,\n        context_paragraph_limit=0,\n        temperature=1.0,\n        source_lang=\"auto\",\n    ) -> None:\n        self.md_name = md_name\n        self.translate_model = model(\n            key,\n            language,\n            api_base=model_api_base,\n            temperature=temperature,\n            source_lang=source_lang,\n            **prompt_config_to_kwargs(prompt_config),\n        )\n        self.is_test = is_test\n        self.p_to_save = []\n        self.bilingual_result = []\n        self.bilingual_temp_result = []\n        self.test_num = test_num\n        self.batch_size = 10\n        self.single_translate = single_translate\n        self.md_paragraphs = []\n\n        try:\n            with open(f\"{md_name}\", encoding=\"utf-8\") as f:\n                self.origin_book = f.read().splitlines()\n\n        except Exception as e:\n            raise Exception(\"can not load file\") from e\n\n        self.resume = resume\n        self.bin_path = f\"{Path(md_name).parent}/.{Path(md_name).stem}.temp.bin\"\n        if self.resume:\n            self.load_state()\n\n        self.process_markdown_content()\n\n    def process_markdown_content(self):\n        \"\"\"将原始内容处理成 markdown 段落\"\"\"\n        current_paragraph = []\n        for line in self.origin_book:\n            # 如果是空行且当前段落不为空，保存当前段落\n            if not line.strip() and current_paragraph:\n                self.md_paragraphs.append(\"\\n\".join(current_paragraph))\n                current_paragraph = []\n            # 
如果是标题行，单独作为一个段落\n            elif line.strip().startswith(\"#\"):\n                if current_paragraph:\n                    self.md_paragraphs.append(\"\\n\".join(current_paragraph))\n                    current_paragraph = []\n                self.md_paragraphs.append(line)\n            # 其他情况，添加到当前段落\n            else:\n                current_paragraph.append(line)\n\n        # 处理最后一个段落\n        if current_paragraph:\n            self.md_paragraphs.append(\"\\n\".join(current_paragraph))\n\n    @staticmethod\n    def _is_special_text(text):\n        return text.isdigit() or text.isspace() or len(text) == 0\n\n    def _make_new_book(self, book):\n        pass\n\n    def make_bilingual_book(self):\n        index = 0\n        p_to_save_len = len(self.p_to_save)\n\n        try:\n            sliced_list = [\n                self.md_paragraphs[i : i + self.batch_size]\n                for i in range(0, len(self.md_paragraphs), self.batch_size)\n            ]\n            for paragraphs in sliced_list:\n                batch_text = \"\\n\\n\".join(paragraphs)\n                if self._is_special_text(batch_text):\n                    continue\n                if not self.resume or index >= p_to_save_len:\n                    try:\n                        max_retries = 3\n                        retry_count = 0\n                        while retry_count < max_retries:\n                            try:\n                                temp = self.translate_model.translate(batch_text)\n                                break\n                            except AttributeError as ae:\n                                print(f\"翻译出错: {ae}\")\n                                retry_count += 1\n                                if retry_count == max_retries:\n                                    raise Exception(\"翻译模型初始化失败\") from ae\n                    except Exception as e:\n                        print(f\"翻译过程中出错: {e}\")\n                        raise Exception(\"翻译过程中出现错误\") 
from e\n\n                    self.p_to_save.append(temp)\n                    if not self.single_translate:\n                        self.bilingual_result.append(batch_text)\n                    self.bilingual_result.append(temp)\n                index += self.batch_size\n                if self.is_test and index > self.test_num:\n                    break\n\n            self.save_file(\n                f\"{Path(self.md_name).parent}/{Path(self.md_name).stem}_bilingual.md\",\n                self.bilingual_result,\n            )\n\n        except (KeyboardInterrupt, Exception) as e:\n            print(f\"发生错误: {e}\")\n            print(\"程序将保存进度，您可以稍后继续\")\n            self._save_progress()\n            self._save_temp_book()\n            sys.exit(1)  # 使用非零退出码表示错误\n\n    def _save_temp_book(self):\n        index = 0\n        sliced_list = [\n            self.origin_book[i : i + self.batch_size]\n            for i in range(0, len(self.origin_book), self.batch_size)\n        ]\n\n        for i in range(len(sliced_list)):\n            batch_text = \"\".join(sliced_list[i])\n            self.bilingual_temp_result.append(batch_text)\n            if self._is_special_text(self.origin_book[i]):\n                continue\n            if index < len(self.p_to_save):\n                self.bilingual_temp_result.append(self.p_to_save[index])\n            index += 1\n\n        self.save_file(\n            f\"{Path(self.md_name).parent}/{Path(self.md_name).stem}_bilingual_temp.txt\",\n            self.bilingual_temp_result,\n        )\n\n    def _save_progress(self):\n        try:\n            with open(self.bin_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(self.p_to_save))\n        except Exception as e:\n            raise Exception(\"can not save resume file\") from e\n\n    def load_state(self):\n        try:\n            with open(self.bin_path, encoding=\"utf-8\") as f:\n                self.p_to_save = f.read().splitlines()\n        except 
Exception as e:\n            raise Exception(\"can not load resume file\") from e\n\n    def save_file(self, book_path, content):\n        try:\n            with open(book_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(content))\n        except Exception as e:\n            raise Exception(\"can not save file\") from e\n"
  },
  {
    "path": "book_maker/loader/pdf_loader.py",
    "content": "import sys\nfrom pathlib import Path\n\nfrom book_maker.utils import prompt_config_to_kwargs\n\nfrom .base_loader import BaseBookLoader\n\nimport fitz\n\nfrom ebooklib import epub\n\n\nclass PDFBookLoader(BaseBookLoader):\n    def __init__(\n        self,\n        pdf_name,\n        model,\n        key,\n        resume,\n        language,\n        model_api_base=None,\n        is_test=False,\n        test_num=5,\n        prompt_config=None,\n        single_translate=False,\n        context_flag=False,\n        context_paragraph_limit=0,\n        temperature=1.0,\n        source_lang=\"auto\",\n        parallel_workers=1,\n    ) -> None:\n        if fitz is None:\n            raise Exception(\"PyMuPDF (fitz) is required to use PDF loader\")\n\n        self.pdf_name = pdf_name\n        self.translate_model = model(\n            key,\n            language,\n            api_base=model_api_base,\n            temperature=temperature,\n            source_lang=source_lang,\n            **prompt_config_to_kwargs(prompt_config),\n        )\n        self.is_test = is_test\n        self.p_to_save = []\n        self.bilingual_result = []\n        self.bilingual_temp_result = []\n        self.test_num = test_num\n        self.batch_size = 10\n        self.single_translate = single_translate\n        self.parallel_workers = max(1, parallel_workers)\n\n        try:\n            doc = fitz.open(self.pdf_name)\n            lines = []\n            for page in doc:\n                text = page.get_text(\"text\")\n                if not text:\n                    continue\n                lines.extend(text.splitlines())\n            self.origin_book = lines\n        except Exception as e:\n            raise Exception(\"can not load file\") from e\n\n        self.resume = resume\n        self.bin_path = f\"{Path(pdf_name).parent}/.{Path(pdf_name).stem}.temp.bin\"\n        if self.resume:\n            self.load_state()\n\n    def _make_new_book(self, book):\n        
pass\n\n    def _try_create_epub(self):\n        \"\"\"Try to create an EPUB file from translated content.\n\n        The EPUB is created from the `self.bilingual_result` list which alternates\n        original and translated strings. If EPUB creation fails for any reason,\n        this function will log the error and leave the TXT fallback intact.\n        \"\"\"\n        if epub is None:\n            # ebooklib not installed; skip EPUB generation\n            return False\n\n        if not self.bilingual_result:\n            return False\n\n        try:\n            book = epub.EpubBook()\n            title = Path(self.pdf_name).stem\n            # Minimal metadata\n            try:\n                book.set_identifier(title)\n                book.set_title(title)\n                book.set_language(\n                    self.translate_model.language\n                    if hasattr(self.translate_model, \"language\")\n                    else \"en\"\n                )\n            except Exception:\n                # be tolerant about metadata API differences\n                pass\n\n            chapters = []\n            # build chapters from bilingual_result (pairs)\n            for i in range(0, len(self.bilingual_result), 2):\n                orig = self.bilingual_result[i]\n                trans = (\n                    self.bilingual_result[i + 1]\n                    if i + 1 < len(self.bilingual_result)\n                    else \"\"\n                )\n                # basic html content: original then translated\n                content = \"\"\n                if orig:\n                    content += (\n                        '<div class=\"original\">'\n                        + \"<p>\"\n                        + orig.replace(\"\\n\", \"<br/>\")\n                        + \"</p></div>\"\n                    )\n                if trans:\n                    content += (\n                        '<div class=\"translation\">'\n                        + 
\"<p>\"\n                        + trans.replace(\"\\n\", \"<br/>\")\n                        + \"</p></div>\"\n                    )\n\n                chap = epub.EpubHtml(\n                    title=f\"part_{i//2}\",\n                    file_name=f\"index_split_{i//2:03d}.xhtml\",\n                    lang=(\n                        book.get_metadata(\"DC\", \"language\")[0][0]\n                        if book.get_metadata(\"DC\", \"language\")\n                        else None\n                    ),\n                )\n                chap.content = content\n                book.add_item(chap)\n                chapters.append(chap)\n\n            # table of contents and spine\n            book.toc = tuple(chapters)\n            book.spine = [\"nav\"] + chapters\n\n            # add navigation files\n            book.add_item(epub.EpubNcx())\n            book.add_item(epub.EpubNav())\n\n            out_path = f\"{Path(self.pdf_name).parent}/{Path(self.pdf_name).stem}_bilingual.epub\"\n            epub.write_epub(out_path, book)\n            return True\n        except Exception as e:\n            print(f\"create epub failed: {e}\")\n            return False\n\n    def make_bilingual_book(self):\n        index = 0\n        p_to_save_len = len(self.p_to_save)\n\n        try:\n            sliced_list = [\n                self.origin_book[i : i + self.batch_size]\n                for i in range(0, len(self.origin_book), self.batch_size)\n            ]\n            for i in sliced_list:\n                # fix the format thanks https://github.com/tudoujunha\n                batch_text = \"\\n\".join(i)\n                if not batch_text.strip():\n                    continue\n                if not self.resume or index >= p_to_save_len:\n                    try:\n                        temp = self.translate_model.translate(batch_text)\n                    except Exception as e:\n                        print(e)\n                        raise Exception(\"Something 
is wrong when translate\") from e\n                    self.p_to_save.append(temp)\n                    if not self.single_translate:\n                        self.bilingual_result.append(batch_text)\n                    self.bilingual_result.append(temp)\n                index += self.batch_size\n                if self.is_test and index > self.test_num:\n                    break\n\n            txt_out = (\n                f\"{Path(self.pdf_name).parent}/{Path(self.pdf_name).stem}_bilingual.txt\"\n            )\n            self.save_file(txt_out, self.bilingual_result)\n\n            # try to create an EPUB alongside the TXT fallback; if EPUB fails we still keep the TXT file\n            epub_ok = self._try_create_epub()\n            if epub_ok:\n                print(f\"created epub: {Path(self.pdf_name).stem}_bilingual.epub\")\n            else:\n                print(\n                    \"epub creation skipped or failed; bilingual text saved to txt fallback\"\n                )\n\n        except (KeyboardInterrupt, Exception) as e:\n            print(e)\n            print(\"you can resume it next time\")\n            self._save_progress()\n            self._save_temp_book()\n            sys.exit(0)\n\n    def _save_temp_book(self):\n        index = 0\n        sliced_list = [\n            self.origin_book[i : i + self.batch_size]\n            for i in range(0, len(self.origin_book), self.batch_size)\n        ]\n\n        for i in range(len(sliced_list)):\n            batch_text = \"\".join(sliced_list[i])\n            self.bilingual_temp_result.append(batch_text)\n            if index < len(self.p_to_save):\n                self.bilingual_temp_result.append(self.p_to_save[index])\n            index += 1\n\n        self.save_file(\n            f\"{Path(self.pdf_name).parent}/{Path(self.pdf_name).stem}_bilingual_temp.txt\",\n            self.bilingual_temp_result,\n        )\n\n    def _save_progress(self):\n        try:\n            with open(self.bin_path, 
\"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(self.p_to_save))\n        except Exception as e:\n            raise Exception(\"can not save resume file\") from e\n\n    def load_state(self):\n        try:\n            with open(self.bin_path, encoding=\"utf-8\") as f:\n                self.p_to_save = f.read().splitlines()\n        except Exception as e:\n            raise Exception(\"can not load resume file\") from e\n\n    def save_file(self, book_path, content):\n        try:\n            with open(book_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(content))\n        except Exception as e:\n            raise Exception(\"can not save file\") from e\n"
  },
  {
    "path": "book_maker/loader/srt_loader.py",
    "content": "\"\"\"\ninspired by: https://github.com/jesselau76/srt-gpt-translator, MIT License\n\"\"\"\n\nimport re\nimport sys\nfrom pathlib import Path\n\nfrom book_maker.utils import prompt_config_to_kwargs\n\nfrom .base_loader import BaseBookLoader\n\n\nclass SRTBookLoader(BaseBookLoader):\n    def __init__(\n        self,\n        srt_name,\n        model,\n        key,\n        resume,\n        language,\n        model_api_base=None,\n        is_test=False,\n        test_num=5,\n        prompt_config=None,\n        single_translate=False,\n        context_flag=False,\n        context_paragraph_limit=0,\n        temperature=1.0,\n        source_lang=\"auto\",\n    ) -> None:\n        self.srt_name = srt_name\n        self.translate_model = model(\n            key,\n            language,\n            api_base=model_api_base,\n            temperature=temperature,\n            source_lang=source_lang,\n            **prompt_config_to_kwargs(\n                {\n                    \"system\": \"You are a srt subtitle file translator.\",\n                    \"user\": \"Translate the following subtitle text into {language}, but keep the subtitle number and timeline and newlines unchanged: \\n{text}\",\n                }\n            ),\n        )\n        self.is_test = is_test\n        self.p_to_save = []\n        self.bilingual_result = []\n        self.bilingual_temp_result = []\n        self.test_num = test_num\n        self.accumulated_num = 1\n        self.blocks = []\n        self.single_translate = single_translate\n\n        self.resume = resume\n        self.bin_path = f\"{Path(srt_name).parent}/.{Path(srt_name).stem}.temp.bin\"\n        if self.resume:\n            self.load_state()\n\n    def _make_new_book(self, book):\n        pass\n\n    def _parse_srt(self, srt_text):\n        blocks = re.split(\"\\n\\s*\\n\", srt_text)\n\n        final_blocks = []\n        new_block = {}\n        for i in range(0, len(blocks)):\n            block = blocks[i]\n  
          if block.strip() == \"\":\n                continue\n\n            lines = block.strip().splitlines()\n            new_block[\"number\"] = lines[0].strip()\n            timestamp = lines[1].strip()\n            new_block[\"time\"] = timestamp\n            text = \"\\n\".join(lines[2:]).strip()\n            new_block[\"text\"] = text\n            final_blocks.append(new_block)\n            new_block = {}\n\n        return final_blocks\n\n    def _get_block_text(self, block):\n        return f\"{block['number']}\\n{block['time']}\\n{block['text']}\"\n\n    def _get_block_except_text(self, block):\n        return f\"{block['number']}\\n{block['time']}\"\n\n    def _concat_blocks(self, sliced_text: str, text: str):\n        return f\"{sliced_text}\\n\\n{text}\" if sliced_text else text\n\n    def _get_block_translate(self, block):\n        return f\"{block['number']}\\n{block['text']}\"\n\n    def _get_block_from(self, text):\n        text = text.strip()\n        if not text:\n            return {}\n\n        block = text.splitlines()\n        if len(block) < 2:\n            return {\"number\": block[0], \"text\": \"\"}\n\n        return {\"number\": block[0], \"text\": \"\\n\".join(block[1:])}\n\n    def _get_blocks_from(self, translate: str):\n        if not translate:\n            return []\n\n        blocks = []\n        blocks_text = translate.strip().split(\"\\n\\n\")\n        for text in blocks_text:\n            blocks.append(self._get_block_from(text))\n\n        return blocks\n\n    def _check_blocks(self, translate_blocks, origin_blocks):\n        \"\"\"\n        Check if the translated blocks match the original text, with only a simple check of the beginning numbers.\n        \"\"\"\n        if len(translate_blocks) != len(origin_blocks):\n            return False\n\n        for t in zip(translate_blocks, origin_blocks):\n            i = 0\n            try:\n                i = int(t[0].get(\"number\", 0))\n            except ValueError:\n         
       m = re.search(r\"\\s*\\d+\", t[0].get(\"number\"))\n                if m:\n                    i = int(m.group())\n\n            j = int(t[1].get(\"number\", -1))\n            if i != j:\n                print(f\"check failed: {i}!={j}\")\n                return False\n\n        return True\n\n    def _get_sliced_list(self):\n        sliced_list = []\n        sliced_text = \"\"\n        begin_index = 0\n        for i, block in enumerate(self.blocks):\n            text = self._get_block_translate(block)\n            if not text:\n                continue\n\n            if len(sliced_text + text) < self.accumulated_num:\n                sliced_text = self._concat_blocks(sliced_text, text)\n            else:\n                if sliced_text:\n                    sliced_list.append((begin_index, i, sliced_text))\n                sliced_text = text\n                begin_index = i\n\n        sliced_list.append((begin_index, len(self.blocks), sliced_text))\n        return sliced_list\n\n    def make_bilingual_book(self):\n        if self.accumulated_num > 512:\n            print(f\"{self.accumulated_num} is too large, shrink it to 512.\")\n            self.accumulated_num = 512\n\n        try:\n            with open(f\"{self.srt_name}\", encoding=\"utf-8\") as f:\n                self.blocks = self._parse_srt(f.read())\n        except Exception as e:\n            raise Exception(\"can not load file\") from e\n\n        index = 0\n        p_to_save_len = len(self.p_to_save)\n\n        try:\n            sliced_list = self._get_sliced_list()\n\n            for sliced in sliced_list:\n                begin, end, text = sliced\n\n                if not self.resume or index + (end - begin) > p_to_save_len:\n                    if index < p_to_save_len:\n                        self.p_to_save = self.p_to_save[:index]\n\n                    try:\n                        temp = self.translate_model.translate(text)\n                    except Exception as e:\n                
        print(e)\n                        raise Exception(\"Something is wrong when translate\") from e\n\n                    translated_blocks = self._get_blocks_from(temp)\n\n                    if self.accumulated_num > 1:\n                        if not self._check_blocks(\n                            translated_blocks, self.blocks[begin:end]\n                        ):\n                            translated_blocks = []\n                            # try to translate one by one, so don't accumulate too much\n                            print(\n                                f\"retry it one by one:  {self.blocks[begin]['number']} - {self.blocks[end - 1]['number']}\"\n                            )\n                            for block in self.blocks[begin:end]:\n                                try:\n                                    temp = self.translate_model.translate(\n                                        self._get_block_translate(block)\n                                    )\n                                except Exception as e:\n                                    print(e)\n                                    raise Exception(\n                                        \"Something is wrong when translate\"\n                                    ) from e\n                                translated_blocks.append(self._get_block_from(temp))\n\n                            if not self._check_blocks(\n                                translated_blocks, self.blocks[begin:end]\n                            ):\n                                raise Exception(\n                                    \"retry failed, adjust the srt manually.\"\n                                )\n\n                    for i, block in enumerate(translated_blocks):\n                        text = block.get(\"text\", \"\")\n                        self.p_to_save.append(text)\n                        if self.single_translate:\n                            self.bilingual_result.append(\n       
                         f\"{self._get_block_except_text(self.blocks[begin + i])}\\n{text}\"\n                            )\n                        else:\n                            self.bilingual_result.append(\n                                f\"{self._get_block_text(self.blocks[begin + i])}\\n{text}\"\n                            )\n                else:\n                    for i, block in enumerate(self.blocks[begin:end]):\n                        text = self.p_to_save[begin + i]\n                        if self.single_translate:\n                            self.bilingual_result.append(\n                                f\"{self._get_block_except_text(self.blocks[begin + i])}\\n{text}\"\n                            )\n                        else:\n                            self.bilingual_result.append(\n                                f\"{self._get_block_text(self.blocks[begin + i])}\\n{text}\"\n                            )\n\n                index += end - begin\n                if self.is_test and index > self.test_num:\n                    break\n\n            self.save_file(\n                f\"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual.srt\",\n                self.bilingual_result,\n            )\n\n        except (KeyboardInterrupt, Exception) as e:\n            print(e)\n            print(\"you can resume it next time\")\n            self._save_progress()\n            self._save_temp_book()\n            sys.exit(0)\n\n    def _save_temp_book(self):\n        for i, block in enumerate(self.blocks):\n            if i < len(self.p_to_save):\n                text = self.p_to_save[i]\n                self.bilingual_temp_result.append(\n                    f\"{self._get_block_text(block)}\\n{text}\"\n                )\n            else:\n                self.bilingual_temp_result.append(f\"{self._get_block_text(block)}\\n\")\n\n        self.save_file(\n            
f\"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual_temp.srt\",\n            self.bilingual_temp_result,\n        )\n\n    def _save_progress(self):\n        try:\n            with open(self.bin_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"===\".join(self.p_to_save))\n        except Exception as e:\n            raise Exception(\"can not save resume file\") from e\n\n    def load_state(self):\n        try:\n            with open(self.bin_path, encoding=\"utf-8\") as f:\n                text = f.read()\n                if text:\n                    self.p_to_save = text.split(\"===\")\n                else:\n                    self.p_to_save = []\n\n        except Exception as e:\n            raise Exception(\"can not load resume file\") from e\n\n    def save_file(self, book_path, content):\n        try:\n            with open(book_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\\n\".join(content))\n        except Exception as e:\n            raise Exception(\"can not save file\") from e\n"
  },
  {
    "path": "book_maker/loader/txt_loader.py",
    "content": "import sys\nfrom pathlib import Path\n\nfrom book_maker.utils import prompt_config_to_kwargs\n\nfrom .base_loader import BaseBookLoader\n\n\nclass TXTBookLoader(BaseBookLoader):\n    def __init__(\n        self,\n        txt_name,\n        model,\n        key,\n        resume,\n        language,\n        model_api_base=None,\n        is_test=False,\n        test_num=5,\n        prompt_config=None,\n        single_translate=False,\n        context_flag=False,\n        context_paragraph_limit=0,\n        temperature=1.0,\n        source_lang=\"auto\",\n        parallel_workers=1,\n    ) -> None:\n        self.txt_name = txt_name\n        self.translate_model = model(\n            key,\n            language,\n            api_base=model_api_base,\n            temperature=temperature,\n            source_lang=source_lang,\n            **prompt_config_to_kwargs(prompt_config),\n        )\n        self.is_test = is_test\n        self.p_to_save = []\n        self.bilingual_result = []\n        self.bilingual_temp_result = []\n        self.test_num = test_num\n        self.batch_size = 10\n        self.single_translate = single_translate\n        self.parallel_workers = max(1, parallel_workers)\n\n        try:\n            with open(f\"{txt_name}\", encoding=\"utf-8\") as f:\n                self.origin_book = f.read().splitlines()\n\n        except Exception as e:\n            raise Exception(\"can not load file\") from e\n\n        self.resume = resume\n        self.bin_path = f\"{Path(txt_name).parent}/.{Path(txt_name).stem}.temp.bin\"\n        if self.resume:\n            self.load_state()\n\n    @staticmethod\n    def _is_special_text(text):\n        return text.isdigit() or text.isspace() or len(text) == 0\n\n    def _make_new_book(self, book):\n        pass\n\n    def make_bilingual_book(self):\n        index = 0\n        p_to_save_len = len(self.p_to_save)\n\n        try:\n            sliced_list = [\n                self.origin_book[i : i + 
self.batch_size]\n                for i in range(0, len(self.origin_book), self.batch_size)\n            ]\n            for i in sliced_list:\n                # fix the format thanks https://github.com/tudoujunha\n                batch_text = \"\\n\".join(i)\n                if self._is_special_text(batch_text):\n                    continue\n                if not self.resume or index >= p_to_save_len:\n                    try:\n                        temp = self.translate_model.translate(batch_text)\n                    except Exception as e:\n                        print(e)\n                        raise Exception(\"Something is wrong when translate\") from e\n                    self.p_to_save.append(temp)\n                    if not self.single_translate:\n                        self.bilingual_result.append(batch_text)\n                    self.bilingual_result.append(temp)\n                index += self.batch_size\n                if self.is_test and index > self.test_num:\n                    break\n\n            self.save_file(\n                f\"{Path(self.txt_name).parent}/{Path(self.txt_name).stem}_bilingual.txt\",\n                self.bilingual_result,\n            )\n\n        except (KeyboardInterrupt, Exception) as e:\n            print(e)\n            print(\"you can resume it next time\")\n            self._save_progress()\n            self._save_temp_book()\n            sys.exit(0)\n\n    def _save_temp_book(self):\n        index = 0\n        sliced_list = [\n            self.origin_book[i : i + self.batch_size]\n            for i in range(0, len(self.origin_book), self.batch_size)\n        ]\n\n        for i in range(len(sliced_list)):\n            batch_text = \"\".join(sliced_list[i])\n            self.bilingual_temp_result.append(batch_text)\n            if self._is_special_text(self.origin_book[i]):\n                continue\n            if index < len(self.p_to_save):\n                
self.bilingual_temp_result.append(self.p_to_save[index])\n            index += 1\n\n        self.save_file(\n            f\"{Path(self.txt_name).parent}/{Path(self.txt_name).stem}_bilingual_temp.txt\",\n            self.bilingual_temp_result,\n        )\n\n    def _save_progress(self):\n        try:\n            with open(self.bin_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(self.p_to_save))\n        except Exception as e:\n            raise Exception(\"can not save resume file\") from e\n\n    def load_state(self):\n        try:\n            with open(self.bin_path, encoding=\"utf-8\") as f:\n                self.p_to_save = f.read().splitlines()\n        except Exception as e:\n            raise Exception(\"can not load resume file\") from e\n\n    def save_file(self, book_path, content):\n        try:\n            with open(book_path, \"w\", encoding=\"utf-8\") as f:\n                f.write(\"\\n\".join(content))\n        except Exception as e:\n            raise Exception(\"can not save file\") from e\n"
  },
  {
    "path": "book_maker/obok.py",
    "content": "# The original code comes from:\n# https://github.com/apprenticeharper/DeDRM_tools\n\n# Version 4.1.2 March 2023\n# Update library for crypto for current Windows\n\n# Version 4.1.1 March 2023\n# Make obok.py works as file selector\n\n# Version 4.1.0 February 2021\n# Add detection for Kobo directory location on Linux\n\n# Version 4.0.0 September 2020\n# Python 3.0\n#\n# Version 3.2.5 December 2016\n# Improve detection of good text decryption.\n#\n# Version 3.2.4 December 2016\n# Remove incorrect support for Kobo Desktop under Wine\n#\n# Version 3.2.3 October 2016\n# Fix for windows network user and more xml fixes\n#\n# Version 3.2.2 October 2016\n# Change to the way the new database version is handled.\n#\n# Version 3.2.1 September 2016\n# Update for v4.0 of Windows Desktop app.\n#\n# Version 3.2.0 January 2016\n# Update for latest version of Windows Desktop app.\n# Support Kobo devices in the command line version.\n#\n# Version 3.1.9 November 2015\n# Handle Kobo Desktop under wine on Linux\n#\n# Version 3.1.8 November 2015\n# Handle the case of Kobo Arc or Vox device (i.e. 
don't crash).\n#\n# Version 3.1.7 October 2015\n# Handle the case of no device or database more gracefully.\n#\n# Version 3.1.6 September 2015\n# Enable support for Kobo devices\n# More character encoding fixes (unicode strings)\n#\n# Version 3.1.5 September 2015\n# Removed requirement that a purchase has been made.\n# Also add in character encoding fixes\n#\n# Version 3.1.4 September 2015\n# Updated for version 3.17 of the Windows Desktop app.\n#\n# Version 3.1.3 August 2015\n# Add translations for Portuguese and Arabic\n#\n# Version 3.1.2 January 2015\n# Add coding, version number and version announcement\n#\n# Version 3.05 October 2014\n# Identifies DRM-free books in the dialog\n#\n# Version 3.04 September 2014\n# Handles DRM-free books as well (sometimes Kobo Library doesn't\n# show download link for DRM-free books)\n#\n# Version 3.03 August 2014\n# If PyCrypto is unavailable try to use libcrypto for AES_ECB.\n#\n# Version 3.02 August 2014\n# Relax checking of application/xhtml+xml  and image/jpeg content.\n#\n# Version 3.01 June 2014\n# Check image/jpeg as well as application/xhtml+xml content. Fix typo\n# in Windows ipconfig parsing.\n#\n# Version 3.0 June 2014\n# Made portable for Mac and Windows, and the only module dependency\n# not part of python core is PyCrypto. Major code cleanup/rewrite.\n# No longer tries the first MAC address; tries them all if it detects\n# the decryption failed.\n#\n# Updated September 2013 by Anon\n# Version 2.02\n# Incorporated minor fixes posted at Apprentice Alf's.\n#\n# Updates July 2012 by Michael Newton\n# PWSD ID is no longer a MAC address, but should always\n# be stored in the registry. Script now works with OS X\n# and checks plist for values instead of registry. Must\n# have biplist installed for OS X support.\n#\n# Original comments left below; note the \"AUTOPSY\" is inaccurate. 
See\n# KoboLibrary.userkeys and KoboFile.decrypt()\n#\n##########################################################\n#                    KOBO DRM CRACK BY                   #\n#                      PHYSISTICATED                     #\n##########################################################\n# This app was made for Python 2.7 on Windows 32-bit\n#\n# This app needs pycrypto - get from here:\n# http://www.voidspace.org.uk/python/modules.shtml\n#\n# Usage: obok.py\n# Choose the book you want to decrypt\n#\n# Shouts to my krew - you know who you are - and one in\n# particular who gave me a lot of help with this - thank\n# you so much!\n#\n# Kopimi /K\\\n# Keep sharing, keep copying, but remember that nothing is\n# for free - make sure you compensate your favorite\n# authors - and cut out the middle man whenever possible\n# ;) ;) ;)\n#\n# DRM AUTOPSY\n# The Kobo DRM was incredibly easy to crack, but it took\n# me months to get around to making this. Here's the\n# basics of how it works:\n# 1: Get MAC address of first NIC in ipconfig (sometimes\n# stored in registry as pwsdid)\n# 2: Get user ID (stored in tons of places, this gets it\n# from HKEY_CURRENT_USER\\Software\\Kobo\\Kobo Desktop\n# Edition\\Browser\\cookies)\n# 3: Concatenate and SHA256, take the second half - this\n# is your master key\n# 4: Open %LOCALAPPDATA%\\Kobo Desktop Editions\\Kobo.sqlite\n# and dump content_keys\n# 5: Unbase64 the keys, then decode these with the master\n# key - these are your page keys\n# 6: Unzip EPUB of your choice, decrypt each page with its\n# page key, then zip back up again\n#\n# WHY USE THIS WHEN INEPT WORKS FINE? 
(adobe DRM stripper)\n# Inept works very well, but authors on Kobo can choose\n# what DRM they want to use - and some have chosen not to\n# let people download them with Adobe Digital Editions -\n# they would rather lock you into a single platform.\n#\n# With Obok, you can sync Kobo Desktop, decrypt all your\n# ebooks, and then use them on whatever device you want\n# - you bought them, you own them, you can do what you\n# like with them.\n#\n# Obok is Kobo backwards, but it is also means \"next to\"\n# in Polish.\n# When you buy a real book, it is right next to you. You\n# can read it at home, at work, on a train, you can lend\n# it to a friend, you can scribble on it, and add your own\n# explanations/translations.\n#\n# Obok gives you this power over your ebooks - no longer\n# are you restricted to one device. This allows you to\n# embed foreign fonts into your books, as older Kobo's\n# can't display them properly. You can read your books\n# on your phones, in different PC readers, and different\n# ereader devices. 
You can share them with your friends\n# too, if you like - you can do that with a real book\n# after all.\n#\n\"\"\"Manage all Kobo books, either encrypted or DRM-free.\"\"\"\n\n__version__ = \"4.1.2\"\n__about__ = f\"Obok v{__version__}\\nCopyright © 2012-2020 Physisticated et al.\"\n\nimport base64\nimport binascii\nimport contextlib\nimport hashlib\nimport os\nimport re\nimport shutil\nimport sqlite3\nimport subprocess\nimport sys\nimport tempfile\nimport xml.etree.ElementTree as ET\nimport zipfile\n\ncan_parse_xml = True\ntry:\n    from xml.etree import ElementTree as ET\n\n    # print \"using xml.etree for xml parsing\"\nexcept ImportError:\n    can_parse_xml = False\n    # print \"Cannot find xml.etree, disabling extraction of serial numbers\"\n\n# List of all known hash keys\nKOBO_HASH_KEYS = [\"88b3a2e13\", \"XzUhGYdFp\", \"NoCanLook\", \"QJhwzAtXL\"]\n\n\nclass ENCRYPTIONError(Exception):\n    pass\n\n\ndef _load_crypto_libcrypto():\n    from ctypes import (\n        CDLL,\n        POINTER,\n        Structure,\n        c_char_p,\n        c_int,\n        c_long,\n        create_string_buffer,\n    )\n    from ctypes.util import find_library\n\n    if sys.platform.startswith(\"win\"):\n        libcrypto = find_library(\"libcrypto\")\n    else:\n        libcrypto = find_library(\"crypto\")\n\n    if libcrypto is None:\n        raise ENCRYPTIONError(\"libcrypto not found\")\n    libcrypto = CDLL(libcrypto)\n\n    AES_MAXNR = 14\n\n    POINTER(c_char_p)\n    POINTER(c_int)\n\n    class AES_KEY(Structure):\n        _fields_ = [(\"rd_key\", c_long * (4 * (AES_MAXNR + 1))), (\"rounds\", c_int)]\n\n    AES_KEY_p = POINTER(AES_KEY)\n\n    def F(restype, name, argtypes):\n        func = getattr(libcrypto, name)\n        func.restype = restype\n        func.argtypes = argtypes\n        return func\n\n    AES_set_decrypt_key = F(c_int, \"AES_set_decrypt_key\", [c_char_p, c_int, AES_KEY_p])\n    AES_ecb_encrypt = F(None, \"AES_ecb_encrypt\", [c_char_p, c_char_p, 
AES_KEY_p, c_int])\n\n    class AES:\n        def __init__(self, userkey) -> None:\n            self._blocksize = len(userkey)\n            if self._blocksize not in [16, 24, 32]:\n                raise ENCRYPTIONError(_(\"AES improper key used\"))\n            key = self._key = AES_KEY()\n            rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)\n            if rv < 0:\n                raise ENCRYPTIONError(_(\"Failed to initialize AES key\"))\n\n        def decrypt(self, data):\n            clear = b\"\"\n            for i in range(0, len(data), 16):\n                out = create_string_buffer(16)\n                rv = AES_ecb_encrypt(data[i : i + 16], out, self._key, 0)\n                if rv == 0:\n                    raise ENCRYPTIONError(_(\"AES decryption failed\"))\n                clear += out.raw\n            return clear\n\n    return AES\n\n\ndef _load_crypto_pycrypto():\n    from Crypto.Cipher import AES as _AES\n\n    class AES:\n        def __init__(self, key) -> None:\n            self._aes = _AES.new(key, _AES.MODE_ECB)\n\n        def decrypt(self, data):\n            return self._aes.decrypt(data)\n\n    return AES\n\n\ndef _load_crypto():\n    AES = None\n    cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto)\n    for loader in cryptolist:\n        with contextlib.suppress(ImportError, ENCRYPTIONError):\n            AES = loader()\n            break\n    return AES\n\n\nAES = _load_crypto()\n\n\n# Wrap a stream so that output gets flushed immediately\n# and also make sure that any unicode strings get\n# encoded using \"replace\" before writing them.\nclass SafeUnbuffered:\n    def __init__(self, stream) -> None:\n        self.stream = stream\n        self.encoding = stream.encoding\n        if self.encoding is None:\n            self.encoding = \"utf-8\"\n\n    def write(self, data):\n        if isinstance(data, str):\n            data = data.encode(self.encoding, \"replace\")\n        self.stream.buffer.write(data)\n        
self.stream.buffer.flush()\n\n    def __getattr__(self, attr):\n        return getattr(self.stream, attr)\n\n\nclass KoboLibrary:\n    \"\"\"The Kobo library.\n\n    This class represents all the information available from the data\n    written by the Kobo Desktop Edition application, including the list\n    of books, their titles, and the user's encryption key(s).\"\"\"\n\n    def __init__(self, serials=None, device_path=None, desktopkobodir=\"\") -> None:\n        if serials is None:\n            serials = []\n        print(__about__)\n        self.kobodir = \"\"\n        kobodb = \"\"\n\n        # Order of checks\n        # 1. first check if a device_path has been passed in, and whether\n        #    we can find the sqlite db in the respective place\n        # 2. if 1., and we got some serials passed in (from saved\n        #    settings in calibre), just use it\n        # 3. if 1. worked, but we didn't get serials, try to parse them\n        #    from the device, if this didn't work, unset everything\n        # 4. if by now we don't have kobodir set, give up on device and\n        #    try to use the Desktop app.\n\n        # step 1. check whether this looks like a real device\n        if device_path:\n            # we got a device path\n            self.kobodir = os.path.join(device_path, \".kobo\")\n            # devices use KoboReader.sqlite\n            kobodb = os.path.join(self.kobodir, \"KoboReader.sqlite\")\n            if not os.path.isfile(kobodb):\n                # device path seems to be wrong, unset it\n                device_path = \"\"\n                self.kobodir = \"\"\n                kobodb = \"\"\n\n        # step 3. 
we found a device but didn't get serials, try to get them\n        #\n        # we got a device path but no saved serial\n        # try to get the serial from the device\n        # get serial from device_path/.adobe-digital-editions/device.xml\n        if self.kobodir and len(serials) == 0 and can_parse_xml:\n            # print \"get_device_settings - device_path = {0}\".format(device_path)\n            devicexml = os.path.join(\n                device_path,\n                \".adobe-digital-editions\",\n                \"device.xml\",\n            )\n            # print \"trying to load {0}\".format(devicexml)\n            if os.path.exists(devicexml):\n                # print \"trying to parse {0}\".format(devicexml)\n                xmltree = ET.parse(devicexml)\n                for node in xmltree.iter():\n                    if \"deviceSerial\" in node.tag:\n                        serial = node.text\n                        # print \"found serial {0}\".format(serial)\n                        serials.append(serial)\n                        break\n            else:\n                # print \"cannot get serials from device.\"\n                device_path = \"\"\n                self.kobodir = \"\"\n                kobodb = \"\"\n\n        if self.kobodir == \"\":\n            # step 4. we haven't found a device with serials, so try desktop apps\n            if desktopkobodir != \"\":\n                self.kobodir = desktopkobodir\n\n            if self.kobodir == \"\":\n                if sys.platform.startswith(\"win\"):\n                    import winreg\n\n                    if (\n                        sys.getwindowsversion().major > 5\n                        and \"LOCALAPPDATA\" in os.environ\n                    ):\n                        # Python 2.x does not return unicode env. 
Use Python 3.x\n                        self.kobodir = winreg.ExpandEnvironmentStrings(\"%LOCALAPPDATA%\")\n                    if self.kobodir == \"\" and \"USERPROFILE\" in os.environ:\n                        # Python 2.x does not return unicode env. Use Python 3.x\n                        self.kobodir = os.path.join(\n                            winreg.ExpandEnvironmentStrings(\"%USERPROFILE%\"),\n                            \"Local Settings\",\n                            \"Application Data\",\n                        )\n                    self.kobodir = os.path.join(\n                        self.kobodir,\n                        \"Kobo\",\n                        \"Kobo Desktop Edition\",\n                    )\n                elif sys.platform.startswith(\"darwin\"):\n                    self.kobodir = os.path.join(\n                        os.environ[\"HOME\"],\n                        \"Library\",\n                        \"Application Support\",\n                        \"Kobo\",\n                        \"Kobo Desktop Edition\",\n                    )\n                elif sys.platform.startswith(\"linux\"):\n                    # sets ~/.config/calibre as the location to store the kobodir location info file and creates this directory if necessary\n                    kobodir_cache_dir = os.path.join(\n                        os.environ[\"HOME\"],\n                        \".config\",\n                        \"calibre\",\n                    )\n                    if not os.path.isdir(kobodir_cache_dir):\n                        os.mkdir(kobodir_cache_dir)\n\n                    # appends the name of the file we're storing the kobodir location info to the above path\n                    kobodir_cache_file = f\"{str(kobodir_cache_dir)}/kobo_location\"\n\n                    \"\"\"if the above file does not exist, recursively searches from the root\n                    of the filesystem until kobodir is found and stores the location of kobodir\n        
            in that file so this loop can be skipped in the future\"\"\"\n                    original_stdout = sys.stdout\n                    if not os.path.isfile(kobodir_cache_file):\n                        for root, _dirs, files in os.walk(\"/\"):\n                            for file in files:\n                                if file == \"Kobo.sqlite\":\n                                    kobo_linux_path = str(root)\n                                    with open(\n                                        kobodir_cache_file,\n                                        \"w\",\n                                        encoding=\"utf-8\",\n                                    ) as f:\n                                        sys.stdout = f\n                                        print(kobo_linux_path, end=\"\")\n                                        sys.stdout = original_stdout\n\n                    f = open(kobodir_cache_file, encoding=\"utf-8\")\n                    self.kobodir = f.read()\n\n            # desktop versions use Kobo.sqlite\n            kobodb = os.path.join(self.kobodir, \"Kobo.sqlite\")\n            # check for existence of file\n            if not os.path.isfile(kobodb):\n                # give up here, we haven't found anything useful\n                self.kobodir = \"\"\n                kobodb = \"\"\n\n        if self.kobodir != \"\":\n            self.bookdir = os.path.join(self.kobodir, \"kepub\")\n            # make a copy of the database in a temporary file\n            # so we can ensure it's not using WAL logging which sqlite3 can't do.\n            self.newdb = tempfile.NamedTemporaryFile(mode=\"wb\", delete=False)\n            print(self.newdb.name)\n            with open(kobodb, \"rb\") as olddb:\n                self.newdb.write(olddb.read(18))\n                self.newdb.write(b\"\\x01\\x01\")\n                olddb.read(2)\n                self.newdb.write(olddb.read())\n            self.newdb.close()\n            self.__sqlite = 
sqlite3.connect(self.newdb.name)\n            self.__cursor = self.__sqlite.cursor()\n            self._userkeys = []\n            self._books = []\n            self._volumeID = []\n            self._serials = serials\n\n    def close(self):\n        \"\"\"Closes the database used by the library.\"\"\"\n        self.__cursor.close()\n        self.__sqlite.close()\n        # delete the temporary copy of the database\n        os.remove(self.newdb.name)\n\n    @property\n    def userkeys(self):\n        \"\"\"The list of potential userkeys being used by this library.\n        Only one of these will be valid.\n        \"\"\"\n        if len(self._userkeys) != 0:\n            return self._userkeys\n        for macaddr in self.__getmacaddrs():\n            self._userkeys.extend(self.__getuserkeys(macaddr))\n        return self._userkeys\n\n    @property\n    def books(self):\n        \"\"\"The list of KoboBook objects in the library.\"\"\"\n        if len(self._books) != 0:\n            return self._books\n        \"\"\"Drm-ed kepub\"\"\"\n        for row in self.__cursor.execute(\n            \"SELECT DISTINCT volumeid, Title, Attribution, Series FROM content_keys, content WHERE contentid = volumeid\",\n        ):\n            self._books.append(\n                KoboBook(\n                    row[0],\n                    row[1],\n                    self.__bookfile(row[0]),\n                    \"kepub\",\n                    self.__cursor,\n                    author=row[2],\n                    series=row[3],\n                ),\n            )\n            self._volumeID.append(row[0])\n        \"\"\"Drm-free\"\"\"\n        for f in os.listdir(self.bookdir):\n            if f not in self._volumeID:\n                row = self.__cursor.execute(\n                    \"SELECT Title, Attribution, Series FROM content WHERE ContentID = '\"\n                    + f\n                    + \"'\",\n                ).fetchone()\n                if row is not None:\n             
       fTitle = row[0]\n                    self._books.append(\n                        KoboBook(\n                            f,\n                            fTitle,\n                            self.__bookfile(f),\n                            \"drm-free\",\n                            self.__cursor,\n                            author=row[1],\n                            series=row[2],\n                        ),\n                    )\n                    self._volumeID.append(f)\n        \"\"\"Sort\"\"\"\n        self._books.sort(key=lambda x: x.title)\n        return self._books\n\n    def __bookfile(self, volumeid):\n        \"\"\"The filename needed to open a given book.\"\"\"\n        return os.path.join(self.kobodir, \"kepub\", volumeid)\n\n    def __getmacaddrs(self):\n        \"\"\"The list of all MAC addresses on this machine.\"\"\"\n        macaddrs = []\n        if sys.platform.startswith(\"win\"):\n            c = re.compile(\n                \"\\\\s?(\" + \"[0-9a-f]{2}[:\\\\-]\" * 5 + \"[0-9a-f]{2})(\\\\s|$)\",\n                re.IGNORECASE,\n            )\n            output = subprocess.Popen(\n                \"wmic nic where PhysicalAdapter=True get MACAddress\",\n                shell=True,\n                stdout=subprocess.PIPE,\n                text=True,\n            ).stdout\n            for line in output:\n                if m := c.search(line):\n                    macaddrs.append(re.sub(\"-\", \":\", m[1]).upper())\n        elif sys.platform.startswith(\"darwin\"):\n            c = re.compile(\n                \"\\\\s(\" + \"[0-9a-f]{2}:\" * 5 + \"[0-9a-f]{2})(\\\\s|$)\",\n                re.IGNORECASE,\n            )\n            output = subprocess.check_output(\n                \"/sbin/ifconfig -a\",\n                shell=True,\n                encoding=\"utf-8\",\n            )\n            matches = c.findall(output)\n            macaddrs.extend(m[0].upper() for m in matches)\n        else:\n            # probably linux\n\n     
       # let's try ip\n            c = re.compile(\n                \"\\\\s(\" + \"[0-9a-f]{2}:\" * 5 + \"[0-9a-f]{2})(\\\\s|$)\",\n                re.IGNORECASE,\n            )\n            for line in os.popen(\"ip -br link\"):\n                if m := c.search(line):\n                    macaddrs.append(m[1].upper())\n\n            # let's try ipconfig under wine\n            c = re.compile(\n                \"\\\\s(\" + \"[0-9a-f]{2}-\" * 5 + \"[0-9a-f]{2})(\\\\s|$)\",\n                re.IGNORECASE,\n            )\n            for line in os.popen(\"ipconfig /all\"):\n                if m := c.search(line):\n                    macaddrs.append(re.sub(\"-\", \":\", m[1]).upper())\n\n        # extend the list of macaddrs in any case with the serials\n        # cannot hurt ;-)\n        macaddrs.extend(self._serials)\n\n        return macaddrs\n\n    def __getuserids(self):\n        userids = []\n        cursor = self.__cursor.execute(\"SELECT UserID FROM user\")\n        row = cursor.fetchone()\n        while row is not None:\n            with contextlib.suppress(Exception):\n                userid = row[0]\n                userids.append(userid)\n            row = cursor.fetchone()\n        return userids\n\n    def __getuserkeys(self, macaddr):\n        userids = self.__getuserids()\n        userkeys = []\n        for hash in KOBO_HASH_KEYS:\n            deviceid = hashlib.sha256((hash + macaddr).encode(\"ascii\")).hexdigest()\n            for userid in userids:\n                userkey = hashlib.sha256(\n                    (deviceid + userid).encode(\"ascii\"),\n                ).hexdigest()\n                userkeys.append(binascii.a2b_hex(userkey[32:]))\n        return userkeys\n\n\nclass KoboBook:\n    \"\"\"A Kobo book.\n\n    A Kobo book contains a number of unencrypted and encrypted files.\n    This class provides a list of the encrypted files.\n\n    Each book has the following instance variables:\n    volumeid - a UUID which uniquely refers to the 
book in this library.\n    title - the human-readable book title.\n    filename - the complete path and filename of the book.\n    type - either kepub or drm-free\"\"\"\n\n    def __init__(\n        self,\n        volumeid,\n        title,\n        filename,\n        type,\n        cursor,\n        author=None,\n        series=None,\n    ) -> None:\n        self.volumeid = volumeid\n        self.title = title\n        self.author = author\n        self.series = series\n        self.series_index = None\n        self.filename = filename\n        self.type = type\n        self.__cursor = cursor\n        self._encryptedfiles = {}\n\n    @property\n    def encryptedfiles(self):\n        \"\"\"A dictionary of KoboFiles inside the book.\n\n        The dictionary keys are the relative pathnames, which are\n        the same as the pathnames inside the book 'zip' file.\"\"\"\n        if self.type == \"drm-free\":\n            return self._encryptedfiles\n        if len(self._encryptedfiles) != 0:\n            return self._encryptedfiles\n        # Read the list of encrypted files from the DB\n        for row in self.__cursor.execute(\n            \"SELECT elementid,elementkey FROM content_keys,content WHERE volumeid = ? 
AND volumeid = contentid\",\n            (self.volumeid,),\n        ):\n            self._encryptedfiles[row[0]] = KoboFile(\n                row[0],\n                None,\n                base64.b64decode(row[1]),\n            )\n\n        # Read the list of files from the kepub OPF manifest so that\n        # we can get their proper MIME type.\n        # NOTE: this requires that the OPF file is unencrypted!\n        zin = zipfile.ZipFile(self.filename, \"r\")\n        xmlns = {\n            \"ocf\": \"urn:oasis:names:tc:opendocument:xmlns:container\",\n            \"opf\": \"http://www.idpf.org/2007/opf\",\n        }\n        ocf = ET.fromstring(zin.read(\"META-INF/container.xml\"))\n        opffile = ocf.find(\".//ocf:rootfile\", xmlns).attrib[\"full-path\"]\n        basedir = re.sub(\"[^/]+$\", \"\", opffile)\n        opf = ET.fromstring(zin.read(opffile))\n        zin.close()\n\n        c = re.compile(\"/\")\n        for item in opf.findall(\".//opf:item\", xmlns):\n            # Convert relative URIs\n            href = item.attrib[\"href\"]\n            if not c.match(href):\n                href = \"\".join((basedir, href))\n\n            # Update books we've found from the DB.\n            if href in self._encryptedfiles:\n                mimetype = item.attrib[\"media-type\"]\n                self._encryptedfiles[href].mimetype = mimetype\n        return self._encryptedfiles\n\n    @property\n    def has_drm(self):\n        return self.type != \"drm-free\"\n\n\nclass KoboFile:\n    \"\"\"An encrypted file in a KoboBook.\n\n    Each file has the following instance variables:\n    filename - the relative pathname inside the book zip file.\n    mimetype - the file's MIME type, e.g. 
'image/jpeg'\n    key - the encrypted page key.\"\"\"\n\n    def __init__(self, filename, mimetype, key) -> None:\n        self.filename = filename\n        self.mimetype = mimetype\n        self.key = key\n\n    def decrypt(self, userkey, contents):\n        \"\"\"\n        Decrypt the contents using the provided user key and the\n        file page key. The caller must determine if the decrypted\n        data is correct.\"\"\"\n        # The userkey decrypts the page key (self.key)\n        keyenc = AES(userkey)\n        decryptedkey = keyenc.decrypt(self.key)\n        # The decrypted page key decrypts the content\n        pageenc = AES(decryptedkey)\n        return self.__removeaespadding(pageenc.decrypt(contents))\n\n    def check(self, contents):\n        \"\"\"\n        If the contents uses some known MIME types, check if it\n        conforms to the type. Throw a ValueError exception if not.\n        If the contents uses an uncheckable MIME type, don't check\n        it and don't throw an exception.\n        Returns True if the content was checked, False if it was not\n        checked.\"\"\"\n        if self.mimetype == \"application/xhtml+xml\":\n            # assume utf-8 with no BOM\n            textoffset = 0\n            stride = 1\n            print(f\"Checking text:{contents[:10]}:\")\n            # check for byte order mark\n            if contents[:3] == b\"\\xef\\xbb\\xbf\":\n                # seems to be utf-8 with BOM\n                print(\"Could be utf-8 with BOM\")\n                textoffset = 3\n            elif contents[:2] == b\"\\xfe\\xff\":\n                # seems to be utf-16BE\n                print(\"Could be  utf-16BE\")\n                textoffset = 3\n                stride = 2\n            elif contents[:2] == b\"\\xff\\xfe\":\n                # seems to be utf-16LE\n                print(\"Could be  utf-16LE\")\n                textoffset = 2\n                stride = 2\n            else:\n                print(\"Perhaps utf-8 
without BOM\")\n\n            # now check that the first few characters are in the ASCII range\n            for i in range(textoffset, textoffset + 5 * stride, stride):\n                if contents[i] < 32 or contents[i] > 127:\n                    # Non-ascii, so decryption probably failed\n                    print(f\"Bad character at {i}, value {contents[i]}\")\n                    raise ValueError\n            print(\"Seems to be good text\")\n            return True\n        if self.mimetype == \"image/jpeg\":\n            if contents[:3] == b\"\\xff\\xd8\\xff\":\n                return True\n            print(f\"Bad JPEG: {contents[:3].hex()}\")\n            raise ValueError\n        return False\n\n    def __removeaespadding(self, contents):\n        \"\"\"\n        Remove the trailing padding, using what appears to be the CMS\n        algorithm from RFC 5652 6.3\"\"\"\n        lastchar = binascii.b2a_hex(contents[-1:])\n        strlen = int(lastchar, 16)\n        padding = strlen\n        if strlen == 1:\n            return contents[:-1]\n        if strlen < 16:\n            for _ in range(strlen):\n                testchar = binascii.b2a_hex(contents[-strlen : -(strlen - 1)])\n                if testchar != lastchar:\n                    padding = 0\n        if padding > 0:\n            contents = contents[:-padding]\n        return contents\n\n\ndef decrypt_book(book, lib):\n    print(f\"Converting {book.title}\")\n    zin = zipfile.ZipFile(book.filename, \"r\")\n    # make filename out of Unicode alphanumeric and whitespace equivalents from title\n    outname = \"{}.epub\".format(re.sub(\"[^\\\\s\\\\w]\", \"_\", book.title, 0, re.UNICODE))\n    if book.type == \"drm-free\":\n        print(\"DRM-free book, conversion is not needed\")\n        shutil.copyfile(book.filename, outname)\n        print(f\"Book saved as {os.path.join(os.getcwd(), outname)}\")\n        return os.path.join(os.getcwd(), outname)\n    for userkey in lib.userkeys:\n        
print(f\"Trying key: {userkey.hex()}\")\n        try:\n            zout = zipfile.ZipFile(outname, \"w\", zipfile.ZIP_DEFLATED)\n            for filename in zin.namelist():\n                contents = zin.read(filename)\n                if filename in book.encryptedfiles:\n                    file = book.encryptedfiles[filename]\n                    contents = file.decrypt(userkey, contents)\n                    # Parse failures mean the key is probably wrong.\n                    file.check(contents)\n                zout.writestr(filename, contents)\n            zout.close()\n            print(\"Decryption succeeded.\")\n            print(f\"Book saved as {os.path.join(os.getcwd(), outname)}\")\n            break\n        except ValueError:\n            print(\"Decryption failed.\")\n            zout.close()\n            os.remove(outname)\n    zin.close()\n    return os.path.join(os.getcwd(), outname)\n\n\ndef cli_main(devicedir):\n    serials = []\n\n    lib = KoboLibrary(serials, devicedir)\n\n    for i, book in enumerate(lib.books):\n        print(f\"{i + 1}: {book.title}\")\n\n    choice = input(\"Convert book number... \")\n    try:\n        num = int(choice)\n        books = [lib.books[num - 1]]\n    except (ValueError, IndexError):\n        print(\"Invalid choice. Exiting...\")\n        sys.exit()\n\n    results = [decrypt_book(book, lib) for book in books]\n    lib.close()\n    return results[0]\n\n\nif __name__ == \"__main__\":\n    sys.stdout = SafeUnbuffered(sys.stdout)\n    sys.stderr = SafeUnbuffered(sys.stderr)\n    sys.exit(cli_main())\n"
  },
  {
    "path": "book_maker/translator/__init__.py",
    "content": "from book_maker.translator.caiyun_translator import Caiyun\nfrom book_maker.translator.chatgptapi_translator import ChatGPTAPI\nfrom book_maker.translator.deepl_translator import DeepL\nfrom book_maker.translator.deepl_free_translator import DeepLFree\nfrom book_maker.translator.google_translator import Google\nfrom book_maker.translator.claude_translator import Claude\nfrom book_maker.translator.gemini_translator import Gemini\nfrom book_maker.translator.groq_translator import GroqClient\nfrom book_maker.translator.tencent_transmart_translator import TencentTranSmart\nfrom book_maker.translator.custom_api_translator import CustomAPI\nfrom book_maker.translator.xai_translator import XAIClient\nfrom book_maker.translator.qwen_translator import QwenTranslator\n\nMODEL_DICT = {\n    \"openai\": ChatGPTAPI,\n    \"chatgptapi\": ChatGPTAPI,\n    \"gpt4\": ChatGPTAPI,\n    \"gpt4omini\": ChatGPTAPI,\n    \"gpt4o\": ChatGPTAPI,\n    \"gpt5mini\": ChatGPTAPI,\n    \"o1preview\": ChatGPTAPI,\n    \"o1\": ChatGPTAPI,\n    \"o1mini\": ChatGPTAPI,\n    \"o3mini\": ChatGPTAPI,\n    \"google\": Google,\n    \"caiyun\": Caiyun,\n    \"deepl\": DeepL,\n    \"deeplfree\": DeepLFree,\n    \"claude\": Claude,\n    \"claude-sonnet-4-6\": Claude,\n    \"claude-opus-4-6\": Claude,\n    \"claude-opus-4-5-20251101\": Claude,\n    \"claude-haiku-4-5-20251001\": Claude,\n    \"claude-sonnet-4-5-20250929\": Claude,\n    \"claude-opus-4-1-20250805\": Claude,\n    \"claude-opus-4-20250514\": Claude,\n    \"claude-sonnet-4-20250514\": Claude,\n    \"gemini\": Gemini,\n    \"geminipro\": Gemini,\n    \"groq\": GroqClient,\n    \"tencentransmart\": TencentTranSmart,\n    \"customapi\": CustomAPI,\n    \"xai\": XAIClient,\n    \"qwen\": QwenTranslator,\n    \"qwen-mt-turbo\": QwenTranslator,\n    \"qwen-mt-plus\": QwenTranslator,\n    # add more here\n}\n"
  },
  {
    "path": "book_maker/translator/base_translator.py",
    "content": "import itertools\nfrom abc import ABC, abstractmethod\n\n\nclass Base(ABC):\n    def __init__(self, key, language) -> None:\n        self.keys = itertools.cycle(key.split(\",\"))\n        self.language = language\n\n    @abstractmethod\n    def rotate_key(self):\n        pass\n\n    @abstractmethod\n    def translate(self, text):\n        pass\n\n    def set_deployment_id(self, deployment_id):\n        pass\n"
  },
  {
    "path": "book_maker/translator/caiyun_translator.py",
    "content": "import json\nimport re\nimport time\n\nimport requests\nfrom rich import print\n\nfrom .base_translator import Base\n\n\nclass Caiyun(Base):\n    \"\"\"\n    caiyun translator\n    \"\"\"\n\n    def __init__(self, key, language, **kwargs) -> None:\n        super().__init__(key, language)\n        self.api_url = \"https://api.interpreter.caiyunai.com/v1/translator\"\n        self.headers = {\n            \"content-type\": \"application/json\",\n            \"x-authorization\": f\"token {key}\",\n        }\n        # caiyun api only supports: zh2en, zh2ja, en2zh, ja2zh\n        self.translate_type = \"auto2zh\"\n        if self.language == \"english\":\n            self.translate_type = \"auto2en\"\n        elif self.language == \"japanese\":\n            self.translate_type = \"auto2ja\"\n\n    def rotate_key(self):\n        pass\n\n    def translate(self, text):\n        print(text)\n        # for caiyun translate src issue #279\n        text_list = text.splitlines()\n        num = None\n        if len(text_list) > 1:\n            if text_list[0].isdigit():\n                num = text_list[0]\n        payload = {\n            \"source\": text,\n            \"trans_type\": self.translate_type,\n            \"request_id\": \"demo\",\n            \"detect\": True,\n        }\n        response = requests.request(\n            \"POST\",\n            self.api_url,\n            data=json.dumps(payload),\n            headers=self.headers,\n        )\n        try:\n            t_text = response.json()[\"target\"]\n        except Exception as e:\n            print(str(e), response.text, \"will sleep 60s for the time limit\")\n            if \"limit\" in response.json()[\"message\"]:\n                print(\"will sleep 60s for the time limit\")\n            time.sleep(60)\n            response = requests.request(\n                \"POST\",\n                self.api_url,\n                data=json.dumps(payload),\n                headers=self.headers,\n         
   )\n            t_text = response.json()[\"target\"]\n\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        # for issue #279\n        if num:\n            t_text = str(num) + \"\\n\" + t_text\n        return t_text\n"
  },
  {
    "path": "book_maker/translator/chatgptapi_translator.py",
    "content": "import re\nimport time\nimport os\nimport shutil\nfrom copy import copy\nfrom os import environ\nfrom itertools import cycle\nimport json\nfrom threading import Lock\n\nfrom openai import AzureOpenAI, OpenAI, RateLimitError\nfrom rich import print\n\nfrom .base_translator import Base\nfrom ..config import config\n\nCHATGPT_CONFIG = config[\"translator\"][\"chatgptapi\"]\n\nPROMPT_ENV_MAP = {\n    \"user\": \"BBM_CHATGPTAPI_USER_MSG_TEMPLATE\",\n    \"system\": \"BBM_CHATGPTAPI_SYS_MSG\",\n}\n\nGPT35_MODEL_LIST = [\n    \"gpt-3.5-turbo\",\n    \"gpt-3.5-turbo-1106\",\n    \"gpt-3.5-turbo-16k\",\n    \"gpt-3.5-turbo-0613\",\n    \"gpt-3.5-turbo-16k-0613\",\n    \"gpt-3.5-turbo-0301\",\n    \"gpt-3.5-turbo-0125\",\n]\nGPT4_MODEL_LIST = [\n    \"gpt-4-1106-preview\",\n    \"gpt-4\",\n    \"gpt-4-32k\",\n    \"gpt-4o-2024-05-13\",\n    \"gpt-4-0613\",\n    \"gpt-4-32k-0613\",\n]\n\nGPT4oMINI_MODEL_LIST = [\n    \"gpt-4o-mini\",\n    \"gpt-4o-mini-2024-07-18\",\n]\nGPT4o_MODEL_LIST = [\n    \"gpt-4o\",\n    \"gpt-4o-2024-05-13\",\n    \"gpt-4o-2024-08-06\",\n    \"chatgpt-4o-latest\",\n]\nGPT5MINI_MODEL_LIST = [\n    \"gpt-5-mini\",\n]\nO1PREVIEW_MODEL_LIST = [\n    \"o1-preview\",\n    \"o1-preview-2024-09-12\",\n]\nO1_MODEL_LIST = [\n    \"o1\",\n    \"o1-2024-12-17\",\n]\nO1MINI_MODEL_LIST = [\n    \"o1-mini\",\n    \"o1-mini-2024-09-12\",\n]\nO3MINI_MODEL_LIST = [\n    \"o3-mini\",\n]\n\n\nclass ChatGPTAPI(Base):\n    DEFAULT_PROMPT = \"Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text\"\n\n    def __init__(\n        self,\n        key,\n        language,\n        api_base=None,\n        prompt_template=None,\n        prompt_sys_msg=None,\n        temperature=1.0,\n        context_flag=False,\n        context_paragraph_limit=0,\n        **kwargs,\n    ) -> None:\n        super().__init__(key, language)\n        self.key_len = len(key.split(\",\"))\n        self.openai_client = 
OpenAI(api_key=next(self.keys), base_url=api_base)\n        self.api_base = api_base\n\n        self.prompt_template = (\n            prompt_template\n            or environ.get(PROMPT_ENV_MAP[\"user\"])\n            or self.DEFAULT_PROMPT\n        )\n        self.prompt_sys_msg = (\n            prompt_sys_msg\n            or environ.get(\n                \"OPENAI_API_SYS_MSG\",\n            )  # XXX: for backward compatibility, deprecate soon\n            or environ.get(PROMPT_ENV_MAP[\"system\"])\n            or \"\"\n        )\n        self.system_content = environ.get(\"OPENAI_API_SYS_MSG\") or \"\"\n        self.deployment_id = None\n        self.temperature = temperature\n        self.model_list = None\n        self.context_flag = context_flag\n        self.context_list = []\n        self.context_translated_list = []\n        if context_paragraph_limit > 0:\n            # not set by user, use default\n            self.context_paragraph_limit = context_paragraph_limit\n        else:\n            # set by user, use user's value\n            self.context_paragraph_limit = CHATGPT_CONFIG[\"context_paragraph_limit\"]\n        self.batch_text_list = []\n        self.batch_info_cache = None\n        self.result_content_cache = {}\n        self._api_lock = Lock()\n\n    def rotate_key(self):\n        with self._api_lock:\n            self.openai_client.api_key = next(self.keys)\n\n    def rotate_model(self):\n        with self._api_lock:\n            if self.model_list:\n                self.model = next(self.model_list)\n\n    def create_messages(self, text, intermediate_messages=None):\n        content = self.prompt_template.format(\n            text=text, language=self.language, crlf=\"\\n\"\n        )\n\n        sys_content = self.system_content or self.prompt_sys_msg.format(crlf=\"\\n\")\n        messages = [\n            {\"role\": \"system\", \"content\": sys_content},\n        ]\n\n        if intermediate_messages:\n            
messages.extend(intermediate_messages)\n\n        messages.append({\"role\": \"user\", \"content\": content})\n        return messages\n\n    def create_context_messages(self):\n        messages = []\n        if self.context_flag:\n            messages.append({\"role\": \"user\", \"content\": \"\\n\".join(self.context_list)})\n            messages.append(\n                {\n                    \"role\": \"assistant\",\n                    \"content\": \"\\n\".join(self.context_translated_list),\n                }\n            )\n        return messages\n\n    def create_chat_completion(self, text):\n        messages = self.create_messages(text, self.create_context_messages())\n        completion = self.openai_client.chat.completions.create(\n            model=self.model,\n            messages=messages,\n            temperature=self.temperature,\n        )\n        return completion\n\n    def get_translation(self, text):\n        self.rotate_key()\n        self.rotate_model()  # rotate all the model to avoid the limit\n\n        completion = self.create_chat_completion(text)\n\n        # TODO work well or exception finish by length limit\n        # Check if content is not None before encoding\n        if completion.choices[0].message.content is not None:\n            t_text = completion.choices[0].message.content.encode(\"utf8\").decode() or \"\"\n        else:\n            t_text = \"\"\n\n        if self.context_flag:\n            self.save_context(text, t_text)\n\n        return t_text\n\n    def save_context(self, text, t_text):\n        if self.context_paragraph_limit > 0:\n            self.context_list.append(text)\n            self.context_translated_list.append(t_text)\n            # Remove the oldest context\n            if len(self.context_list) > self.context_paragraph_limit:\n                self.context_list.pop(0)\n                self.context_translated_list.pop(0)\n\n    def translate(self, text, needprint=True):\n        start_time = time.time()\n 
       # todo: Determine whether to print according to the cli option\n        if needprint:\n            print(re.sub(\"\\n{3,}\", \"\\n\\n\", text))\n\n        attempt_count = 0\n        max_attempts = 3\n        t_text = \"\"\n\n        while attempt_count < max_attempts:\n            try:\n                t_text = self.get_translation(text)\n                break\n            except RateLimitError as e:\n                # todo: better sleep time? why sleep alawys about key_len\n                # 1. openai server error or own network interruption, sleep for a fixed time\n                # 2. an apikey has no money or reach limit, don`t sleep, just replace it with another apikey\n                # 3. all apikey reach limit, then use current sleep\n                sleep_time = int(60 / self.key_len)\n                print(e, f\"will sleep {sleep_time} seconds\")\n                time.sleep(sleep_time)\n                attempt_count += 1\n                if attempt_count == max_attempts:\n                    print(f\"Get {attempt_count} consecutive exceptions\")\n                    raise\n            except Exception as e:\n                print(str(e))\n                return\n\n        # todo: Determine whether to print according to the cli option\n        if needprint:\n            print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n\n        time.time() - start_time\n        # print(f\"translation time: {elapsed_time:.1f}s\")\n\n        return t_text\n\n    def translate_and_split_lines(self, text):\n        result_str = self.translate(text, False)\n        lines = result_str.splitlines()\n        lines = [line.strip() for line in lines if line.strip() != \"\"]\n        return lines\n\n    def log_retry(self, state, retry_count, elapsed_time, log_path=\"log/buglog.txt\"):\n        if retry_count == 0:\n            return\n        print(f\"retry {state}\")\n        with open(log_path, \"a\", encoding=\"utf-8\") as f:\n         
   print(\n                f\"retry {state}, count = {retry_count}, time = {elapsed_time:.1f}s\",\n                file=f,\n            )\n\n    def log_translation_mismatch(\n        self,\n        plist_len,\n        result_list,\n        new_str,\n        sep,\n        log_path=\"log/buglog.txt\",\n    ):\n        if len(result_list) == plist_len:\n            return\n        newlist = new_str.split(sep)\n        with open(log_path, \"a\", encoding=\"utf-8\") as f:\n            print(f\"problem size: {plist_len - len(result_list)}\", file=f)\n            for i in range(len(newlist)):\n                print(newlist[i], file=f)\n                print(file=f)\n                if i < len(result_list):\n                    print(\"............................................\", file=f)\n                    print(result_list[i], file=f)\n                    print(file=f)\n                print(\"=============================\", file=f)\n\n        print(\n            f\"bug: {plist_len} paragraphs of text translated into {len(result_list)} paragraphs\",\n        )\n        print(\"continue\")\n\n    def join_lines(self, text):\n        lines = text.splitlines()\n        new_lines = []\n        temp_line = []\n\n        # join\n        for line in lines:\n            if line.strip():\n                temp_line.append(line.strip())\n            else:\n                if temp_line:\n                    new_lines.append(\" \".join(temp_line))\n                    temp_line = []\n                new_lines.append(line)\n\n        if temp_line:\n            new_lines.append(\" \".join(temp_line))\n\n        text = \"\\n\".join(new_lines)\n        # try to fix #372\n        if not text:\n            return \"\"\n\n        # del ^M\n        text = text.replace(\"^M\", \"\\r\")\n        lines = text.splitlines()\n        filtered_lines = [line for line in lines if line.strip() != \"\\r\"]\n        new_text = \"\\n\".join(filtered_lines)\n\n        return new_text\n\n    def 
translate_list(self, plist):\n        plist_len = len(plist)\n\n        # Create a list of original texts and add clear numbering markers to each paragraph\n        formatted_text = \"\"\n        for i, p in enumerate(plist, 1):\n            temp_p = copy(p)\n            for sup in temp_p.find_all(\"sup\"):\n                sup.extract()\n            para_text = temp_p.get_text().strip()\n            # Using special delimiters and clear numbering\n            formatted_text += f\"PARAGRAPH {i}:\\n{para_text}\\n\\n\"\n\n        print(f\"plist len = {plist_len}\")\n\n        original_prompt_template = self.prompt_template\n\n        structured_prompt = (\n            f\"Translate the following {plist_len} paragraphs to {{language}}. \"\n            f\"CRUCIAL INSTRUCTION: Format your response using EXACTLY this structure:\\n\\n\"\n            f\"TRANSLATION OF PARAGRAPH 1:\\n[Your translation of paragraph 1 here]\\n\\n\"\n            f\"TRANSLATION OF PARAGRAPH 2:\\n[Your translation of paragraph 2 here]\\n\\n\"\n            f\"... and so on for all {plist_len} paragraphs.\\n\\n\"\n            f\"You MUST provide EXACTLY {plist_len} translated paragraphs. \"\n            f\"Do not merge, split, or rearrange paragraphs. \"\n            f\"Translate each paragraph independently but consistently. \"\n            f\"Keep all numbers and special formatting in your translation. 
\"\n            f\"Each original paragraph must correspond to exactly one translated paragraph.\"\n        )\n\n        self.prompt_template = structured_prompt + \" ```{text}```\"\n\n        translated_text = self.translate(formatted_text, False)\n\n        # Extract translations from structured output\n        translated_paragraphs = []\n        for i in range(1, plist_len + 1):\n            pattern = (\n                r\"TRANSLATION OF PARAGRAPH \"\n                + str(i)\n                + r\":(.*?)(?=TRANSLATION OF PARAGRAPH \\d+:|\\Z)\"\n            )\n            matches = re.findall(pattern, translated_text, re.DOTALL)\n\n            if matches:\n                translated_paragraph = matches[0].strip()\n                translated_paragraphs.append(translated_paragraph)\n            else:\n                print(f\"Warning: Could not find translation for paragraph {i}\")\n                loose_pattern = (\n                    r\"(?:TRANSLATION|PARAGRAPH|PARA).*?\"\n                    + str(i)\n                    + r\".*?:(.*?)(?=(?:TRANSLATION|PARAGRAPH|PARA).*?\\d+.*?:|\\Z)\"\n                )\n                loose_matches = re.findall(loose_pattern, translated_text, re.DOTALL)\n                if loose_matches:\n                    translated_paragraphs.append(loose_matches[0].strip())\n                else:\n                    translated_paragraphs.append(\"\")\n\n        self.prompt_template = original_prompt_template\n\n        # If the number of extracted paragraphs is incorrect, try the alternative extraction method.\n        if len(translated_paragraphs) != plist_len:\n            print(\n                f\"Warning: Extracted {len(translated_paragraphs)}/{plist_len} paragraphs. 
Using fallback extraction.\"\n            )\n\n            all_para_pattern = r\"(?:TRANSLATION|PARAGRAPH|PARA).*?(\\d+).*?:(.*?)(?=(?:TRANSLATION|PARAGRAPH|PARA).*?\\d+.*?:|\\Z)\"\n            all_matches = re.findall(all_para_pattern, translated_text, re.DOTALL)\n\n            if all_matches:\n                # Create a dictionary to map translation content based on paragraph numbers\n                para_dict = {}\n                for num_str, content in all_matches:\n                    try:\n                        num = int(num_str)\n                        if 1 <= num <= plist_len:\n                            para_dict[num] = content.strip()\n                    except ValueError:\n                        continue\n\n                # Rebuild the translation list in the original order\n                new_translated_paragraphs = []\n                for i in range(1, plist_len + 1):\n                    if i in para_dict:\n                        new_translated_paragraphs.append(para_dict[i])\n                    else:\n                        new_translated_paragraphs.append(\"\")\n\n                if len(new_translated_paragraphs) == plist_len:\n                    translated_paragraphs = new_translated_paragraphs\n\n        if len(translated_paragraphs) < plist_len:\n            translated_paragraphs.extend(\n                [\"\"] * (plist_len - len(translated_paragraphs))\n            )\n        elif len(translated_paragraphs) > plist_len:\n            translated_paragraphs = translated_paragraphs[:plist_len]\n\n        return translated_paragraphs\n\n    def extract_paragraphs(self, text, paragraph_count):\n        \"\"\"Extract paragraphs from translated text, ensuring paragraph count is preserved.\"\"\"\n        # First try to extract by paragraph numbers (1), (2), etc.\n        result_list = []\n        for i in range(1, paragraph_count + 1):\n            pattern = rf\"\\({i}\\)\\s*(.*?)(?=\\s*\\({i + 1}\\)|\\Z)\"\n            match = 
re.search(pattern, text, re.DOTALL)\n            if match:\n                result_list.append(match.group(1).strip())\n\n        # If exact pattern matching failed, try another approach\n        if len(result_list) != paragraph_count:\n            pattern = r\"\\((\\d+)\\)\\s*(.*?)(?=\\s*\\(\\d+\\)|\\Z)\"\n            matches = re.findall(pattern, text, re.DOTALL)\n            if matches:\n                # Sort by paragraph number\n                matches.sort(key=lambda x: int(x[0]))\n                result_list = [match[1].strip() for match in matches]\n\n        # Fallback to original line-splitting approach\n        if len(result_list) != paragraph_count:\n            lines = text.splitlines()\n            result_list = [line.strip() for line in lines if line.strip() != \"\"]\n\n        return result_list\n\n    def set_deployment_id(self, deployment_id):\n        self.deployment_id = deployment_id\n        self.openai_client = AzureOpenAI(\n            api_key=next(self.keys),\n            azure_endpoint=self.api_base,\n            api_version=\"2023-07-01-preview\",\n            azure_deployment=self.deployment_id,\n        )\n\n    def set_gpt35_models(self, ollama_model=\"\"):\n        if ollama_model:\n            self.model_list = cycle([ollama_model])\n            return\n        # gpt3 all models for save the limit\n        if self.deployment_id:\n            self.model_list = cycle([\"gpt-35-turbo\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(GPT35_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_gpt4_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"gpt-4\"])\n        else:\n            my_model_list = [\n          
      i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(GPT4_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_gpt4omini_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"gpt-4o-mini\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(GPT4oMINI_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_gpt4o_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"gpt-4o\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(GPT4o_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_gpt5mini_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"gpt-5-mini\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(GPT5MINI_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_o1preview_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"o1-preview\"])\n      
  else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(O1PREVIEW_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_o1_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"o1\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(O1_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_o1mini_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"o1-mini\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(O1MINI_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_o3mini_models(self):\n        # for issue #375 azure can not use model list\n        if self.deployment_id:\n            self.model_list = cycle([\"o3-mini\"])\n        else:\n            my_model_list = [\n                i[\"id\"] for i in self.openai_client.models.list().model_dump()[\"data\"]\n            ]\n            model_list = list(set(my_model_list) & set(O3MINI_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n\n    def set_model_list(self, model_list):\n        model_list = list(set(model_list))\n        print(f\"Using model list {model_list}\")\n        
self.model_list = cycle(model_list)\n\n    def batch_init(self, book_name):\n        self.book_name = self.sanitize_book_name(book_name)\n\n    def add_to_batch_translate_queue(self, book_index, text):\n        self.batch_text_list.append({\"book_index\": book_index, \"text\": text})\n\n    def sanitize_book_name(self, book_name):\n        # Replace any characters that are not alphanumeric, underscore, hyphen, or dot with an underscore\n        sanitized_book_name = re.sub(r\"[^\\w\\-_\\.]\", \"_\", book_name)\n        # Remove leading and trailing underscores and dots\n        sanitized_book_name = sanitized_book_name.strip(\"._\")\n        return sanitized_book_name\n\n    def batch_metadata_file_path(self):\n        return os.path.join(os.getcwd(), \"batch_files\", f\"{self.book_name}_info.json\")\n\n    def batch_dir(self):\n        return os.path.join(os.getcwd(), \"batch_files\", self.book_name)\n\n    def custom_id(self, book_index):\n        return f\"{self.book_name}-{book_index}\"\n\n    def is_completed_batch(self):\n        batch_metadata_file_path = self.batch_metadata_file_path()\n\n        if not os.path.exists(batch_metadata_file_path):\n            print(\"Batch result file does not exist\")\n            raise Exception(\"Batch result file does not exist\")\n\n        with open(batch_metadata_file_path, \"r\", encoding=\"utf-8\") as f:\n            batch_info = json.load(f)\n\n        for batch_file in batch_info[\"batch_files\"]:\n            batch_status = self.check_batch_status(batch_file[\"batch_id\"])\n            if batch_status.status != \"completed\":\n                return False\n\n        return True\n\n    def batch_translate(self, book_index):\n        if self.batch_info_cache is None:\n            batch_metadata_file_path = self.batch_metadata_file_path()\n            with open(batch_metadata_file_path, \"r\", encoding=\"utf-8\") as f:\n                self.batch_info_cache = json.load(f)\n\n        batch_info = 
self.batch_info_cache\n        target_batch = None\n        for batch in batch_info[\"batch_files\"]:\n            if batch[\"start_index\"] <= book_index < batch[\"end_index\"]:\n                target_batch = batch\n                break\n\n        if not target_batch:\n            raise ValueError(f\"No batch found for book_index {book_index}\")\n\n        if target_batch[\"batch_id\"] in self.result_content_cache:\n            result_content = self.result_content_cache[target_batch[\"batch_id\"]]\n        else:\n            batch_status = self.check_batch_status(target_batch[\"batch_id\"])\n            if batch_status.output_file_id is None:\n                raise ValueError(f\"Batch {target_batch['batch_id']} is not completed\")\n            result_content = self.get_batch_result(batch_status.output_file_id)\n            self.result_content_cache[target_batch[\"batch_id\"]] = result_content\n\n        result_lines = result_content.text.split(\"\\n\")\n        custom_id = self.custom_id(book_index)\n        for line in result_lines:\n            if line.strip():\n                result = json.loads(line)\n                if result[\"custom_id\"] == custom_id:\n                    return result[\"response\"][\"body\"][\"choices\"][0][\"message\"][\n                        \"content\"\n                    ]\n\n        raise ValueError(f\"No result found for custom_id {custom_id}\")\n\n    def create_batch_context_messages(self, index):\n        messages = []\n        if self.context_flag:\n            if index % CHATGPT_CONFIG[\n                \"batch_context_update_interval\"\n            ] == 0 or not hasattr(self, \"cached_context_messages\"):\n                context_messages = []\n                for i in range(index - 1, -1, -1):\n                    item = self.batch_text_list[i]\n                    if len(item[\"text\"].split()) >= 100:\n                        context_messages.append(item[\"text\"])\n                        if len(context_messages) == 
self.context_paragraph_limit:\n                            break\n\n                if len(context_messages) == self.context_paragraph_limit:\n                    print(\"Creating cached context messages\")\n                    self.cached_context_messages = [\n                        {\"role\": \"user\", \"content\": \"\\n\".join(context_messages)},\n                        {\n                            \"role\": \"assistant\",\n                            \"content\": self.get_translation(\n                                \"\\n\".join(context_messages)\n                            ),\n                        },\n                    ]\n\n            if hasattr(self, \"cached_context_messages\"):\n                messages.extend(self.cached_context_messages)\n\n        return messages\n\n    def make_batch_request(self, book_index, text):\n        messages = self.create_messages(\n            text, self.create_batch_context_messages(book_index)\n        )\n        return {\n            \"custom_id\": self.custom_id(book_index),\n            \"method\": \"POST\",\n            \"url\": \"/v1/chat/completions\",\n            \"body\": {\n                # model should not be rotated\n                \"model\": self.batch_model,\n                \"messages\": messages,\n                \"temperature\": self.temperature,\n            },\n        }\n\n    def create_batch_files(self, dest_file_path):\n        file_paths = []\n        # max request 50,000 and max size 100MB\n        lines_per_file = 40000\n        current_file = 0\n\n        for i in range(0, len(self.batch_text_list), lines_per_file):\n            current_file += 1\n            file_path = os.path.join(dest_file_path, f\"{current_file}.jsonl\")\n            start_index = i\n            end_index = i + lines_per_file\n\n            # TODO: Split the file if it exceeds 100MB\n            with open(file_path, \"w\", encoding=\"utf-8\") as f:\n                for text in self.batch_text_list[i : i + 
lines_per_file]:\n                    batch_req = self.make_batch_request(\n                        text[\"book_index\"], text[\"text\"]\n                    )\n                    json.dump(batch_req, f, ensure_ascii=False)\n                    f.write(\"\\n\")\n            file_paths.append(\n                {\n                    \"file_path\": file_path,\n                    \"start_index\": start_index,\n                    \"end_index\": end_index,\n                }\n            )\n\n        return file_paths\n\n    def batch(self):\n        self.rotate_model()\n        self.batch_model = self.model\n        # current working directory\n        batch_dir = self.batch_dir()\n        batch_metadata_file_path = self.batch_metadata_file_path()\n        # cleanup batch dir and result file\n        if os.path.exists(batch_dir):\n            shutil.rmtree(batch_dir)\n        if os.path.exists(batch_metadata_file_path):\n            os.remove(batch_metadata_file_path)\n        os.makedirs(batch_dir, exist_ok=True)\n        # batch execute\n        batch_files = self.create_batch_files(batch_dir)\n        batch_info = []\n        for batch_file in batch_files:\n            file_id = self.upload_batch_file(batch_file[\"file_path\"])\n            batch = self.batch_execute(file_id)\n            batch_info.append(\n                self.create_batch_info(\n                    file_id, batch, batch_file[\"start_index\"], batch_file[\"end_index\"]\n                )\n            )\n        # save batch info\n        batch_info_json = {\n            \"book_id\": self.book_name,\n            \"batch_date\": time.strftime(\"%Y-%m-%d %H:%M:%S\"),\n            \"batch_files\": batch_info,\n        }\n        with open(batch_metadata_file_path, \"w\", encoding=\"utf-8\") as f:\n            json.dump(batch_info_json, f, ensure_ascii=False, indent=2)\n\n    def create_batch_info(self, file_id, batch, start_index, end_index):\n        return {\n            \"input_file_id\": 
file_id,\n            \"batch_id\": batch.id,\n            \"start_index\": start_index,\n            \"end_index\": end_index,\n            \"prefix\": self.book_name,\n        }\n\n    def upload_batch_file(self, file_path):\n        batch_input_file = self.openai_client.files.create(\n            file=open(file_path, \"rb\"), purpose=\"batch\"\n        )\n        return batch_input_file.id\n\n    def batch_execute(self, file_id):\n        current_time = time.strftime(\"%Y-%m-%d %H:%M:%S\")\n        res = self.openai_client.batches.create(\n            input_file_id=file_id,\n            endpoint=\"/v1/chat/completions\",\n            completion_window=\"24h\",\n            metadata={\n                \"description\": f\"Batch job for {self.book_name} at {current_time}\"\n            },\n        )\n        if res.errors:\n            print(res.errors)\n            raise Exception(f\"Batch execution failed: {res.errors}\")\n        return res\n\n    def check_batch_status(self, batch_id):\n        return self.openai_client.batches.retrieve(batch_id)\n\n    def get_batch_result(self, output_file_id):\n        return self.openai_client.files.content(output_file_id)\n"
  },
  {
    "path": "book_maker/translator/claude_translator.py",
    "content": "import re\nfrom rich import print\nfrom anthropic import Anthropic\n\nfrom .base_translator import Base\n\n\nclass Claude(Base):\n    def __init__(\n        self,\n        key,\n        language,\n        api_base=None,\n        prompt_template=None,\n        prompt_sys_msg=None,\n        temperature=1.0,\n        context_flag=False,\n        context_paragraph_limit=5,\n        **kwargs,\n    ) -> None:\n        super().__init__(key, language)\n        self.api_url = api_base or \"https://api.anthropic.com\"\n        self.client = Anthropic(base_url=api_base, api_key=key, timeout=20)\n        self.model = \"claude-haiku-4-5-20251001\"  # default it for now\n        self.language = language\n        self.prompt_template = (\n            prompt_template\n            or \"Help me translate the text within triple backticks into {language} and provide only the translated result.\\n```{text}```\"\n        )\n        self.prompt_sys_msg = prompt_sys_msg or \"\"\n        self.temperature = temperature\n        self.context_flag = context_flag\n        self.context_list = []\n        self.context_translated_list = []\n        self.context_paragraph_limit = context_paragraph_limit\n\n    def rotate_key(self):\n        pass\n\n    def set_claude_model(self, model_name):\n        self.model = model_name\n\n    def create_messages(self, text, intermediate_messages=None):\n        \"\"\"Create messages for the current translation request\"\"\"\n        current_msg = {\n            \"role\": \"user\",\n            \"content\": self.prompt_template.format(\n                text=text,\n                language=self.language,\n            ),\n        }\n\n        messages = []\n        if intermediate_messages:\n            messages.extend(intermediate_messages)\n        messages.append(current_msg)\n\n        return messages\n\n    def create_context_messages(self):\n        \"\"\"Create a message pair containing all context paragraphs\"\"\"\n        if not 
self.context_flag or not self.context_list:\n            return []\n\n        # Create a single message pair for all previous context\n        return [\n            {\n                \"role\": \"user\",\n                \"content\": self.prompt_template.format(\n                    text=\"\\n\\n\".join(self.context_list),\n                    language=self.language,\n                ),\n            },\n            {\"role\": \"assistant\", \"content\": \"\\n\\n\".join(self.context_translated_list)},\n        ]\n\n    def save_context(self, text, t_text):\n        \"\"\"Save the current translation pair to context\"\"\"\n        if not self.context_flag:\n            return\n\n        self.context_list.append(text)\n        self.context_translated_list.append(t_text)\n\n        # Keep only the most recent paragraphs within the limit\n        if len(self.context_list) > self.context_paragraph_limit:\n            self.context_list.pop(0)\n            self.context_translated_list.pop(0)\n\n    def translate(self, text):\n        print(text)\n        self.rotate_key()\n\n        # Create messages with context\n        messages = self.create_messages(text, self.create_context_messages())\n\n        r = self.client.messages.create(\n            max_tokens=4096,\n            messages=messages,\n            system=self.prompt_sys_msg,\n            temperature=self.temperature,\n            model=self.model,\n        )\n        t_text = r.content[0].text\n\n        if self.context_flag:\n            self.save_context(text, t_text)\n\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        return t_text\n"
  },
  {
    "path": "book_maker/translator/custom_api_translator.py",
    "content": "from .base_translator import Base\nimport re\nimport json\nimport requests\nimport time\nfrom rich import print\n\n\nclass CustomAPI(Base):\n    \"\"\"\n    Custom API translator\n    \"\"\"\n\n    def __init__(self, custom_api, language, **kwargs) -> None:\n        super().__init__(custom_api, language)\n        self.language = language\n        self.custom_api = custom_api\n\n    def rotate_key(self):\n        pass\n\n    def translate(self, text):\n        print(text)\n        custom_api = self.custom_api\n        data = {\"text\": text, \"source_lang\": \"auto\", \"target_lang\": self.language}\n        post_data = json.dumps(data)\n        r = requests.post(url=custom_api, data=post_data, timeout=10).text\n        t_text = json.loads(r)[\"data\"]\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        time.sleep(5)\n        return t_text\n"
  },
  {
    "path": "book_maker/translator/deepl_free_translator.py",
    "content": "import time\nimport random\nimport re\n\nfrom book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE\n\nfrom .base_translator import Base\nfrom rich import print\nfrom PyDeepLX import PyDeepLX\n\n\nclass DeepLFree(Base):\n    \"\"\"\n    DeepL free translator\n    \"\"\"\n\n    def __init__(self, key, language, **kwargs) -> None:\n        super().__init__(key, language)\n        l = language if language in LANGUAGES else TO_LANGUAGE_CODE.get(language)\n        if l not in [\n            \"bg\",\n            \"zh\",\n            \"cs\",\n            \"da\",\n            \"nl\",\n            \"en-US\",\n            \"en-GB\",\n            \"et\",\n            \"fi\",\n            \"fr\",\n            \"de\",\n            \"el\",\n            \"hu\",\n            \"id\",\n            \"it\",\n            \"ja\",\n            \"lv\",\n            \"lt\",\n            \"pl\",\n            \"pt-PT\",\n            \"pt-BR\",\n            \"ro\",\n            \"ru\",\n            \"sk\",\n            \"sl\",\n            \"es\",\n            \"sv\",\n            \"tr\",\n            \"uk\",\n            \"ko\",\n            \"nb\",\n        ]:\n            raise Exception(f\"DeepL do not support {l}\")\n        self.language = l\n        self.time_random = [0.3, 0.5, 1, 1.3, 1.5, 2]\n\n    def rotate_key(self):\n        pass\n\n    def translate(self, text):\n        print(text)\n        t_text = str(PyDeepLX.translate(text, \"EN\", self.language))\n        # spider rule\n        time.sleep(random.choice(self.time_random))\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        return t_text\n"
  },
  {
    "path": "book_maker/translator/deepl_translator.py",
    "content": "import json\nimport time\n\nimport requests\nimport re\n\nfrom book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE\n\nfrom .base_translator import Base\nfrom rich import print\n\n\nclass DeepL(Base):\n    \"\"\"\n    DeepL translator\n    \"\"\"\n\n    def __init__(self, key, language, **kwargs) -> None:\n        super().__init__(key, language)\n        self.api_url = \"https://dpl-translator.p.rapidapi.com/translate\"\n        self.headers = {\n            \"content-type\": \"application/json\",\n            \"X-RapidAPI-Key\": \"\",\n            \"X-RapidAPI-Host\": \"dpl-translator.p.rapidapi.com\",\n        }\n        l = language if language in LANGUAGES else TO_LANGUAGE_CODE.get(language)\n        if l not in [\n            \"bg\",\n            \"zh\",\n            \"cs\",\n            \"da\",\n            \"nl\",\n            \"en-US\",\n            \"en-GB\",\n            \"et\",\n            \"fi\",\n            \"fr\",\n            \"de\",\n            \"el\",\n            \"hu\",\n            \"id\",\n            \"it\",\n            \"ja\",\n            \"lv\",\n            \"lt\",\n            \"pl\",\n            \"pt-PT\",\n            \"pt-BR\",\n            \"ro\",\n            \"ru\",\n            \"sk\",\n            \"sl\",\n            \"es\",\n            \"sv\",\n            \"tr\",\n            \"uk\",\n            \"ko\",\n            \"nb\",\n        ]:\n            raise Exception(f\"DeepL do not support {l}\")\n        self.language = l\n\n    def rotate_key(self):\n        self.headers[\"X-RapidAPI-Key\"] = f\"{next(self.keys)}\"\n\n    def translate(self, text):\n        self.rotate_key()\n        print(text)\n        payload = {\"text\": text, \"source\": \"EN\", \"target\": self.language}\n        try:\n            response = requests.request(\n                \"POST\",\n                self.api_url,\n                data=json.dumps(payload),\n                headers=self.headers,\n            )\n        except 
Exception as e:\n            print(e)\n            time.sleep(30)\n            response = requests.request(\n                \"POST\",\n                self.api_url,\n                data=json.dumps(payload),\n                headers=self.headers,\n            )\n        t_text = response.json().get(\"text\", \"\")\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        return t_text\n"
  },
  {
    "path": "book_maker/translator/gemini_translator.py",
    "content": "import re\nimport time\nfrom os import environ\nfrom itertools import cycle\n\nimport google.generativeai as genai\nfrom google.generativeai.types.generation_types import (\n    StopCandidateException,\n    BlockedPromptException,\n)\nfrom rich import print\n\nfrom .base_translator import Base\n\ngeneration_config = {\n    \"temperature\": 1.0,\n    \"top_p\": 1,\n    \"top_k\": 1,\n    \"max_output_tokens\": 8192,\n}\n\nsafety_settings = {\n    \"HATE\": \"BLOCK_NONE\",\n    \"HARASSMENT\": \"BLOCK_NONE\",\n    \"SEXUAL\": \"BLOCK_NONE\",\n    \"DANGEROUS\": \"BLOCK_NONE\",\n}\n\nPROMPT_ENV_MAP = {\n    \"user\": \"BBM_GEMINIAPI_USER_MSG_TEMPLATE\",\n    \"system\": \"BBM_GEMINIAPI_SYS_MSG\",\n}\n\nGEMINIPRO_MODEL_LIST = [\n    \"gemini-1.5-pro\",\n    \"gemini-1.5-pro-latest\",\n    \"gemini-1.5-pro-001\",\n    \"gemini-1.5-pro-002\",\n]\n\nGEMINIFLASH_MODEL_LIST = [\n    \"gemini-1.5-flash\",\n    \"gemini-1.5-flash-latest\",\n    \"gemini-1.5-flash-001\",\n    \"gemini-1.5-flash-002\",\n    \"gemini-2.0-flash-exp\",\n    \"gemini-2.5-flash-preview-04-17\",\n]\n\n\nclass Gemini(Base):\n    \"\"\"\n    Google gemini translator\n    \"\"\"\n\n    DEFAULT_PROMPT = \"Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text\"\n\n    def __init__(\n        self,\n        key,\n        language,\n        prompt_template=None,\n        prompt_sys_msg=None,\n        context_flag=False,\n        temperature=1.0,\n        **kwargs,\n    ) -> None:\n        super().__init__(key, language)\n        self.context_flag = context_flag\n        self.prompt = (\n            prompt_template\n            or environ.get(PROMPT_ENV_MAP[\"user\"])\n            or self.DEFAULT_PROMPT\n        )\n        self.prompt_sys_msg = (\n            prompt_sys_msg\n            or environ.get(PROMPT_ENV_MAP[\"system\"])\n            or None  # Allow None, but not empty string\n        )\n        self.interval = 3\n        
genai.configure(api_key=next(self.keys))\n        generation_config[\"temperature\"] = temperature\n\n    def create_convo(self):\n        model = genai.GenerativeModel(\n            model_name=self.model,\n            generation_config=generation_config,\n            safety_settings=safety_settings,\n            system_instruction=self.prompt_sys_msg,\n        )\n        self.convo = model.start_chat()\n        # print(model)  # Uncomment to debug and inspect the model details.\n\n    def rotate_model(self):\n        self.model = next(self.model_list)\n        self.create_convo()\n        print(f\"Using model {self.model}\")\n\n    def rotate_key(self):\n        genai.configure(api_key=next(self.keys))\n        self.create_convo()\n\n    def translate(self, text):\n        delay = 1\n        exponential_base = 2\n        attempt_count = 0\n        max_attempts = 7\n\n        t_text = \"\"\n        print(text)\n        # same for caiyun translate src issue #279 gemini for #374\n        text_list = text.splitlines()\n        num = None\n        if len(text_list) > 1:\n            if text_list[0].isdigit():\n                num = text_list[0]\n\n        while attempt_count < max_attempts:\n            try:\n                self.convo.send_message(\n                    self.prompt.format(text=text, language=self.language)\n                )\n                t_text = self.convo.last.text.strip()\n                # 检查是否包含特定标签,如果有则只返回标签内的内容\n                tag_pattern = (\n                    r\"<step3_refined_translation>(.*?)</step3_refined_translation>\"\n                )\n                tag_match = re.search(tag_pattern, t_text, re.DOTALL)\n                if tag_match:\n                    print(\n                        \"[bold green]\"\n                        + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text)\n                        + \"[/bold green]\"\n                    )\n                    t_text = tag_match.group(1).strip()\n                    # print(\"[bold 
green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n                break\n            except StopCandidateException as e:\n                print(\n                    f\"Translation failed due to StopCandidateException: {e} Attempting to switch model...\"\n                )\n                self.rotate_model()\n            except BlockedPromptException as e:\n                print(\n                    f\"Translation failed due to BlockedPromptException: {e} Attempting to switch model...\"\n                )\n                self.rotate_model()\n            except Exception as e:\n                print(\n                    f\"Translation failed due to {type(e).__name__}: {e} Will sleep {delay} seconds\"\n                )\n                time.sleep(delay)\n                delay *= exponential_base\n\n                self.rotate_key()\n                if attempt_count >= 1:\n                    self.rotate_model()\n\n            attempt_count += 1\n\n        if attempt_count == max_attempts:\n            print(f\"Translation failed after {max_attempts} attempts.\")\n            return\n\n        if self.context_flag:\n            if len(self.convo.history) > 10:\n                self.convo.history = self.convo.history[2:]\n        else:\n            self.convo.history = []\n\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        # for rate limit(RPM)\n        time.sleep(self.interval)\n        if num:\n            t_text = str(num) + \"\\n\" + t_text\n        return t_text\n\n    def set_interval(self, interval):\n        self.interval = interval\n\n    def set_geminipro_models(self):\n        self.set_models(GEMINIPRO_MODEL_LIST)\n\n    def set_geminiflash_models(self):\n        self.set_models(GEMINIFLASH_MODEL_LIST)\n\n    def set_models(self, allowed_models):\n        available_models = [\n            re.sub(r\"^models/\", \"\", i.name) for i in genai.list_models()\n        ]\n        
model_list = sorted(\n            list(set(available_models) & set(allowed_models)),\n            key=allowed_models.index,\n        )\n        print(f\"Using model list {model_list}\")\n        self.model_list = cycle(model_list)\n        self.rotate_model()\n\n    def set_model_list(self, model_list):\n        # keep the order of input\n        model_list = sorted(list(set(model_list)), key=model_list.index)\n        print(f\"Using model list {model_list}\")\n        self.model_list = cycle(model_list)\n        self.rotate_model()\n"
  },
  {
    "path": "book_maker/translator/google_translator.py",
    "content": "import re\nimport requests\nfrom rich import print\n\nfrom book_maker.utils import TO_LANGUAGE_CODE\nfrom .base_translator import Base\n\n\nclass Google(Base):\n    \"\"\"\n    google translate\n    \"\"\"\n\n    def __init__(self, key, language, **kwargs) -> None:\n        super().__init__(key, language)\n\n        # Convert language name to code if needed, otherwise use as-is\n        if language.lower() in TO_LANGUAGE_CODE:\n            language_code = TO_LANGUAGE_CODE[language.lower()]\n        else:\n            language_code = language\n\n        self.api_url = f\"https://translate.google.com/translate_a/single?client=it&dt=qca&dt=t&dt=rmt&dt=bd&dt=rms&dt=sos&dt=md&dt=gt&dt=ld&dt=ss&dt=ex&otf=2&dj=1&hl=en&ie=UTF-8&oe=UTF-8&sl=auto&tl={language_code}\"\n        self.headers = {\n            \"Content-Type\": \"application/x-www-form-urlencoded\",\n            \"User-Agent\": \"GoogleTranslate/6.29.59279 (iPhone; iOS 15.4; en; iPhone14,2)\",\n        }\n        # TODO support more models here\n        self.session = requests.session()\n        self.language = language\n\n    def rotate_key(self):\n        pass\n\n    def translate(self, text):\n        print(text)\n        \"\"\"r = self.session.post(\n            self.api_url,\n            headers=self.headers,\n            data=f\"q={requests.utils.quote(text)}\",\n        )\n        if not r.ok:\n            return text\n        t_text = \"\".join(\n            [sentence.get(\"trans\", \"\") for sentence in r.json()[\"sentences\"]],\n        )\"\"\"\n        t_text = self._retry_translate(text)\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        return t_text\n\n    def _retry_translate(self, text, timeout=3):\n        time = 0\n        while time <= timeout:\n            time += 1\n            r = self.session.post(\n                self.api_url,\n                headers=self.headers,\n                
data=f\"q={requests.utils.quote(text)}\",\n                timeout=3,\n            )\n            if r.ok:\n                t_text = \"\".join(\n                    [sentence.get(\"trans\", \"\") for sentence in r.json()[\"sentences\"]],\n                )\n                return t_text\n        return text\n"
  },
  {
    "path": "book_maker/translator/groq_translator.py",
    "content": "from groq import Groq\nfrom .chatgptapi_translator import ChatGPTAPI\nfrom os import linesep\nfrom itertools import cycle\n\nGROQ_MODEL_LIST = [\n    \"llama3-8b-8192\",\n    \"llama3-70b-8192\",\n    \"mixtral-8x7b-32768\",\n    \"gemma-7b-it\",\n]\n\n\nclass GroqClient(ChatGPTAPI):\n    def rotate_model(self):\n        if not self.model_list:\n            model_list = list(set(GROQ_MODEL_LIST))\n            print(f\"Using model list {model_list}\")\n            self.model_list = cycle(model_list)\n        self.model = next(self.model_list)\n\n    def create_chat_completion(self, text):\n        self.groq_client = Groq(api_key=next(self.keys))\n\n        content = f\"{self.prompt_template.format(text=text, language=self.language, crlf=linesep)}\"\n        sys_content = self.system_content or self.prompt_sys_msg.format(crlf=\"\\n\")\n\n        messages = [\n            {\"role\": \"system\", \"content\": sys_content},\n            {\"role\": \"user\", \"content\": content},\n        ]\n\n        if self.deployment_id:\n            return self.groq_client.chat.completions.create(\n                engine=self.deployment_id,\n                messages=messages,\n                temperature=self.temperature,\n                azure=True,\n            )\n        return self.groq_client.chat.completions.create(\n            model=self.model,\n            messages=messages,\n            temperature=self.temperature,\n        )\n"
  },
  {
    "path": "book_maker/translator/litellm_translator.py",
    "content": "from os import linesep\n\nfrom litellm import completion\n\nfrom book_maker.translator.chatgptapi_translator import ChatGPTAPI\n\nPROMPT_ENV_MAP = {\n    \"user\": \"BBM_CHATGPTAPI_USER_MSG_TEMPLATE\",\n    \"system\": \"BBM_CHATGPTAPI_SYS_MSG\",\n}\n\n\nclass liteLLM(ChatGPTAPI):\n    def create_chat_completion(self, text):\n        # content = self.prompt_template.format(\n        #     text=text, language=self.language, crlf=\"\\n\"\n        # )\n\n        content = f\"{self.context if self.context_flag else ''} {self.prompt_template.format(text=text, language=self.language, crlf=linesep)}\"\n\n        sys_content = self.system_content or self.prompt_sys_msg.format(crlf=\"\\n\")\n\n        context_sys_str = \"For each passage given, you may be provided a summary of the story up until this point (wrapped in tags '<summary>' and '</summary>') for context within the query, to provide background context of the story up until this point. If it's provided, use the context summary to aid you in translation with deeper comprehension, and write a new summary above the returned translation, wrapped in '<summary>' HTML-like tags, including important details (if relevant) from the new passage, retaining the most important key details from the existing summary, and dropping out less important details. If the summary is blank, assume it is the start of the story and write a summary from scratch. Do not make the summary longer than a paragraph, and smaller details can be replaced based on the relative importance of new details. 
The summary should be formatted in straightforward, inornate text, briefly summarising the entire story (from the start, including information before the given passage, leading up to the given passage) to act as an instructional payload for a Large-Language AI Model to fully understand the context of the passage.\"\n\n        sys_content = f\"{self.system_content or self.prompt_sys_msg.format(crlf=linesep)} {context_sys_str if self.context_flag else ''} \"\n\n        messages = [\n            {\"role\": \"system\", \"content\": sys_content},\n            {\"role\": \"user\", \"content\": content},\n        ]\n\n        if self.deployment_id:\n            return completion(\n                engine=self.deployment_id,\n                messages=messages,\n                temperature=self.temperature,\n                azure=True,\n            )\n\n        return completion(\n            model=\"gpt-3.5-turbo\",\n            messages=messages,\n            temperature=self.temperature,\n        )\n"
  },
  {
    "path": "book_maker/translator/qwen_translator.py",
    "content": "import re\nimport time\nfrom rich import print\nfrom openai import OpenAI\n\nfrom .base_translator import Base\n\n\nclass QwenTranslator(Base):\n    \"\"\"\n    Qwen-MT translator using Alibaba Cloud's DashScope API\n    Specialized translation model supporting 92 languages with advanced features\n    Official documentation: https://help.aliyun.com/document_detail/2860790.html\n\n    Todo: support more languages, terminology, and domain hints\n    \"\"\"\n\n    # Language mapping from bilingual_book_maker format to Qwen language codes\n    LANGUAGE_MAP = {\n        # Common languages\n        \"english\": \"English\",\n        \"chinese\": \"Chinese\",\n        \"simplified chinese\": \"Chinese\",\n        \"traditional chinese\": \"Traditional Chinese\",\n        \"japanese\": \"Japanese\",\n        \"korean\": \"Korean\",\n        \"spanish\": \"Spanish\",\n        \"french\": \"French\",\n        \"german\": \"German\",\n        \"portuguese\": \"Portuguese\",\n        \"italian\": \"Italian\",\n        \"russian\": \"Russian\",\n        \"arabic\": \"Arabic\",\n        \"hindi\": \"Hindi\",\n        \"thai\": \"Thai\",\n        \"vietnamese\": \"Vietnamese\",\n        \"indonesian\": \"Indonesian\",\n        \"malay\": \"Malay\",\n        \"dutch\": \"Dutch\",\n        \"turkish\": \"Turkish\",\n        \"polish\": \"Polish\",\n        \"czech\": \"Czech\",\n        \"hungarian\": \"Hungarian\",\n        \"romanian\": \"Romanian\",\n        \"greek\": \"Greek\",\n        \"hebrew\": \"Hebrew\",\n        \"finnish\": \"Finnish\",\n        \"danish\": \"Danish\",\n        \"swedish\": \"Swedish\",\n        \"norwegian\": \"Norwegian Bokmål\",\n        \"ukrainian\": \"Ukrainian\",\n        \"bulgarian\": \"Bulgarian\",\n        \"serbian\": \"Serbian\",\n        \"croatian\": \"Croatian\",\n        \"slovenian\": \"Slovenian\",\n        \"slovak\": \"Slovak\",\n        \"lithuanian\": \"Lithuanian\",\n        \"latvian\": \"Latvian\",\n        
\"estonian\": \"Estonian\",\n        # Add more mappings as needed\n    }\n\n    def __init__(\n        self,\n        key,\n        language,\n        model=\"qwen-mt-turbo\",\n        source_lang=\"auto\",\n        api_base=None,\n        prompt_template=None,  # Not used for translation models\n        prompt_sys_msg=None,  # Not used for translation models\n        temperature=None,  # Not used for translation models\n        context_flag=False,\n        context_paragraph_limit=5,\n        terminology=None,\n        domain_hint=None,\n        **kwargs,\n    ) -> None:\n        super().__init__(key, language)\n\n        # API configuration\n        self.api_base = api_base or \"https://dashscope.aliyuncs.com/compatible-mode/v1\"\n        self.client = OpenAI(\n            api_key=next(self.keys), base_url=self.api_base, timeout=60\n        )\n\n        # Model configuration\n        self.model = self.set_qwen_model(model)\n        self.source_lang = source_lang\n        self.target_lang = self._map_language(language)\n\n        # Advanced features\n        self.terminology = self.set_terminology(terminology)\n        self.domain_hint = self.set_domain_hint(domain_hint)\n\n        # Context/Translation memory support\n        self.context_flag = context_flag\n        self.context_list = []\n        self.context_translated_list = []\n        self.context_paragraph_limit = context_paragraph_limit\n\n        print(\"[bold blue]Qwen Translator initialized:[/bold blue]\")\n        print(f\"  Model: {self.model}\")\n        print(f\"  Source Language: {self.source_lang}\")\n        print(f\"  Target Language: {self.target_lang}\")\n        if self.domain_hint:\n            print(f\"  Domain Hint: {self.domain_hint}\")\n\n    def rotate_key(self):\n        \"\"\"Rotate API key for load balancing\"\"\"\n        try:\n            self.client.api_key = next(self.keys)\n        except StopIteration:\n            pass\n\n    def _map_language(self, language):\n        
\"\"\"Map language name to Qwen language format\"\"\"\n        language_lower = language.lower().strip()\n\n        # Direct mapping\n        if language_lower in self.LANGUAGE_MAP:\n            return self.LANGUAGE_MAP[language_lower]\n\n        # Try partial matching for common variations\n        for key, value in self.LANGUAGE_MAP.items():\n            if language_lower in key or key in language_lower:\n                return value\n\n        # Fallback to original language name with proper capitalization\n        return language.title()\n\n    def _create_translation_options(self):\n        \"\"\"Create translation options for the API request\"\"\"\n        options = {\"source_lang\": self.source_lang, \"target_lang\": self.target_lang}\n\n        # Add terminology if provided\n        if self.terminology and len(self.terminology) > 0:\n            options[\"terms\"] = self.terminology\n\n        # Add domain hint if provided (must be in English)\n        if self.domain_hint and len(self.domain_hint) > 0:\n            options[\"domains\"] = self.domain_hint\n\n        # Add translation memory if context is enabled\n        if self.context_flag and self.context_list:\n            tm_list = []\n            for src, tgt in zip(self.context_list, self.context_translated_list):\n                tm_list.append({\"source\": src, \"target\": tgt})\n            if tm_list:\n                options[\"tm_list\"] = tm_list\n\n        return options\n\n    def save_context(self, text, t_text):\n        \"\"\"Save the current translation pair to context for translation memory\"\"\"\n        if not self.context_flag:\n            return\n\n        self.context_list.append(text)\n        self.context_translated_list.append(t_text)\n\n        # Keep only the most recent paragraphs within the limit\n        if len(self.context_list) > self.context_paragraph_limit:\n            self.context_list.pop(0)\n            self.context_translated_list.pop(0)\n\n    def translate(self, 
text, needprint=True):\n        \"\"\"Main translation method\"\"\"\n        start_time = time.time()\n\n        if needprint:\n            print(re.sub(r\"\\n{3,}\", \"\\n\\n\", text))\n\n        attempt_count = 0\n        max_attempts = 3\n        t_text = \"\"\n\n        while attempt_count < max_attempts:\n            try:\n                self.rotate_key()\n\n                # Prepare messages\n                messages = [{\"role\": \"user\", \"content\": text}]\n\n                # Create translation options\n                translation_options = self._create_translation_options()\n\n                # Make API request\n                completion = self.client.chat.completions.create(\n                    model=self.model,\n                    messages=messages,\n                    extra_body={\"translation_options\": translation_options},\n                )\n\n                # Extract translated text\n                if completion.choices[0].message.content:\n                    t_text = completion.choices[0].message.content.strip()\n                else:\n                    t_text = \"\"\n\n                # Save to context for translation memory\n                if self.context_flag and t_text:\n                    self.save_context(text, t_text)\n\n                break\n\n            except Exception as e:\n                attempt_count += 1\n                print(\n                    f\"[red]Translation attempt {attempt_count} failed: {str(e)}[/red]\"\n                )\n\n                if attempt_count >= max_attempts:\n                    print(\n                        f\"[red]Translation failed after {max_attempts} attempts[/red]\"\n                    )\n                    t_text = text  # Fallback to original text\n                else:\n                    time.sleep(1)  # Wait before retry\n\n        if needprint:\n            print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n\n        end_time = 
time.time()\n        print(f\"[dim]Translation time: {end_time - start_time:.2f}s[/dim]\")\n\n        return t_text\n\n    def set_terminology(self, terminology):\n        \"\"\"Set custom terminology for translation\n\n        Args:\n            terminology: List of dict with 'source' and 'target' keys\n                        e.g., [{\"source\": \"API\", \"target\": \"应用程序接口\"}]\n        \"\"\"\n        self.terminology = terminology or []\n        print(f\"[blue]Terminology updated: {len(self.terminology)} terms[/blue]\")\n\n    def set_domain_hint(self, domain_hint):\n        \"\"\"Set domain hint for specialized translation\n\n        Args:\n            domain_hint: String describing the domain in English\n                        e.g., \"Technical documentation for software development\"\n        \"\"\"\n        self.domain_hint = domain_hint or \"\"\n        print(f\"[blue]Domain hint set: {self.domain_hint}[/blue]\")\n\n    def set_qwen_model(self, model_name):\n        \"\"\"Set Qwen model type\n\n        Args:\n            model_name: Either \"qwen-mt-turbo\" or \"qwen-mt-plus\"\n        \"\"\"\n        if model_name in [\"qwen-mt-turbo\", \"qwen-mt-plus\"]:\n            self.model = model_name\n            print(f\"[blue]Qwen model set to: {self.model}[/blue]\")\n        else:\n            self.model = \"qwen-mt-turbo\"\n            print(\n                f\"[red]Invalid Qwen model: {model_name}. Using default: {self.model}[/red]\"\n            )\n"
  },
  {
    "path": "book_maker/translator/tencent_transmart_translator.py",
    "content": "import re\nimport time\nimport uuid\nimport requests\n\nfrom rich import print\nfrom .base_translator import Base\n\n\nclass TencentTranSmart(Base):\n    \"\"\"\n    Tencent TranSmart translator\n    \"\"\"\n\n    def __init__(self, key, language, **kwargs) -> None:\n        super().__init__(key, language)\n        self.api_url = \"https://transmart.qq.com/api/imt\"\n        self.header = {\n            \"authority\": \"transmart.qq.com\",\n            \"content-type\": \"application/json\",\n            \"origin\": \"https://transmart.qq.com\",\n            \"referer\": \"https://transmart.qq.com/zh-CN/index\",\n            \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36\",\n        }\n        self.uuid = str(uuid.uuid4())\n        self.session = requests.Session()\n        self.translate_type = \"zh\"\n        if self.language == \"english\":\n            self.translate_type = \"en\"\n\n    def rotate_key(self):\n        pass\n\n    def translate(self, text):\n        print(text)\n        source_language, text_list = self.text_analysis(text)\n        client_key = self.get_client_key()\n        api_form_data = {\n            \"header\": {\n                \"fn\": \"auto_translation\",\n                \"client_key\": client_key,\n            },\n            \"type\": \"plain\",\n            \"model_category\": \"normal\",\n            \"source\": {\n                \"lang\": source_language,\n                \"text_list\": [\"\"] + text_list + [\"\"],\n            },\n            \"target\": {\"lang\": self.translate_type},\n        }\n\n        response = self.session.post(\n            self.api_url, json=api_form_data, headers=self.header, timeout=3\n        )\n        t_text = \"\".join(response.json()[\"auto_translation\"])\n        print(\"[bold green]\" + re.sub(\"\\n{3,}\", \"\\n\\n\", t_text) + \"[/bold green]\")\n        return t_text\n\n    def 
text_analysis(self, text):\n        client_key = self.get_client_key()\n        self.header.update({\"Cookie\": \"TSMT_CLIENT_KEY={}\".format(client_key)})\n        analysis_request_data = {\n            \"header\": {\n                \"fn\": \"text_analysis\",\n                \"session\": \"\",\n                \"client_key\": client_key,\n                \"user\": \"\",\n            },\n            \"text\": text,\n            \"type\": \"plain\",\n            \"normalize\": {\"merge_broken_line\": \"false\"},\n        }\n        r = self.session.post(\n            self.api_url, json=analysis_request_data, headers=self.header\n        )\n        if not r.ok:\n            return text\n        response_json_data = r.json()\n        text_list = [item[\"tgt_str\"] for item in response_json_data[\"sentence_list\"]]\n        language = response_json_data[\"language\"]\n        return language, text_list\n\n    def get_client_key(self):\n        return \"browser-chrome-121.0.0-Windows_10-{}-{}\".format(\n            self.uuid, int(time.time() * 1e3)\n        )\n"
  },
  {
    "path": "book_maker/translator/xai_translator.py",
    "content": "from openai import OpenAI\nfrom .chatgptapi_translator import ChatGPTAPI\n\nXAI_MODEL_LIST = [\n    \"grok-beta\",\n]\n\n\nclass XAIClient(ChatGPTAPI):\n    def __init__(self, key, language, api_base=None, **kwargs) -> None:\n        super().__init__(key, language)\n        self.model_list = XAI_MODEL_LIST\n        self.api_url = str(api_base) if api_base else \"https://api.x.ai/v1\"\n        self.openai_client = OpenAI(api_key=key, base_url=self.api_url)\n\n    def rotate_model(self):\n        self.model = self.model_list[0]\n"
  },
  {
    "path": "book_maker/utils.py",
    "content": "import tiktoken\n\n# Borrowed from : https://github.com/openai/whisper\nLANGUAGES = {\n    \"en\": \"english\",\n    \"zh-hans\": \"simplified chinese\",\n    \"zh\": \"simplified chinese\",\n    \"zh-hant\": \"traditional chinese\",\n    \"zh-yue\": \"cantonese\",\n    \"de\": \"german\",\n    \"es\": \"spanish\",\n    \"ru\": \"russian\",\n    \"ko\": \"korean\",\n    \"fr\": \"french\",\n    \"ja\": \"japanese\",\n    \"pt\": \"portuguese\",\n    \"tr\": \"turkish\",\n    \"pl\": \"polish\",\n    \"ca\": \"catalan\",\n    \"nl\": \"dutch\",\n    \"ar\": \"arabic\",\n    \"sv\": \"swedish\",\n    \"it\": \"italian\",\n    \"id\": \"indonesian\",\n    \"hi\": \"hindi\",\n    \"fi\": \"finnish\",\n    \"vi\": \"vietnamese\",\n    \"he\": \"hebrew\",\n    \"uk\": \"ukrainian\",\n    \"el\": \"greek\",\n    \"ms\": \"malay\",\n    \"cs\": \"czech\",\n    \"ro\": \"romanian\",\n    \"da\": \"danish\",\n    \"hu\": \"hungarian\",\n    \"ta\": \"tamil\",\n    \"no\": \"norwegian\",\n    \"th\": \"thai\",\n    \"ur\": \"urdu\",\n    \"hr\": \"croatian\",\n    \"bg\": \"bulgarian\",\n    \"lt\": \"lithuanian\",\n    \"la\": \"latin\",\n    \"mi\": \"maori\",\n    \"ml\": \"malayalam\",\n    \"cy\": \"welsh\",\n    \"sk\": \"slovak\",\n    \"te\": \"telugu\",\n    \"fa\": \"persian\",\n    \"lv\": \"latvian\",\n    \"bn\": \"bengali\",\n    \"sr\": \"serbian\",\n    \"az\": \"azerbaijani\",\n    \"sl\": \"slovenian\",\n    \"kn\": \"kannada\",\n    \"et\": \"estonian\",\n    \"mk\": \"macedonian\",\n    \"br\": \"breton\",\n    \"eu\": \"basque\",\n    \"is\": \"icelandic\",\n    \"hy\": \"armenian\",\n    \"ne\": \"nepali\",\n    \"mn\": \"mongolian\",\n    \"bs\": \"bosnian\",\n    \"kk\": \"kazakh\",\n    \"sq\": \"albanian\",\n    \"sw\": \"swahili\",\n    \"gl\": \"galician\",\n    \"mr\": \"marathi\",\n    \"pa\": \"punjabi\",\n    \"si\": \"sinhala\",\n    \"km\": \"khmer\",\n    \"sn\": \"shona\",\n    \"yo\": \"yoruba\",\n    \"so\": \"somali\",\n  
  \"af\": \"afrikaans\",\n    \"oc\": \"occitan\",\n    \"ka\": \"georgian\",\n    \"be\": \"belarusian\",\n    \"tg\": \"tajik\",\n    \"sd\": \"sindhi\",\n    \"gu\": \"gujarati\",\n    \"am\": \"amharic\",\n    \"yi\": \"yiddish\",\n    \"lo\": \"lao\",\n    \"uz\": \"uzbek\",\n    \"fo\": \"faroese\",\n    \"ht\": \"haitian creole\",\n    \"ps\": \"pashto\",\n    \"tk\": \"turkmen\",\n    \"nn\": \"nynorsk\",\n    \"mt\": \"maltese\",\n    \"sa\": \"sanskrit\",\n    \"lb\": \"luxembourgish\",\n    \"my\": \"myanmar\",\n    \"bo\": \"tibetan\",\n    \"tl\": \"tagalog\",\n    \"mg\": \"malagasy\",\n    \"as\": \"assamese\",\n    \"tt\": \"tatar\",\n    \"haw\": \"hawaiian\",\n    \"ln\": \"lingala\",\n    \"ha\": \"hausa\",\n    \"ba\": \"bashkir\",\n    \"jw\": \"javanese\",\n    \"su\": \"sundanese\",\n}\n\n# language code lookup by name, with a few language aliases\nTO_LANGUAGE_CODE = {\n    **{language: code for code, language in LANGUAGES.items()},\n    \"burmese\": \"my\",\n    \"valencian\": \"ca\",\n    \"flemish\": \"nl\",\n    \"haitian\": \"ht\",\n    \"letzeburgesch\": \"lb\",\n    \"pushto\": \"ps\",\n    \"panjabi\": \"pa\",\n    \"moldavian\": \"ro\",\n    \"moldovan\": \"ro\",\n    \"sinhalese\": \"si\",\n    \"castilian\": \"es\",\n}\n\n\ndef prompt_config_to_kwargs(prompt_config):\n    prompt_config = prompt_config or {}\n    return dict(\n        prompt_template=prompt_config.get(\"user\", None),\n        prompt_sys_msg=prompt_config.get(\"system\", None),\n    )\n\n\n# ref: https://platform.openai.com/docs/guides/chat/introduction\ndef num_tokens_from_text(text, model=\"gpt-3.5-turbo-0301\"):\n    messages = (\n        {\n            \"role\": \"user\",\n            \"content\": text,\n        },\n    )\n\n    \"\"\"Returns the number of tokens used by a list of messages.\"\"\"\n    try:\n        encoding = tiktoken.encoding_for_model(model)\n    except KeyError:\n        encoding = tiktoken.get_encoding(\"cl100k_base\")\n    if model == 
\"gpt-3.5-turbo-0301\":  # note: future models may deviate from this\n        num_tokens = 0\n        for message in messages:\n            num_tokens += (\n                4  # every message follows <im_start>{role/name}\\n{content}<im_end>\\n\n            )\n            for key, value in message.items():\n                num_tokens += len(encoding.encode(value))\n                if key == \"name\":  # if there's a name, the role is omitted\n                    num_tokens += -1  # role is always required and always 1 token\n        num_tokens += 2  # every reply is primed with <im_start>assistant\n        return num_tokens\n    else:\n        raise NotImplementedError(\n            f\"\"\"num_tokens_from_messages() is not presently implemented for model {model}.\n  See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\"\n        )\n"
  },
  {
    "path": "disclaimer.md",
    "content": "Disclaimer:\n\n1. The purpose of this project, bilingual_book_maker, is to assist users in creating multilingual versions of epub files and books. It is only applicable to books that have entered the public domain and is not intended for use with copyrighted material. We strongly advise users to read the copyright information carefully before using this project and to comply with relevant laws and regulations in order to protect their own and others' rights.\n2. In no event shall the authors or developers be liable for any loss or damage caused by the use of this project. Users assume all risks associated with the use of this project. Users must confirm that they have obtained permission from the original copyright holder or used open source EPUB files before using this project to avoid potential copyright risks.\n\nIf you have any concerns or suggestions about the use of this project, please contact us through the issues section.\n\n\n免责声明：\n\n1. 该项目设计目的是为了帮助用户制作多语言版本的epub文件和图书，仅适用于进入公共版权领域书籍，不适用于有版权的书籍。我们强烈建议用户在使用该项目时仔细阅读其版权信息并遵守相关法律和规定，以保护自己和他人的权益。\n2. 在任何情况下，作者和开发者不对因使用该项目而导致的任何损失或损害承担任何责任。使用该项目的风险由用户自行承担。用户必须在使用该项目之前，确认其已获得了原著作权人的许可或使用了公开可用的开源EPUB文件，以避免可能存在的版权风险。\n\n如果您对该项目的使用有任何疑虑或建议，请通过 issues 与我们联系。\n"
  },
  {
    "path": "docs/book_source.md",
    "content": "# Translate from Different Sources\n\n## txt/srt\nTxt files and srt files are plain text files. This program can translate plain text.\n\n    python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans\n\n## epub\nAn epub is made of HTML files. By default, only the contents of `<p>` tags are translated. Use `--translate-tags` to specify which tags to translate, separating multiple tags with commas. For example: `--translate-tags h1,h2,h3,p,div`\n\n    bbook_maker --book_name test_books/animal_farm.epub --openai_key ${openai_key} --translate-tags div,p\n\nIf you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. <br>\n**Note that it's best to look for e-books that are more standardized if possible.**\n\n## e-reader\nUse the `--book_from` option to specify the e-reader type (currently only `kobo` is available), and use `--device_path` to specify the mount point.\n\n    # Translate books downloaded from Rakuten Kobo on a kobo e-reader\n    bbook_maker --book_from kobo --device_path /tmp/kobo\n"
  },
  {
    "path": "docs/cmd.md",
    "content": "# Command Line Options\n\n## Test translate\n`--test` <br>\n\nUse this option to preview the result if you haven't paid for the service or just want to test. Note that there is a limit and it may take some time.\n\n```sh\nbbook_maker --book_name test_books/Lex_Fridman_episode_322.srt --openai_key ${openai_key}  --test\n```\n\n```sh\nbbook_maker --book_name test_books/animal_farm.epub --openai_key ${openai_key}  --test --language zh-hans\n```\n\n`--test_num <TEST_NUM>`<br>\n\nUse this option to set how many paragraph you want to translate for testing. Default is 10.\n\n## Resume\n`--resume` <br>\n\nUse this option to manually resume the process after an interruption.\n\n## Retranslate (epub only)\n`--retranslate <translated_filepath, file_name_in_epub, start_str [, end_str]>`<br>\n\nIf a file in epub is not translated well, it supports to re-translate part of epub separately.\n\nThis option take 4 arguments: `translated_filepath`, `file_name_in_epub`, `start_str`, `end_str`. `end_str` is optional.\n\n- Retranslate from start_str to end_str's tag:\n\n        bbook_maker --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which' 'This kind of thing is not a good symptom. 
Obviously'\n\n- Retranslate start_str's tag:\n        \n        bbook_maker --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'\n\n- Retranslate start_str's tag, auto find filename:\n        \n        bbook_maker --book_name \"test_books/animal_farm.epub\" --retranslate 'test_books/animal_farm_bilingual.epub' '' 'in spite of the present book shortage which'\n\n**Warning:**\n\n**It deletes from the tag at start_str of the finished book to the next tag at end_str, and then re-translates.**\n\n**Therefore, please make sure that the next tag of end_str is the translated content. (If end_str is not provided, the next label of start_str is guaranteed to be the translated content.) There can be missing translations between the two strings, but if end_str is not translated, there will be problems.**\n\n\n\n\n## Customize output style (epub only)\n`--translation_style <TRANSLATION_STYLE>`<br>\n\nSupport changing the output style of epub files.\n\n    bbook_maker --book_name test_books/animal_farm.epub --translation_style \"color: #4a4a4a; font-style: normal; background-color: #f7f7f7; padding: 5px; margin: 10px 0; border-radius: 5px;\"\n\n![output_style](https://user-images.githubusercontent.com/89069008/226104545-7c029bb1-5325-46d4-a1eb-ec4e7bbaee97.png)\n## Proxy\n`--proxy <PROXY>` <br>\n\nUse this option to specify proxy server for internet access. Enter a string such as `http://127.0.0.1:7890` .\n\n## API base\n`--api_base <API_BASE_URL>`<br>\n\nIf you want to change api_base like using Cloudflare Workers, use this option to support it.<br>\n\n    bbook_maker --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_base 'https://xxxxx/v1'\n**Note: the api url should be '`https://xxxx/v1`'. 
Quotation marks are required.**\n\n## Microsoft Azure Endpoints\n`--api_base <API_BASE_URL>` `--deployment_id <DEPLOYMENT_ID>`<br>\n\nYou can use the api endpoint provided from Microsoft.\n\n\n    bbook_maker --book_name 'animal_farm.epub' --openai_key XXXXX --api_base 'https://example-endpoint.openai.azure.com' --deployment_id 'deployment-name'\n\n**Note : Current only support chatgptapi model for deployment_id. And `api_base` must be provided when using `deployment_id`. You can check [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal) for more information about `deployment_id`.**\n\n## Batch size (txt only)\n`--batch_size`<br>\n\nUse this parameter to specify the number of lines for batch translation. Default is 10. (Currently only effective for txt files).\n```sh\npython3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20\n```\n\n## Accumulated Num\n`--accumulated_num <ACCUMULATED_NUM>`<br>\n\nWait for how many tokens have been accumulated before starting the translation. gpt3.5 limits the total_token to 4090. \n\nFor example, if you use --accumulated_num 1600, maybe openai will\noutput 2200 tokens and maybe 200 tokens for other messages in the system messages user messages. 1600+2200+200=4000, so you are close to the limit. \n\nYou have to choose your own\nvalue, there is no way to tell if the limit is reached before sending request.\n"
  },
  {
    "path": "docs/disclaimer.md",
    "content": "Disclaimer:\n\n1. The purpose of this project, bilingual_book_maker, is to assist users in creating multilingual versions of epub files and books. It is only applicable to books that have entered the public domain and is not intended for use with copyrighted material. We strongly advise users to read the copyright information carefully before using this project and to comply with relevant laws and regulations in order to protect their own and others' rights.\n2. In no event shall the authors or developers be liable for any loss or damage caused by the use of this project. Users assume all risks associated with the use of this project. Users must confirm that they have obtained permission from the original copyright holder or used open source EPUB files before using this project to avoid potential copyright risks.\n\nIf you have any concerns or suggestions about the use of this project, please contact us through the issues section.\n\n\n免责声明：\n\n1. 该项目设计目的是为了帮助用户制作多语言版本的epub文件和图书，仅适用于进入公共版权领域书籍，不适用于有版权的书籍。我们强烈建议用户在使用该项目时仔细阅读其版权信息并遵守相关法律和规定，以保护自己和他人的权益。\n2. 在任何情况下，作者和开发者不对因使用该项目而导致的任何损失或损害承担任何责任。使用该项目的风险由用户自行承担。用户必须在使用该项目之前，确认其已获得了原著作权人的许可或使用了公开可用的开源EPUB文件，以避免可能存在的版权风险。\n\n如果您对该项目的使用有任何疑虑或建议，请通过 issues 与我们联系。\n"
  },
  {
    "path": "docs/env_settings.md",
    "content": "# Environment Settings\nYou can also write information into env to skip some options.\n\n## Model keys\n```\n# Set env BBM_OPENAI_API_KEY to ignore option --openai_key\nexport BBM_OPENAI_API_KEY=${your_api_key}\n\n# Set env BBM_CAIYUN_API_KEY to ignore option --caiyun_key\nexport BBM_CAIYUN_API_KEY=${your_api_key}\n```"
  },
  {
    "path": "docs/index.md",
    "content": "# bilingual book maker\n\nThe `bilingual_book_maker` is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.\n\nThis tool is exclusively designed for translating epub books that have entered the public domain and is not intended for copyrighted works. Before using this tool, please review the project's **[disclaimer](disclaimer.md)**."
  },
  {
    "path": "docs/installation.md",
    "content": "# Installation\n## pip\nbilingual_book_maker has been published as a [Python package](https://pypi.org/project/bbook-maker/) and can be installed with `pip`. (Recommended in a virtual environment.)\n```sh\npip install -U bbook_maker\n```\n\n## git\nYou can also install from GitHub if you want to use the latest version.\n```sh\ngit clone git@github.com:yihong0618/bilingual_book_maker.git\ncd bilingual_book_maker\npip install .\n```"
  },
  {
    "path": "docs/model_lang.md",
    "content": "# Model and Languages\n## Models\n`-m, --model <Model>` <br>\n\nCurrently `bbook_maker` supports these models: `chatgptapi` , `gpt3` , `google` , `caiyun` , `deepl` , `deeplfree` , `gpt4` , `gpt4omini` , `gpt5mini` , `o1-preview` , `o1` , `o1-mini` , `o3-mini` , `claude` , `customapi`.\nDefault model is `chatgptapi` . \n\n### OPENAI models\n\nThere are several models you can choose from.\n\n* gpt3\n\n    \n\n        bbook_maker --book_name test_books/animal_farm.epub --model gpt3 --openai_key ${openai_key}\n\n    \n\n* chatgptapi\n\n\n    `chatgptapi` is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently.\n\n        bbook_maker --book_name test_books/animal_farm.epub --model chatgptapi --openai_key ${openai_key}\n\n* gpt4\n\n    \n\n        bbook_maker --book_name test_books/animal_farm.epub --model gpt4 --openai_key ${openai_key}\n\n    If using `gpt4` , you can add `--use_context` to add a context paragraph to each passage sent to the model for translation.\n\n  \n\n            \n        bbook_maker --book_name test_books/animal_farm.epub --model gpt4 --openai_key ${openai_key} --use_context\n\n    The option `--use_context` prompts the GPT4 model to create a one-paragraph summary. \n\n    \n\n    If it is the beginning of the translation, it will summarize the entire passage sent (the size depending on `--accumulated_num` ).\n\n    \n\n    If there is any preceding passage, it will amend the summary to include details from the most recent passage, creating a running one-paragraph context payload of the important details of the entire translated work, which improves consistency of flow and tone of each translation.\n\n* gpt5mini\n\n    `gpt5mini` uses the `gpt-5-mini` model.\n\n        bbook_maker --book_name test_books/animal_farm.epub --model gpt5mini --openai_key ${openai_key}\n\n**Note 1: Use the `--openai_key` option to specify the OpenAI API key. If you have multiple keys, separate them by commas (xxx, xxx, xxx) to reduce errors caused by API call limits.**\n\n**Note 2: You can just set the environment variable `BBM_OPENAI_API_KEY` instead of using `--openai_key`. See [Environment setting](env_settings.md).**\n\n### CAIYUN \n\nUsing Caiyun model to translate. The API currently only supports: \n\n        \n\n1. Simplified Chinese <-> English\n2. Simplified Chinese <-> Japanese\n\nThe official Caiyun has provided a test token (3975l6lr5pcbvidl6jl2). You can apply for your own token by following this [tutorial](https://bobtranslate.com/service/translate/caiyun.html).\n\n            \n    bbook_maker --model caiyun --caiyun_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub\n\n### DEEPL\n\nThere are two models you can choose from.\n\n    \n\n* deepl: [DeepL Translator](https://rapidapi.com/splintPRO/api/dpl-translator). <br>\n\n    \n\n    Need to pay to get the token. Use `--model deepl --deepl_key ${deepl_key}`\n\n        \n\n        bbook_maker --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key}\n\n        \n\n* deeplfree: DeepL free model\n\n        \n\n        bbook_maker --book_name test_books/animal_farm.epub --model deeplfree\n\n### Claude\n\nSupport [Claude](https://console.anthropic.com/docs) model. Use `--model claude --claude_key ${claude_key}` .\n\n    bbook_maker --book_name test_books/animal_farm.epub --model claude --claude_key ${claude_key}\n            \n\n### Custom API\nSupport CustomAPI model. Use `--model customapi --custom_api ${custom_api}` .\n\n    bbook_maker --book_name test_books/animal_farm.epub --model customapi --custom_api ${custom_api}  \n\n### Google\n\nSupport google model. Use `--model google`\n\n## Languages\n`--language <LANGUAGE>` <br>\n\nSet the target language. All models except `caiyun` support many languages. You can use `bbook_maker --help` to check available languages. Default target language is `\"Simplified Chinese\"` .\n\n```sh\nbbook_maker --book_name test_books/animal_farm.epub --model chatgptapi --openai_key ${openai_key} --language ja\n```\n\n```sh\nbbook_maker --book_name test_books/animal_farm.epub --model chatgptapi --openai_key ${openai_key} --language \"Simplified Chinese\"\n```"
  },
  {
    "path": "docs/prompt.md",
    "content": "# Tweak the prompt\n\nTo tweak the prompt, use the `--prompt` parameter. Valid placeholders for the `user` role template include `{text}` and `{language}`. It supports a few ways to configure the prompt:\n\n- If you don't need to set the `system` role content, you can simply set it up like this: `--prompt \"Translate {text} to {language}.\"` or `--prompt prompt_template_sample.txt`\n\n        # prompt_template_sample.txt\n        Translate the given text to {language}. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. If the text cannot be translated, return the original text as is. Do not translate person's name. Do not add any additional text in the translation. The text to be translated is: \n        {text}\n        \n\n- If you need to set the `system` role content, you can use the following format: `--prompt '{\"user\":\"Translate {text} to {language}\", \"system\": \"You are a professional translator.\"}'` or `--prompt prompt_template_sample.json`\n\n        # prompt_template_sample.json\n        {\n            \"system\": \"You are a professional translator.\", \n            \"user\": \"Translate the given text to {language}. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. If the text cannot be translated, return the original text as is. Do not translate person's name. Do not add any additional text in the translation. 
The text to be translated is:\\n{text}\"\n        }\n\nYou can also set the `user` and `system` role prompt by setting environment variables: `BBM_CHATGPTAPI_USER_MSG_TEMPLATE` and `BBM_CHATGPTAPI_SYS_MSG`.\n\n- You can now use PromptDown format (`.md` files) for more structured prompts: `--prompt prompt_md.prompt.md`\n\n        # Translation Prompt\n        \n        ## System Message\n        You are a professional translator who specializes in accurate translations.\n        \n        ## Conversation\n        \n        | Role  | Content                                  |\n        |-------|------------------------------------------|\n        | User  | Please translate the following text into {language}:\\n\\n{text} |\n        \n        # OR using Developer Message (for newer AI models)\n        \n        # Translation Prompt\n        \n        ## Developer Message\n        You are a professional translator who specializes in accurate translations.\n        \n        ## Conversation\n        \n        | Role  | Content                                  |\n        |-------|------------------------------------------|\n        | User  | Please translate the following text into {language}:\\n\\n{text} |\n\n## Examples\n```sh\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt\n# or\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.json\n# or\npython3 make_book.py --book_name test_books/animal_farm.epub --prompt \"Please translate \\`{text}\\` to {language}\"\n```\n"
  },
  {
    "path": "docs/quickstart.md",
    "content": "# QuickStart\nAfter successfully installing the package, you should see `bbook-maker` in the output of `pip list`.\n\n## Preparation\n1. ChatGPT or OpenAI [token](https://platform.openai.com/account/api-keys)\n2. epub/txt books\n3. Environment with internet access or proxy\n4. Python 3.8+\n\n## Use\nRun the tool with the `bbook_maker` command. A sample book, `test_books/animal_farm.epub`, is provided for testing purposes.\n```sh\nbbook_maker --book_name ${path of a book} --openai_key ${openai_key}\n\n# Example\nbbook_maker --book_name test_books/animal_farm.epub --openai_key ${openai_key}\n```\nOr, you can use the [script](https://github.com/yihong0618/bilingual_book_maker/blob/main/make_book.py) provided by the repository.\n```sh\npython3 make_book.py --book_name ${path of a book} --openai_key ${openai_key}\n\n# Example\npython3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key}\n```\n\nOnce the translation is complete, a bilingual book named `${book_name}_bilingual.epub` is generated.\n\n\n**Note: If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` is generated. You can simply rename it to any desired name.**\n"
  },
  {
    "path": "make_book.py",
    "content": "from book_maker.cli import main\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: bilingual book maker\ntheme:\n  name: material\n  features:\n    - navigation.tabs\n    - navigation.tabs.sticky\n    - content.code.copy\n\nnav: \n  - Home : index.md\n  - Getting started: \n    - Installation: installation.md\n    - QuickStart: quickstart.md\n  - Usage:\n    - Model and languages: model_lang.md\n    - Command line options: cmd.md\n    - Translate from different source: book_source.md\n    - Environment setting: env_settings.md\n    - Tweak the prompt: prompt.md\n  - Disclaimer: disclaimer.md\n    "
  },
  {
    "path": "prompt_md.json",
    "content": "{\n  \"system\": \"You are a highly skilled translator responsible for translating the content of books in Markdown format from English into Chinese.\",\n  \"user\": \"## Strategies\\nYou will follow a three-step translation process:\\n### 1. Translate the input content from English into Chinese, respect the intention of the original text, keep the original Markdown format unchanged, and do not delete or omit any content, nor add additional explanations or remarks.\\n### 2. Read the original text and the translation carefully, and then put forward constructive criticism and helpful suggestions to improve the translation. The final style and tone of the translation should conform to the Chinese language style.\\nYou must strictly follow the rules below.\\n- Never change the Markdown markup structure. Don't add or remove links. Do not change any URL.\\n- Never touch or change the contents of code blocks even if they appear to have a bug.\\n- Always preserve the original line breaks. Do not add or remove blank lines.\\n- Never touch any permalink at the end of each heading.\\n- Never touch HTML-like tags such as `<Notes>`.\\nWhen writing suggestions, pay attention to whether there are ways to improve the translation in terms of:\\n- Accuracy (by correcting errors such as additions, mistranslations, omissions or untranslated text).\\n- Fluency (by applying the rules of Chinese grammar, spelling and punctuation, and ensuring there is no unnecessary repetition).\\n- Conciseness and abbreviation (please appropriately simplify and abbreviate the translation result while keeping the original meaning unchanged to avoid the translation being too lengthy).\\n### 3. 
Based on the results of steps 1 and 2, refine and polish the translation, and do not add additional explanations or remarks.\\n## Output\\nFor each step of the translation process, output the results within the appropriate XML tags:\\n<step1_initial_translation>\\n[Insert your initial translation here.]\\n</step1_initial_translation>\\n<step2_reflection>\\n[Insert your reflection on the translation here and put forward specific, useful and constructive suggestions to improve the translation. Each suggestion should target a specific part of the translation.]\\n</step2_reflection>\\n<step3_refined_translation>\\n[Insert your refined and polished translation here.]\\n</step3_refined_translation>\\n## Input\\nThe following is the content of the book that needs to be translated within the <INPUT> tag:\\n<INPUT>{text}</INPUT>\"\n}"
  },
  {
    "path": "prompt_md.prompt.md",
    "content": "# Translation Prompt\n\n## Developer Message\n\nYou are a professional translator who specializes in accurate, natural-sounding translations that preserve the original meaning, tone, and style of the text.\n\n## Conversation\n\n| Role  | Content                                                                   |\n|-------|---------------------------------------------------------------------------|\n| User  | Please translate the following text into {language}:\\n\\n{text}            |\n"
  },
  {
    "path": "prompt_template_sample.json",
    "content": "{\n  \"system\": \"You are a highly skilled academic translator. Please complete the translation task according to the following instructions and provide only the final polished translation.\",\n  \"user\": \"## Strategies\\nYou will follow a three-step translation process:\\n### Step.1 Initial Direct Translation: Translate the content from English to Chinese sentence by sentence, respecting the original intent without deleting, omitting, or adding any extra explanations or notes.\\n ### Step.2 Reflection and Revision: Carefully review both the input content and the initial direct translation from Step 1. Check if the translation conveys the original meaning, if the grammatical structure is correct, if word choices are appropriate, and if there are any ambiguities or polysemous words. The final style and tone should conform to Chinese language conventions. \\nYou must strictly follow the rules below.\\n- Don't add or remove links. Do not change any URL.\\n- Do not translate the reference list.\\n- Never touch,change or translate the mathematical formulas.\\n- Never touch,change or translate the contents of code blocks even if they appear to have a bug.\\n- Always preserve the original line breaks. Do not add or remove blank lines.\\nProvide constructive criticism and helpful suggestions to improve: \\n- translation accuracy (correct additions, mistranslations, omissions, or untranslated text errors),\\n- fluency (apply Chinese grammar, spelling, and punctuation rules, and ensure no unnecessary repetition), \\n- conciseness (streamline the translation results while maintaining the original meaning, avoiding wordiness).\\n ### Step.3 Polish and Optimize: Based on the results from Steps 1 and 2, refine and polish the translation, ensuring the final translation adheres to Chinese style without additional explanations or notes. The content to be translated is wrapped in the following <INPUT> tags:\\n\\n<INPUT>{text}</INPUT>. 
\\n\\nPlease write and output only the final polished translation here: \"\n}\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"bbook-maker\"\ndescription = \"The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.\"\nreadme = \"README.md\"\nlicense = {text = \"MIT\"}\ndynamic = [\"version\"]\nrequires-python = \">=3.10\"\nauthors = [\n    { name = \"yihong0618\", email = \"zouzou0208@gmail.com\" },\n]\nclassifiers = [\n    \"License :: OSI Approved :: MIT License\",\n    \"Operating System :: OS Independent\",\n    \"Programming Language :: Python :: 3\",\n]\ndependencies = [\n    \"anthropic\",\n    \"backoff\",\n    \"bs4\",\n    \"ebooklib\",\n    \"google-generativeai\",\n    \"langdetect\",\n    \"litellm\",\n    \"openai>=1.1.1\",\n    \"PyDeepLX\",\n    \"requests\",\n    \"rich\",\n    \"tiktoken\",\n    \"tqdm\",\n    \"groq>=0.5.0\",\n    \"promptdown>=0.9.0\",\n    \"PyMuPDF\",\n]\n\n[project.scripts]\nbbook_maker = \"book_maker.cli:main\"\npromptdown = \"promptdown_cli:main\"\n\n[project.urls]\nHomepage = \"https://github.com/yihong0618/bilingual_book_maker\"\n\n[tool.pdm]\nplugins = [\"pdm-autoexport\"]\n[[tool.pdm.autoexport]]\nfilename = \"requirements.txt\"\nwithout-hashes = true\n[build-system]\nrequires = [\"pdm-backend>=2.0.0\"]\nbuild-backend = \"pdm.backend\"\n[tool.pdm.version]\nsource = \"scm\"\n"
  },
  {
    "path": "tests/test_epub_metadata.py",
    "content": "import pytest\nfrom ebooklib import epub\n\nfrom book_maker.loader.epub_loader import EPUBBookLoader\n\n\ndef test_epub_loader_handles_custom_metadata(tmp_path):\n    source_book = epub.EpubBook()\n    source_book.add_metadata(\"DC\", \"title\", \"Metadata Copy Test\", {\"id\": \"title-id\"})\n    source_book.add_metadata(\"DC\", \"creator\", \"Tester\", {\"role\": \"aut\"})\n\n    # Simulate a namespace that ebooklib does not recognise; the legacy approach\n    # copied this verbatim and ebooklib failed while writing the book back.\n    source_book.metadata[\"custom\"] = [\n        (\"foo-tag\", \"bar-value\", {\"attr\": \"value\"}),\n    ]\n\n    legacy_book = epub.EpubBook()\n    legacy_book.metadata = source_book.metadata\n    with pytest.raises(AttributeError):\n        epub.write_epub(str(tmp_path / \"legacy.epub\"), legacy_book)\n\n    loader = EPUBBookLoader.__new__(EPUBBookLoader)\n    rebuilt_book = loader._make_new_book(source_book)\n\n    output_path = tmp_path / \"rebuilt.epub\"\n    epub.write_epub(str(output_path), rebuilt_book)\n    assert output_path.exists()\n\n    dc_namespace = epub.NAMESPACES[\"DC\"]\n    titles = rebuilt_book.metadata[dc_namespace][\"title\"]\n    creators = rebuilt_book.metadata[dc_namespace][\"creator\"]\n\n    assert (\"Metadata Copy Test\", {\"id\": \"title-id\"}) in titles\n    assert (\"Tester\", {\"role\": \"aut\"}) in creators\n    assert \"custom\" not in rebuilt_book.metadata\n"
  },
  {
    "path": "tests/test_integration.py",
    "content": "import os\nimport shutil\nimport subprocess\nimport sys\nfrom pathlib import Path\n\nimport pytest\n\n\n@pytest.fixture()\ndef test_book_dir() -> str:\n    \"\"\"Return test book dir\"\"\"\n    # TODO: Can move this to conftest.py if there will be more unittests\n    return str(Path(__file__).parent.parent / \"test_books\")\n\n\ndef test_google_translate_epub(test_book_dir, tmpdir):\n    \"\"\"Test google translate epub\"\"\"\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"Liber_Esther.epub\"),\n        os.path.join(tmpdir, \"Liber_Esther.epub\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"Liber_Esther.epub\"),\n            \"--test\",\n            \"--test_num\",\n            \"20\",\n            \"--model\",\n            \"google\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"Liber_Esther_bilingual.epub\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"Liber_Esther_bilingual.epub\")) != 0\n\n\ndef test_deepl_free_translate_epub(test_book_dir, tmpdir):\n    \"\"\"Test deepl free translate epub\"\"\"\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"Liber_Esther.epub\"),\n        os.path.join(tmpdir, \"Liber_Esther.epub\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"Liber_Esther.epub\"),\n            \"--test\",\n            \"--test_num\",\n            \"20\",\n            \"--model\",\n            \"deeplfree\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"Liber_Esther_bilingual.epub\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"Liber_Esther_bilingual.epub\")) != 0\n\n\ndef test_google_translate_epub_cli():\n    pass\n\n\ndef 
test_google_translate_txt(test_book_dir, tmpdir):\n    \"\"\"Test google translate txt\"\"\"\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"the_little_prince.txt\"),\n        os.path.join(tmpdir, \"the_little_prince.txt\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"the_little_prince.txt\"),\n            \"--test\",\n            \"--test_num\",\n            \"20\",\n            \"--model\",\n            \"google\",\n        ],\n        env=os.environ.copy(),\n    )\n    assert os.path.isfile(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\")) != 0\n\n\ndef test_google_translate_txt_batch_size(test_book_dir, tmpdir):\n    \"\"\"Test google translate txt with batch_size\"\"\"\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"the_little_prince.txt\"),\n        os.path.join(tmpdir, \"the_little_prince.txt\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"the_little_prince.txt\"),\n            \"--test\",\n            \"--batch_size\",\n            \"30\",\n            \"--test_num\",\n            \"20\",\n            \"--model\",\n            \"google\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"BBM_CAIYUN_API_KEY\"),\n    reason=\"No BBM_CAIYUN_API_KEY in environment variable.\",\n)\ndef test_caiyun_translate_txt(test_book_dir, tmpdir):\n    \"\"\"Test caiyun translate txt\"\"\"\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"the_little_prince.txt\"),\n        
os.path.join(tmpdir, \"the_little_prince.txt\"),\n    )\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"the_little_prince.txt\"),\n            \"--test\",\n            \"--batch_size\",\n            \"10\",\n            \"--test_num\",\n            \"100\",\n            \"--model\",\n            \"caiyun\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"BBM_DEEPL_API_KEY\"),\n    reason=\"No BBM_DEEPL_API_KEY in environment variable.\",\n)\ndef test_deepl_translate_txt(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"the_little_prince.txt\"),\n        os.path.join(tmpdir, \"the_little_prince.txt\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"the_little_prince.txt\"),\n            \"--test\",\n            \"--batch_size\",\n            \"30\",\n            \"--test_num\",\n            \"20\",\n            \"--model\",\n            \"deepl\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"the_little_prince_bilingual.txt\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"BBM_DEEPL_API_KEY\"),\n    reason=\"No BBM_DEEPL_API_KEY in environment variable.\",\n)\ndef test_deepl_translate_srt(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"Lex_Fridman_episode_322.srt\"),\n        os.path.join(tmpdir, \"Lex_Fridman_episode_322.srt\"),\n    )\n\n    subprocess.run(\n        [\n            
sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"Lex_Fridman_episode_322.srt\"),\n            \"--test\",\n            \"--batch_size\",\n            \"30\",\n            \"--test_num\",\n            \"2\",\n            \"--model\",\n            \"deepl\",\n        ],\n        env=os.environ.copy(),\n    )\n\n    assert os.path.isfile(os.path.join(tmpdir, \"Lex_Fridman_episode_322_bilingual.srt\"))\n    assert (\n        os.path.getsize(os.path.join(tmpdir, \"Lex_Fridman_episode_322_bilingual.srt\"))\n        != 0\n    )\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"No OPENAI_API_KEY in environment variable.\",\n)\ndef test_openai_translate_epub_zh_hans(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"lemo.epub\"),\n        os.path.join(tmpdir, \"lemo.epub\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"lemo.epub\"),\n            \"--test\",\n            \"--test_num\",\n            \"5\",\n            \"--language\",\n            \"zh-hans\",\n        ],\n        env=os.environ.copy(),\n    )\n    assert os.path.isfile(os.path.join(tmpdir, \"lemo_bilingual.epub\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"lemo_bilingual.epub\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"No OPENAI_API_KEY in environment variable.\",\n)\ndef test_openai_translate_epub_ja_prompt_txt(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"animal_farm.epub\"),\n        os.path.join(tmpdir, \"animal_farm.epub\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"animal_farm.epub\"),\n            \"--test\",\n            
\"--test_num\",\n            \"5\",\n            \"--language\",\n            \"ja\",\n            \"--model\",\n            \"gpt3\",\n            \"--prompt\",\n            \"prompt_template_sample.txt\",\n        ],\n        env=os.environ.copy(),\n    )\n    assert os.path.isfile(os.path.join(tmpdir, \"animal_farm_bilingual.epub\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"animal_farm_bilingual.epub\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"No OPENAI_API_KEY in environment variable.\",\n)\ndef test_openai_translate_epub_ja_prompt_json(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"animal_farm.epub\"),\n        os.path.join(tmpdir, \"animal_farm.epub\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"animal_farm.epub\"),\n            \"--test\",\n            \"--test_num\",\n            \"5\",\n            \"--language\",\n            \"ja\",\n            \"--prompt\",\n            \"prompt_template_sample.json\",\n        ],\n        env=os.environ.copy(),\n    )\n    assert os.path.isfile(os.path.join(tmpdir, \"animal_farm_bilingual.epub\"))\n    assert os.path.getsize(os.path.join(tmpdir, \"animal_farm_bilingual.epub\")) != 0\n\n\n@pytest.mark.skipif(\n    not os.environ.get(\"OPENAI_API_KEY\"),\n    reason=\"No OPENAI_API_KEY in environment variable.\",\n)\ndef test_openai_translate_srt(test_book_dir, tmpdir):\n    shutil.copyfile(\n        os.path.join(test_book_dir, \"Lex_Fridman_episode_322.srt\"),\n        os.path.join(tmpdir, \"Lex_Fridman_episode_322.srt\"),\n    )\n\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            os.path.join(tmpdir, \"Lex_Fridman_episode_322.srt\"),\n            \"--test\",\n            \"--test_num\",\n            \"20\",\n        
],\n        env=os.environ.copy(),\n    )\n    assert os.path.isfile(os.path.join(tmpdir, \"Lex_Fridman_episode_322_bilingual.srt\"))\n    assert (\n        os.path.getsize(os.path.join(tmpdir, \"Lex_Fridman_episode_322_bilingual.srt\"))\n        != 0\n    )\n"
  },
  {
    "path": "tests/test_pdf_cli.py",
    "content": "import subprocess\nimport sys\nfrom pathlib import Path\n\nimport pytest\n\nfitz = pytest.importorskip(\"fitz\")\n\n\ndef test_pdf_cli_creates_txt_and_optional_epub(tmp_path):\n    pdf_path = tmp_path / \"cli_test.pdf\"\n    doc = fitz.open()\n    page = doc.new_page()\n    page.insert_text((72, 72), \"CLI test\\nPDF content\")\n    doc.save(str(pdf_path))\n\n    # run CLI\n    subprocess.run(\n        [\n            sys.executable,\n            \"make_book.py\",\n            \"--book_name\",\n            str(pdf_path),\n            \"--test\",\n            \"--test_num\",\n            \"5\",\n            \"--model\",\n            \"google\",\n        ],\n        check=True,\n    )\n\n    txt_out = tmp_path / \"cli_test_bilingual.txt\"\n    assert txt_out.exists()\n    assert txt_out.stat().st_size > 0\n\n    # if ebooklib is installed, an epub should be created\n    try:\n        import ebooklib\n    except Exception:\n        ebooklib = None\n\n    if ebooklib is not None:\n        epub_out = tmp_path / \"cli_test_bilingual.epub\"\n        assert epub_out.exists()\n        assert epub_out.stat().st_size > 0\n"
  },
  {
    "path": "tests/test_pdf_loader.py",
    "content": "import os\nfrom pathlib import Path\n\nimport pytest\n\nfitz = pytest.importorskip(\"fitz\")\n\nfrom book_maker.loader.pdf_loader import PDFBookLoader\n\n\nclass DummyModel:\n    def __init__(\n        self,\n        key,\n        language,\n        api_base=None,\n        temperature=1.0,\n        source_lang=\"auto\",\n        **kwargs,\n    ):\n        pass\n\n    def translate(self, text):\n        return f\"<T>{text}\"\n\n    def translate_list(self, texts):\n        return [f\"<T>{t}\" for t in texts]\n\n\ndef test_pdf_loader_extracts_and_translates(tmp_path):\n    pdf_path = tmp_path / \"test.pdf\"\n    doc = fitz.open()\n    page = doc.new_page()\n    page.insert_text((72, 72), \"Hello world\\nThis is a PDF test\")\n    doc.save(str(pdf_path))\n\n    loader = PDFBookLoader(\n        str(pdf_path),\n        DummyModel,\n        key=\"\",\n        resume=False,\n        language=\"en\",\n        is_test=True,\n        test_num=5,\n    )\n\n    assert len(loader.origin_book) > 0\n\n    loader.make_bilingual_book()\n\n    out_file = tmp_path / \"test_bilingual.txt\"\n    assert out_file.exists()\n    assert out_file.stat().st_size > 0\n    # basic content check\n    content = out_file.read_text(encoding=\"utf-8\")\n    assert \"<T>\" in content\n\n    # if ebooklib is installed, an EPUB should also be produced\n    try:\n        import ebooklib\n    except Exception:\n        ebooklib = None\n\n    if ebooklib is not None:\n        epub_file = tmp_path / \"test_bilingual.epub\"\n        assert epub_file.exists()\n        assert epub_file.stat().st_size > 0\n"
  },
  {
    "path": "typos.toml",
    "content": "# See https://github.com/crate-ci/typos/blob/master/docs/reference.md to configure typos\n[default.extend-words]\nsur = \"sur\"\nbanch = \"banch\" # TODO: not sure if this is a typo or not\nfo = \"fo\"\nba = \"ba\"\n[files]\nextend-exclude = [\"LICENSE\"]\n"
  }
]