[
  {
    "path": ".github/workflows/cli.yml",
    "content": "name: Publish Python Package\n\non:\n  push:\n    tags:\n      - 'v*.*.*'  # 只有打 tag 才触发\n\npermissions:\n  id-token: write  # 用 OIDC 登录\n  contents: read\n\njobs:\n  build-and-publish:\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v3\n    - name: Set up Python\n      uses: actions/setup-python@v4\n      with:\n        python-version: '3.11'\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade build\n    - name: Build package\n      run: python -m build\n    - name: Publish package to PyPI\n      uses: pypa/gh-action-pypi-publish@release/v1\n      with:\n        packages-dir: dist/\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n**/.next/\n.DS_Store\n\n*.log\nlogs/\n*__pycache__\nuv.lock\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# UV\n#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#uv.lock\n\n# poetry\n#   Similar to 
Pipfile.lock, it is generally recommended to include poetry.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n.idea/\n\n# Ruff stuff:\n.ruff_cache/\n\n# PyPI configuration file\n.pypirc\n"
  },
  {
    "path": ".kiro/hooks/readme-translation-hook.kiro.hook",
    "content": "{\n  \"enabled\": true,\n  \"name\": \"README Translation & Grammar Check\",\n  \"description\": \"Automatically checks README.md for grammar and spelling errors, corrects them, and translates the corrected content into Simplified Chinese, Japanese, Korean, French, and German\",\n  \"version\": \"1\",\n  \"when\": {\n    \"type\": \"fileEdited\",\n    \"patterns\": [\n      \"README.md\"\n    ]\n  },\n  \"then\": {\n    \"type\": \"askAgent\",\n    \"prompt\": \"The README.md file has been modified. Please:\\n\\n1. First, carefully read the updated README.md content and check for any English grammar or spelling mistakes\\n2. If any errors are found, correct them and update the README.md file with the corrected version\\n3. Then translate the corrected README.md content into the following languages and save each translation as a separate file:\\n   - Simplified Chinese → README-CN.md\\n   - Japanese → README-JP.md  \\n   - Korean → README-KR.md\\n   - French → README-FR.md\\n   - German → README-DE.md\\n\\nEnsure that:\\n- All technical terms, code blocks, and formatting are preserved in translations\\n- File paths, URLs, and command examples remain unchanged\\n- The translations are accurate and maintain the original meaning\\n- Each translated file follows the same structure as the original README.md\"\n  }\n}"
  },
  {
    "path": ".kiro/steering/ebook-mcp-server-guide.md",
    "content": "---\ninclusion: always\n---\n\n# Ebook MCP Server Development Guide\n\n## Architecture\nFastMCP-based server for EPUB/PDF processing with tools for metadata extraction, content parsing, and format conversion.\n\n**Core Structure:**\n- `src/ebook_mcp/main.py` - MCP server entry point with FastMCP app\n- `src/ebook_mcp/tools/` - MCP tool implementations (epub_helper.py, pdf_helper.py, logger_config.py)\n- Use `uv` package manager exclusively\n\n## Code Conventions\n\n**Python Standards:**\n- Python 3.10+ with full type annotations\n- PEP 8 compliance, English-only documentation\n- Specific exception handling: `FileNotFoundError` over generic `Exception`\n- Always log debug info before processing: `logger.debug(f\"Calling {tool_name}: {params}\")`\n\n**MCP Tool Pattern (MANDATORY):**\n```python\n@mcp.tool()\ndef tool_name(param: str) -> ReturnType:\n    \"\"\"Brief description.\n    \n    Args:\n        param: Description with format examples\n        \n    Returns:\n        ReturnType: Description\n        \n    Raises:\n        FileNotFoundError: When file doesn't exist\n        Exception: For parsing/processing errors\n    \"\"\"\n    logger.debug(f\"Calling tool_name: {param}\")\n    try:\n        # Implementation\n        return result\n    except FileNotFoundError as e:\n        raise FileNotFoundError(str(e))\n    except Exception as e:\n        raise Exception(str(e))\n```\n\n## Processing Rules\n\n**EPUB Processing:**\n- Use `ebooklib` library exclusively\n- **CRITICAL:** \n- Use `get_epub_chapter_markdown` (fixes truncation issues)\n- Convert all content to Markdown format\n- Handle missing TOC gracefully with fallback methods\n\n**PDF Processing:**\n- Use `PyMuPDF` (fitz) library exclusively\n- Support both page-based and chapter-based extraction\n- When TOC missing, fallback to page-by-page extraction\n- Provide both plain text and Markdown output formats\n\n**Error Handling Pattern:**\n- File operations: Raise `FileNotFoundError` for 
missing files\n- Format/parsing errors: Use specific exceptions with descriptive messages\n- Wrap optional imports in try-except blocks\n- Always preserve original error context in exception messages\n\n## Available Tools Reference\n\n**EPUB:** `get_all_epub_files`, `get_epub_metadata`, `get_epub_toc`, `get_epub_chapter_markdown_fixed`\n**PDF:** `get_all_pdf_files`, `get_pdf_metadata`, `get_pdf_toc`, `get_pdf_page_text`, `get_pdf_page_markdown`, `get_pdf_chapter_content`\n\n## Development Commands\n```bash\n# Test suite\nuv run pytest [specific_test_file]\n\n# Development server\nuv run mcp dev src/ebook_mcp/main.py\n\n# Debug with MCP inspector\nnpx @modelcontextprotocol/inspector uv --directory . run src/ebook_mcp/main.py\n```\n\n**Logging:** Files in `logs/ebook-mcp_server_YYYYMMDD_HHMMSS.log`, DEBUG level, console + file output\n\n"
  },
  {
    "path": ".python-version",
    "content": "3.12\n"
  },
  {
    "path": ".vscode/settings.json",
    "content": "{\n}"
  },
  {
    "path": "CHANGELOG.md",
    "content": "# Changelog\n\nAll notable changes to this project will be documented in this file.\n\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),\nand this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n\n## [Unreleased]\n\n## [0.1.7] - 2025-08-06\n\n### 🔧 Refactored\n- **Modernized Dependency Management**: Removed `requirements.txt`, fully using `pyproject.toml` for dependency management\n  - Deleted `requirements.txt` file\n  - Updated installation instructions in all README files\n  - Unified use of modern Python package management standards\n  - Simplified installation process: `uv pip install -e .` or `pip install -e .`\n\n- **PDF Processing Optimization**: Removed `PyPDF2` dependency, fully using `PyMuPDF`\n  - Removed `PyPDF2` imports and `get_meta_pypdf2` function from `pdf_helper.py`\n  - Updated `pyproject.toml`, removed `PyPDF2` dependency\n  - Deleted `test_pdf_metadata_comparison.py` test file\n  - Updated related tests, removed `PyPDF2` related tests\n\n  - Enhanced PDF metadata extraction functionality, providing richer metadata information\n\n### 🌍 Added\n- **Internationalization Support**: Added multilingual README documentation\n  - Added German README (`README-DE.md`)\n  - Added French README (`README-FR.md`)\n  - Added Japanese README (`README-JP.md`)\n  - Added Korean README (`README-KR.md`)\n  - Added Kiro translation tool configuration (`.kiro/hooks/readme-translation-hook.kiro.hook`)\n\n### 🔧 Technical Improvements\n- **Dependency Management**: Compliant with modern Python project standards (PEP 518/621)\n- **PDF Processing**: Improved performance and stability, reduced dependency conflicts\n- **Test Coverage**: All tests passing (76 passed, 5 skipped)\n- **Code Quality**: Simplified code structure, improved maintainability\n\n### 📝 Documentation\n- Updated installation instructions in all README files\n- Added multilingual support documentation\n- Updated MCP client 
example documentation\n- Improved project documentation accessibility\n\n### 🗑️ Removed\n- `requirements.txt` file\n- `PyPDF2` dependency and related code\n- `test_pdf_metadata_comparison.py` test file\n- Outdated installation instruction references\n\n### 🔄 Backward Compatibility\n- ✅ Maintained API compatibility, no need to modify existing code\n- ✅ All MCP tools working normally\n- ✅ Functional integrity guaranteed\n\n### 📦 Installation Instructions\n```bash\n# Development environment\ngit clone <repository-url>\ncd ebook-mcp\nuv pip install -e .\n# or\npip install -e .\n\n# Run tests\n./run_tests.sh\n# or\npytest src/ebook_mcp/tests/\n```\n\n### 🎯 Impact Assessment\n- **Positive Impact**:\n  - Simplified dependency management\n  - Improved PDF processing performance\n  - Enhanced internationalization support\n  - Reduced maintenance complexity\n  - Compliant with modern Python project standards\n\n- **Potential Impact**:\n  - Users need to update installation methods\n  - Removed specific PyPDF2 features (replaced by PyMuPDF)\n\n### 🔄 Migration Guide\nFor existing users:\n1. Delete `requirements.txt` file (if exists)\n2. Reinstall using `uv pip install -e .`\n3. 
Update CI/CD configuration (if using requirements.txt)\n\n---\n\n## [0.1.4] - 2025-08-05\n\n### 🔧 Fixed\n- Fixed subchapter truncation issue in EPUB chapter extraction\n- Added `get_epub_chapter_markdown_fixed` tool\n- Improved chapter boundary detection logic\n- Updated related tests and documentation\n\n### 📝 Documentation\n- Added `HOW-TO-TEST.md` testing documentation\n- Updated test runner scripts\n- Improved error handling and logging\n\n## [0.1.3] - 2025-08-04\n\n### 🌟 Added\n- Added comprehensive unit test suite\n- Created test configuration files and runner scripts\n- Added test documentation and examples\n\n### 🔧 Improved\n- Improved error handling mechanisms\n- Optimized code structure and readability\n- Enhanced test coverage\n\n## [0.1.2] - 2025-08-03\n\n### 🌟 Added\n- Added PDF chapter content extraction functionality\n- Support for extracting content by chapter title\n- Added Markdown format output support\n\n### 🔧 Improved\n- Optimized PDF metadata extraction\n- Improved error handling\n- Updated API documentation\n\n## [0.0.1] - 2025-08-02\n\n### 🔧 Fixed\n- Fixed compatibility issues in PDF processing\n- Improved EPUB metadata extraction\n- Optimized file path handling\n\n### 📝 Documentation\n- Updated installation instructions\n- Added usage examples\n- Improved API documentation\n\n## [1.0.0] - 2025-08-01\n\n### 🌟 Initial Release\n- EPUB and PDF format support\n- Basic file processing APIs\n- MCP client examples - Claude, DeepSeek, OpenAI\n- Support for running server from PyPI\n- Basic metadata extraction functionality\n- Table of contents extraction support\n- Chapter content extraction functionality\n\n---\n\n## Version Notes\n\n### Semantic Versioning\n- **Major version**: Incompatible API changes\n- **Minor version**: Backward-compatible functionality additions\n- **Patch version**: Backward-compatible bug fixes\n\n### Change Types\n- **Added**: New features\n- **Changed**: Changes to existing functionality\n- **Deprecated**: Features that 
will be removed soon\n- **Removed**: Removed features\n- **Fixed**: Bug fixes\n- **Security**: Security improvements "
  },
  {
    "path": "HOW-TO-TEST.md",
    "content": "# Ebook-MCP 单元测试指南\n\n本指南说明如何运行 ebook-mcp 项目的服务器部分单元测试。\n\n## 测试文件结构\n\n```\nsrc/ebook_mcp/tests/\n├── conftest.py              # pytest 配置和共享 fixtures\n├── test_main.py             # main.py 的单元测试\n├── test_epub_helper.py      # epub_helper.py 的单元测试\n├── test_pdf_helper.py       # pdf_helper.py 的单元测试\n├── test_azw.py              # 现有的 AZW 测试\n└── run_tests.py             # 测试运行脚本\n```\n\n## 测试覆盖范围\n\n### main.py 测试\n- EPUB 相关工具函数测试\n  - `get_all_epub_files`\n  - `get_epub_metadata`\n  - `get_epub_toc`\n  - `get_epub_chapter_markdown`\n- PDF 相关工具函数测试\n  - `get_all_pdf_files`\n  - `get_pdf_metadata`\n  - `get_pdf_toc`\n  - `get_pdf_page_text`\n  - `get_pdf_page_markdown`\n  - `get_pdf_chapter_content`\n- 错误处理测试\n  - 文件不存在\n  - 解析错误\n  - 异常处理\n\n### epub_helper.py 测试\n- 文件操作测试\n- EPUB 解析测试\n- 目录结构处理测试\n- HTML 清理和转换测试\n- 章节提取测试\n\n### pdf_helper.py 测试\n- PDF 文件操作测试\n- 元数据提取测试\n- 目录提取测试\n- 页面文本提取测试\n- 章节内容提取测试\n\n## 运行测试\n\n### 方法 1: 使用测试运行脚本\n\n```bash\n# 运行所有测试\npython src/ebook_mcp/tests/run_tests.py\n\n# 列出所有测试文件\npython src/ebook_mcp/tests/run_tests.py list\n\n# 运行特定测试文件\npython src/ebook_mcp/tests/run_tests.py run test_main.py\n```\n\n### 方法 2: 使用 pytest 直接运行\n\n```bash\n# 运行所有测试\npytest src/ebook_mcp/tests/ -v\n\n# 运行特定测试文件\npytest src/ebook_mcp/tests/test_main.py -v\n\n# 运行特定测试类\npytest src/ebook_mcp/tests/test_main.py::TestEpubFunctions -v\n\n# 运行特定测试方法\npytest src/ebook_mcp/tests/test_main.py::TestEpubFunctions::test_get_all_epub_files_empty_directory -v\n```\n\n### 方法 3: 从项目根目录运行\n\n```bash\n# 从项目根目录运行所有测试\npython -m pytest src/ebook_mcp/tests/ -v\n\n# 运行特定测试\npython -m pytest src/ebook_mcp/tests/test_main.py -v\n```\n\n### 方法 4: 使用 uv 运行（如果使用 uv 管理依赖）\n\n```bash\n# 运行所有测试\nuv run pytest src/ebook_mcp/tests/ -v\n\n# 运行特定测试\nuv run pytest src/ebook_mcp/tests/test_main.py -v\n```\n\n### 方法 5: 运行基本测试（不需要外部依赖）\n\n```bash\n# 运行基本测试（推荐用于快速验证）\npytest src/ebook_mcp/tests/test_basic.py -v\n\n# 或者\npython -m pytest src/ebook_mcp/tests/test_basic.py 
-v\n```\n\n## 测试环境要求\n\n### 基本依赖\n确保已安装以下依赖：\n\n```bash\n# 如果使用 pip\npip install pytest\npip install pytest-cov  # 可选：用于代码覆盖率\n\n# 如果使用 uv\nuv add --dev pytest\nuv add --dev pytest-cov  # 可选：用于代码覆盖率\n```\n\n### 完整依赖（用于完整测试）\n如果要运行所有测试（包括需要外部库的测试），需要安装项目依赖：\n\n```bash\n# 安装项目依赖\npip install -e .\n\n# 或者使用 uv\nuv sync --dev\n```\n\n### 依赖说明\n- **基本测试** (`test_basic.py`): 不需要外部依赖，可以立即运行 ✅\n- **主要功能测试** (`test_main.py`, `test_epub_helper.py`, `test_pdf_helper.py`): 需要项目依赖 ⚠️\n- **AZW 测试** (`test_azw.py`): 需要 AZW 模块（如果可用）⏭️\n\n### 测试状态\n- ✅ **基本测试**: 10个测试全部通过\n- ⏭️ **AZW 测试**: 4个测试跳过（模块不可用）\n- ⚠️ **主要功能测试**: 需要安装项目依赖后运行\n\n## 测试输出示例\n\n### 成功运行示例\n```\nRunning ebook-mcp unit tests...\n==================================================\ntest_main.py::TestEpubFunctions::test_get_all_epub_files_empty_directory PASSED\ntest_main.py::TestEpubFunctions::test_get_all_epub_files_with_epub_files PASSED\ntest_main.py::TestEpubFunctions::test_get_epub_metadata_success PASSED\n...\ntest_pdf_helper.py::TestPdfHelper::test_get_all_pdf_files_empty_directory PASSED\ntest_pdf_helper.py::TestPdfHelper::test_get_all_pdf_files_with_pdf_files PASSED\n...\n\n==================================================\n✅ All tests passed!\n```\n\n### 失败运行示例\n```\ntest_main.py::TestEpubFunctions::test_get_epub_metadata_file_not_found FAILED\n...\nAssertionError: Expected FileNotFoundError to be raised, but no exception was raised.\n```\n\n## 测试策略\n\n### 单元测试原则\n1. **隔离性**: 每个测试都是独立的，不依赖其他测试\n2. **可重复性**: 测试可以在任何环境下重复运行\n3. **快速性**: 测试运行速度快\n4. 
**完整性**: 覆盖正常情况和异常情况\n\n### Mock 使用\n- 使用 `unittest.mock` 来模拟外部依赖\n- 模拟文件系统操作\n- 模拟 EPUB 和 PDF 解析库\n\n### 测试数据\n- 使用临时文件和目录\n- 使用模拟数据而不是真实文件\n- 测试完成后自动清理\n\n## 调试测试\n\n### 查看详细输出\n```bash\n# 运行测试并显示详细输出\npytest src/ebook_mcp/tests/ -v -s\n\n# 在失败时停止\npytest src/ebook_mcp/tests/ -x\n\n# 显示本地变量\npytest src/ebook_mcp/tests/ --tb=long\n```\n\n### 运行特定测试\n```bash\n# 运行包含特定关键词的测试\npytest src/ebook_mcp/tests/ -k \"epub\" -v\n\n# 运行特定类的测试\npytest src/ebook_mcp/tests/ -k \"TestEpubFunctions\" -v\n\n# 运行特定方法的测试\npytest src/ebook_mcp/tests/ -k \"test_get_epub_metadata\" -v\n```\n\n## 添加新测试\n\n### 为新功能添加测试\n1. 在相应的测试文件中添加测试类或方法\n2. 使用描述性的测试方法名\n3. 包含正常情况和异常情况的测试\n4. 使用适当的 mock 来隔离依赖\n\n### 测试方法命名规范\n- 使用 `test_` 前缀\n- 描述测试的功能和场景\n- 例如: `test_get_epub_metadata_success`, `test_get_epub_metadata_file_not_found`\n\n### 测试文档\n- 每个测试方法都应该有文档字符串\n- 说明测试的目的和预期结果\n- 对于复杂的测试，说明测试步骤\n\n## 持续集成\n\n这些测试可以集成到 CI/CD 流程中：\n\n```yaml\n# 示例 GitHub Actions 配置\n- name: Run tests\n  run: |\n    python -m pytest src/ebook_mcp/tests/ -v --cov=src/ebook_mcp\n```\n\n## 故障排除\n\n### 常见问题\n\n1. **导入错误**: 确保 Python 路径正确设置\n   ```bash\n   # 从项目根目录运行\n   cd /path/to/ebook-mcp\n   python -m pytest src/ebook_mcp/tests/ -v\n   ```\n\n2. **Mock 问题**: 检查 mock 路径是否正确\n   ```python\n   # 确保 mock 路径与实际的导入路径一致\n   @patch('ebook_mcp.main.epub_helper.get_meta')\n   ```\n\n3. **文件权限**: 确保有创建临时文件的权限\n   ```bash\n   # 检查当前目录权限\n   ls -la\n   ```\n\n4. **依赖问题**: 确保所有测试依赖已安装\n   ```bash\n   # 安装开发依赖\n   pip install -e .[dev]\n   # 或使用 uv\n   uv sync --dev\n   ```\n\n### 获取帮助\n\n如果遇到问题，可以：\n\n1. 查看测试输出中的错误信息\n2. 使用 `-v` 参数获取详细输出\n3. 使用 `--tb=long` 查看完整的错误堆栈\n4. 检查测试文件中的 mock 设置 "
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README-CN.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md)\n\nEbook-MCP 是一个强大的模型上下文协议（MCP）服务器，用于处理电子书。基于[模型上下文协议](https://github.com/modelcontextprotocol)构建，它提供了一套标准化的API，用于LLM应用程序与电子书处理功能之间的无缝集成。目前支持EPUB和PDF格式。\n\n## 使用场景与价值\n\nEbook-MCP 通过与您的阅读材料进行自然语言对话，改变了您与数字图书的互动方式。它与现代AI驱动的IDE（如Cursor和Claude）无缝集成，让您能够：\n\n- **智能图书馆管理**：简单地询问\"显示我下载文件夹中的所有EPUB文件\"或\"在我的图书馆中找到关于GenAI的书籍\"\n- **互动阅读体验**：与您的书籍进行自然对话：\n  - \"给我简要介绍一下《LLM工程师手册》\"\n  - \"第3章涵盖了什么内容？\"\n  - \"总结这本书中关于RAG的要点\"\n- **主动学习支持**：通过AI驱动的互动增强您的学习：\n  - \"基于第5章的RAG概念创建一个测验\"\n  - \"解释本章讨论的架构之间的差异\"\n  - \"基于本节的概念给我一些实践练习\"\n- **内容导航**：使用自然语言查询轻松浏览您的书籍：\n  - \"找到所有讨论提示工程的章节\"\n  - \"显示关于微调的章节\"\n  - \"带我到关于向量数据库的部分\"\n\n通过弥合传统电子书与AI功能之间的差距，Ebook-MCP帮助读者通过直观的、基于对话的互动从他们的数字图书馆中提取更多价值。\n\n快速演示 - 
使用DeepSeek运行（请查看mcp-client-example文件夹下所有Claude/OpenAI的示例）\n```\n查询：请检查我下载文件夹中的所有epub书籍。用户ID是onebird。\n......\n\n查询：你能问我几个问题来测试我对这本书的理解吗？\n\n回答：当然！这里有5个关于沃尔特·艾萨克森的《埃隆·马斯克》的问题来测试你对这本书的理解。请根据你的知识回答——我会在每个回答后给出反馈和补充细节。\n\n问题1：\n马斯克的童年和家庭背景如何塑造了他后来的性格和职业生涯？\n（提示：他与父亲的关系，在南非的成长经历等）\n\n问题2：\nZip2和X.com（后来的PayPal）是马斯克早期的创业公司。它们解决了什么问题？马斯克最终是如何退出这些公司的？\n（提示：Zip2的商业模式，X.com与Confinity的合并等）\n\n问题3：\n2008年，SpaceX和特斯拉都濒临破产。马斯克是如何拯救这两家公司的？\n（提示：猎鹰1号的第四次发射，NASA合同，特斯拉的最后一刻救援等）\n\n问题4：\n围绕马斯克管理风格的争议有哪些？书中哪些具体事件说明了他的\"硬核\"或\"极端\"工作文化？\n（提示：弗里蒙特工厂的\"生产地狱\"，推特裁员等）\n\n问题5：\n马斯克对人工智能（AI）的态度是什么？为什么他同时支持OpenAI和Neuralink，这两者似乎有冲突的目标？\n（提示：AI作为生存威胁，脑机接口愿景等）\n\n请回答每个问题。我会提供反馈和额外细节或纠正任何误解！\n（如果问题不清楚，请随时问我。）\n\n查询：\n问题1：他父亲的虐待态度使他对他人缺乏同情心。\n问题2：Zip2解决了电子地图和在线黄页的问题。X.com解决了在线支付问题。\n问题3：他获得了NASA的合同。特斯拉通过在中国建厂解决了生产问题。\n问题4：我不知道。请为我回答。\n问题5：他认为开源技术对确保AI安全和开放是必要的。\n\n回答：你的答案抓住了书中的一些要点。让我为问题4和5添加细节、纠正一些不准确之处并提供进一步背景：\n\n问题1 — 纠正与补充\n父亲的影响：\n马斯克的父亲埃罗尔确实极其严厉——例如，强迫年幼的埃隆跪下数米粒数小时。书中强调这种情感脱敏塑造了马斯克成为一个极其目标导向的人——对他人的痛苦漠不关心，但对自己的目标执着追求。\n\n反向影响：\n这并不是说马斯克没有同情心——而是选择性的。例如，他可能冷淡地对待员工，但会亲自研究医疗设备来解决儿子X的健康问题。\n\n问题2 — 纠正与补充\nZip2：\n本质上是在线黄页+地图导航服务（帮助报纸上线）。马斯克亲自编写了大部分早期代码并睡在办公室。最终，康柏以3.07亿美元收购了Zip2（马斯克获得2200万美元）。\n\nX.com：\n不仅仅是在线支付——这是马斯克试图创建综合金融平台的尝试（旨在取代银行）。后来与Confinity（PayPal的前身）合并。马斯克因内部冲突被驱逐，但当eBay收购PayPal时，马斯克赚了1.8亿美元。\n（注：马斯克后来买回了X.com域名——现在是推特母公司的名称）。\n\n问题3 — 纠正与补充\nSpaceX：\n2008年，猎鹰1号在第四次发射中成功（前三次失败耗尽了资金）。紧接着，SpaceX获得了16亿美元的NASA货物补给合同——这是公司的生命线。\n\n特斯拉：\n2008年，特斯拉通过戴姆勒5000万美元的投资和美国能源部的贷款得以生存。\n在中国建设超级工厂（2019年）是后来的事。在早期，生存依赖于Model S的推出（2012年）和马斯克的个人赌注——出售所有房地产来资助特斯拉。\n\n问题4 — 解释：围绕他管理风格的争议\n\"硬核\"文化：\n\n在SpaceX/特斯拉：马斯克要求员工每周工作80+小时，甚至睡在工厂。（\"如果你不在这里，你的替代者就在。\"）\n\n收购推特后：他立即裁员75%的员工，并要求剩余员工签署\"硬核承诺\"——同意长时间和高强度工作。\n\n极端案例：\n\n在弗里蒙特工厂：在Model 3的\"生产地狱\"期间，马斯克睡在生产线上，曾对员工大喊称他们\"比狗还差\"（他后来道歉）。\n\n解雇高管：马斯克曾因为PowerPoint演示文稿的字体大小不够大而当场解雇了一名特斯拉高级主管。\n\n问题5 — 
纠正与补充\nAI威胁论：\n马斯克认为AI可能摧毁人类（\"比核武器更危险\"）。因此：\n\nOpenAI：他最初支持它以促进安全、开源的AI（但后来因控制权争议而离开）。\n\nNeuralink：他旨在使用脑机接口将人类与AI融合——以防止人类被抛在后面。\n\n矛盾：\n在警告AI风险的同时，马斯克同时在特斯拉推进全自动驾驶（FSD）技术，并创立了xAI来研究通用人工智能（AGI）。\n\n总体评价：\n✅ 你准确地抓住了关键事件，但细节需要加强（如时间线和具体数字）。\n🔍 建议进一步学习的领域：马斯克的多线程管理风格（同时运营五家公司）和他的媒体操控技巧（通过推特治理）。\n```\n\n## 功能特性\n\n### EPUB支持\n- 提取元数据（标题、作者、出版日期等）\n- 提取目录\n- 提取章节内容（Markdown输出）\n- 批量处理EPUB文件\n\n### PDF支持\n- 提取元数据（标题、作者、创建日期等）\n- 提取目录\n- 按页码提取内容\n- 按章节标题提取内容\n- Markdown输出支持\n- 批量处理PDF文件\n\n## 安装\n\n1. 克隆仓库：\n```bash\ngit clone https://github.com/yourusername/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. 使用`uv`安装依赖：\n```bash\nuv pip install -e .\n```\n\n## 使用方法\n\n### 在开发模式下启动MCP服务器\n\n在开发模式下运行服务器：\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\n您可以访问 http://localhost:5173/ 进行测试和调试\n您也可以安装检查器进行测试。\n```\nnpx @modelcontextprotocol/inspector uv --directory . run src/ebook_mcp/main.py\n```\n\n### 在生产模式下启动MCP服务器\n\n运行服务器：\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n#### 在Cursor中配置MCP\n\n在Cursor中添加以下配置\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n### EPUB处理示例\n\n```python\n# 获取目录中的所有EPUB文件\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# 获取EPUB元数据\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# 获取目录\ntoc = get_toc(\"/path/to/book.epub\")\n\n# 获取特定章节内容（Markdown格式）\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### PDF处理示例\n\n```python\n# 获取目录中的所有PDF文件\npdf_files = get_all_pdf_files(\"/path/to/books\")\n\n# 获取PDF元数据\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# 获取目录\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# 获取特定页面内容\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# 获取特定章节内容\nchapter_content, 
page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## API参考\n\n### EPUB APIs\n\n#### `get_all_epub_files(path: str) -> List[str]`\n获取指定目录中的所有EPUB文件。\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\n从EPUB文件获取元数据。\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\n从EPUB文件获取目录。\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\n获取Markdown格式的章节内容。\n\n### PDF APIs\n\n#### `get_all_pdf_files(path: str) -> List[str]`\n获取指定目录中的所有PDF文件。\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\n从PDF文件获取元数据。\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\n从PDF文件获取目录。\n\n#### `get_pdf_page_text(pdf_path: str, page_number: int) -> str`\n从特定页面获取纯文本内容。\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\n从特定页面获取Markdown格式内容。\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\n通过章节标题获取章节内容和对应页码。\n\n## 依赖项\n\n主要依赖包括：\n- ebooklib：EPUB文件处理\n- PyPDF2：基础PDF处理\n- PyMuPDF：高级PDF处理\n- beautifulsoup4：HTML解析\n- html2text：HTML到Markdown转换\n- pydantic：数据验证\n- fastmcp：MCP服务器框架\n\n## 重要说明\n\n1. PDF处理依赖于文档的目录。如果没有目录，某些功能可能无法工作。\n2. 对于大型PDF文件，建议按页面范围处理，以避免一次加载整个文件。\n3. 
EPUB章节ID必须从目录结构中获取。\n\n## 架构\n\n```\n           ┌────────────────────────────┐\n           │         代理层             │\n           │  - 翻译策略                │\n           │  - 风格一致性检查          │\n           │  - LLM调用与交互           │\n           └────────────▲─────────────┘\n                        │ 工具调用\n           ┌────────────┴─────────────┐\n           │        MCP工具层          │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ 系统/IO调用\n           ┌────────────┴─────────────┐\n           │     系统基础层            │\n           │  - 文件读取              │\n           │  - ebooklib解析          │\n           │  - 文件路径存储/检查      │\n           └────────────────────────────┘\n```\n\n## 贡献\n\n我们欢迎Issues和Pull Requests！\n\n有关最近更改的详细信息，请参阅 [CHANGELOG.md](CHANGELOG.md)。"
  },
  {
    "path": "README-DE.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md)\n\nEbook-MCP ist ein leistungsstarker Model Context Protocol (MCP) Server für die Verarbeitung elektronischer Bücher. Aufgebaut auf dem [Model Context Protocol](https://github.com/modelcontextprotocol), bietet es eine Reihe standardisierter APIs für die nahtlose Integration zwischen LLM-Anwendungen und E-Book-Verarbeitungsfunktionen. Unterstützt derzeit EPUB- und PDF-Formate.\n\n## Anwendungsfälle und Wert\n\nEbook-MCP transformiert die Art, wie Sie mit Ihren digitalen Büchern interagieren, indem es natürlichsprachliche Gespräche mit Ihren Lesematerialien ermöglicht. 
Es integriert sich nahtlos in moderne KI-gestützte IDEs wie Cursor und Claude und ermöglicht Ihnen:\n\n- **Intelligente Bibliotheksverwaltung**: Fragen Sie einfach \"Zeigen Sie mir alle EPUB-Dateien in meinem Downloads-Ordner\" oder \"Finden Sie Bücher über GenAI in meiner Bibliothek\"\n- **Interaktive Leseerfahrung**: Führen Sie natürliche Gespräche über Ihre Bücher:\n  - \"Geben Sie mir eine kurze Einführung in das 'LLM Engineer Handbook'\"\n  - \"Was wird in Kapitel 3 behandelt?\"\n  - \"Fassen Sie die wichtigsten Punkte über RAG aus diesem Buch zusammen\"\n- **Aktive Lernunterstützung**: Verbessern Sie Ihr Lernen durch KI-gestützte Interaktionen:\n  - \"Erstellen Sie ein Quiz basierend auf den RAG-Konzepten aus Kapitel 5\"\n  - \"Erklären Sie die Unterschiede zwischen den in diesem Kapitel diskutierten Architekturen\"\n  - \"Geben Sie mir praktische Übungen basierend auf den Konzepten in diesem Abschnitt\"\n- **Inhaltsnavigation**: Navigieren Sie einfach durch Ihre Bücher mit natürlichsprachlichen Abfragen:\n  - \"Finden Sie alle Abschnitte, die Prompt Engineering diskutieren\"\n  - \"Zeigen Sie mir die Kapitel über Fine-Tuning\"\n  - \"Bringen Sie mich zu dem Teil über Vektordatenbanken\"\n\nDurch die Überbrückung der Lücke zwischen traditionellen E-Books und KI-Fähigkeiten hilft Ebook-MCP Lesern dabei, mehr Wert aus ihrer digitalen Bibliothek durch intuitive, gesprächsbasierte Interaktionen zu extrahieren.\n\nSchnelle Demo - Ausführung mit DeepSeek (Bitte überprüfen Sie alle Beispiele mit Claude/OpenAI im mcp-client-example Ordner)\n```\nAnfrage: Bitte überprüfen Sie alle epub-Bücher in meinem Download-Ordner. Benutzer-ID ist onebird.\n......\n\nAnfrage: Können Sie mir ein paar Fragen stellen, um mein Verständnis dieses Buches zu testen?\n\nAntwort: Sicher! Hier sind 5 Fragen über Elon Musk von Walter Isaacson, um Ihr Verständnis des Buches zu testen. 
Bitte antworten Sie basierend auf Ihrem Wissen — ich werde Feedback geben und Details nach jeder Antwort hinzufügen.\n\nFrage 1:\nWie haben Musks Kindheit und familiärer Hintergrund seine Persönlichkeit und Karriere später im Leben geprägt?\n(Hinweis: Seine Beziehung zu seinem Vater, Aufwachsen in Südafrika, etc.)\n\nFrage 2:\nZip2 und X.com (später PayPal) waren Musks frühe Startups. Welche Probleme haben sie gelöst? Wie ist Musk schließlich aus diesen Unternehmen ausgestiegen?\n(Hinweis: Zip2s Geschäftsmodell, X.coms Fusion mit Confinity, etc.)\n\nFrage 3:\n2008 standen sowohl SpaceX als auch Tesla am Rande des Bankrotts. Wie hat Musk diese beiden Unternehmen gerettet?\n(Hinweis: Falcon 1s vierter Start, NASA-Vertrag, Teslas Last-Minute-Rettung, etc.)\n\nFrage 4:\nWelche Kontroversen umgeben Musks Führungsstil? Welche spezifischen Vorfälle im Buch veranschaulichen seine \"Hardcore\"- oder \"extreme\" Arbeitskultur?\n(Hinweis: Fremont-Fabrik \"Produktionshölle\", Twitter-Entlassungen, etc.)\n\nFrage 5:\nWas ist Musks Einstellung zur Künstlichen Intelligenz (KI)? Warum unterstützte er gleichzeitig sowohl OpenAI als auch Neuralink, die widersprüchliche Ziele zu haben scheinen?\n(Hinweis: KI als existenzielle Bedrohung, Gehirn-Maschine-Schnittstellen-Vision, etc.)\n\nBitte beantworten Sie jede Frage. Ich werde Feedback und zusätzliche Details geben oder Missverständnisse korrigieren!\n(Wenn eine Frage unklar ist, fragen Sie mich gerne.)\n\nAnfrage:\nFrage 1: Die missbräuchliche Haltung seines Vaters machte ihn weniger empathisch gegenüber anderen.\nFrage 2: Zip2 löste das Problem elektronischer Karten und Online-Gelbe Seiten. X.com löste Online-Zahlungen.\nFrage 3: Er sicherte sich einen Vertrag mit der NASA. Tesla löste seine Produktionsprobleme durch den Bau einer Fabrik in China.\nFrage 4: Ich weiß es nicht. 
Bitte antworten Sie für mich.\nFrage 5: Er glaubt, dass Open-Source-Technologie notwendig ist, um sicherzustellen, dass KI sicher und offen ist.\n\nAntwort: Ihre Antworten haben einige wichtige Punkte aus dem Buch erfasst. Lassen Sie mich Details hinzufügen, einige Ungenauigkeiten korrigieren und weiteren Hintergrund für die Fragen 4 und 5 liefern:\n\nFrage 1 — Korrektur und Ergänzung\nEinfluss seines Vaters:\nMusks Vater Errol war tatsächlich extrem hart — zum Beispiel zwang er den jungen Elon, stundenlang zu knien und Reiskörner zu zählen. Das Buch betont, dass diese emotionale Desensibilisierung Musk zu einer extrem zielorientierten Person formte — gleichgültig gegenüber dem Leiden anderer, aber obsessiv seinen Zielen verpflichtet.\n\nUmgekehrte Auswirkung:\nEs ist nicht so, dass Musk unfähig zur Empathie ist — sie ist selektiv. Zum Beispiel kann er Angestellte kalt behandeln, aber persönlich medizinische Geräte für die Gesundheitsprobleme seines Sohnes X studieren.\n\nFrage 2 — Korrektur und Ergänzung\nZip2:\nIm Wesentlichen ein Online-Gelbe-Seiten + Kartennavigationsdienst (half Zeitungen online zu gehen). Musk schrieb viel des frühen Codes selbst und schlief im Büro. Schließlich erwarb Compaq Zip2 für 307 Millionen Dollar (Musk erhielt 22 Millionen Dollar).\n\nX.com:\nNicht nur Online-Zahlungen — es war Musks Versuch, eine umfassende Finanzplattform zu schaffen (mit dem Ziel, Banken zu ersetzen). Später fusionierte es mit Confinity (dem Vorläufer von PayPal). Musk wurde aufgrund interner Konflikte verdrängt, aber als eBay PayPal erwarb, verdiente Musk 180 Millionen Dollar.\n(Hinweis: Musk kaufte später die Domain X.com zurück — es ist jetzt der Name von Twitters Muttergesellschaft).\n\nFrage 3 — Korrektur und Ergänzung\nSpaceX:\n2008 gelang Falcon 1 beim vierten Start (nach drei Fehlschlägen, die ihre Mittel erschöpften). 
Direkt danach sicherte sich SpaceX einen 1,6-Milliarden-Dollar-NASA-Vertrag für Frachtversorgung — eine Rettungsleine für das Unternehmen.\n\nTesla:\n2008 überlebte Tesla durch eine 50-Millionen-Dollar-Investition von Daimler und ein Darlehen vom US-Energieministerium.\nDer Bau der Gigafactory in China (2019) kam viel später. In den frühen Jahren hing das Überleben vom Start des Model S (2012) und Musks persönlichem Einsatz ab — dem Verkauf all seiner Immobilien zur Finanzierung von Tesla.\n\nFrage 4 — Erklärung: Kontroverse um seinen Führungsstil\n\"Hardcore\"-Kultur:\n\nBei SpaceX/Tesla: Musk verlangte von Angestellten, 80+ Stunden pro Woche zu arbeiten, sogar in der Fabrik zu schlafen. (\"Wenn Sie nicht hier sind, ist Ihr Ersatz da.\")\n\nNach der Twitter-Übernahme: Er entließ sofort 75% der Belegschaft und verlangte von den verbleibenden Angestellten, ein \"Hardcore-Commitment\" zu unterschreiben — Zustimmung zu langen Stunden und hochintensiver Leistung.\n\nExtreme Fälle:\n\nIn der Fremont-Fabrik: Während der \"Produktionshölle\" des Model 3 schlief Musk auf der Produktionslinie und schrie einmal Angestellte an, nannte sie \"schlechter als Hunde\" (er entschuldigte sich später).\n\nEntlassung von Führungskräften: Musk entließ einmal eine Tesla-Führungskraft auf der Stelle, weil die Schriftgröße in ihrer PowerPoint-Präsentation nicht groß genug war.\n\nFrage 5 — Korrektur und Ergänzung\nKI-Bedrohungstheorie:\nMusk glaubt, dass KI die Menschheit zerstören könnte (\"gefährlicher als Atomwaffen\"). 
Daher:\n\nOpenAI: Er unterstützte es zunächst, um sichere, Open-Source-KI zu fördern (verließ aber später aufgrund von Streitigkeiten über die Kontrolle).\n\nNeuralink: Er zielt darauf ab, Gehirn-Maschine-Schnittstellen zu verwenden, um Menschen mit KI zu verschmelzen — um zu verhindern, dass Menschen zurückgelassen werden.\n\nWidersprüche:\nWährend er vor KI-Risiken warnt, treibt Musk gleichzeitig die Full Self-Driving (FSD) Technologie bei Tesla voran und hat xAI gegründet, um Artificial General Intelligence (AGI) zu erforschen.\n\nGesamtbewertung:\n✅ Sie haben die wichtigsten Ereignisse genau erfasst, aber die Details müssen verstärkt werden (wie Zeitpläne und spezifische Zahlen).\n🔍 Empfohlene Bereiche für weiteres Studium: Musks Multi-Thread-Führungsstil (gleichzeitiges Führen von fünf Unternehmen) und seine Fähigkeit zur Medienmanipulation (Regieren durch Twitter).\n```\n\n## Funktionen\n\n### EPUB-Unterstützung\n- Metadaten extrahieren (Titel, Autor, Veröffentlichungsdatum, etc.)\n- Inhaltsverzeichnis extrahieren\n- Kapitelinhalt extrahieren (mit Markdown-Ausgabe)\n- Stapelverarbeitung von EPUB-Dateien\n\n### PDF-Unterstützung\n- Metadaten extrahieren (Titel, Autor, Erstellungsdatum, etc.)\n- Inhaltsverzeichnis extrahieren\n- Inhalt nach Seitenzahl extrahieren\n- Inhalt nach Kapiteltitel extrahieren\n- Markdown-Ausgabe-Unterstützung\n- Stapelverarbeitung von PDF-Dateien\n\n## Installation\n\n1. Repository klonen:\n```bash\ngit clone https://github.com/yourusername/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. Abhängigkeiten mit `uv` installieren:\n```bash\nuv pip install -e .\n```\n\n## Verwendung\n\n### MCP-Server im Entwicklungsmodus starten\n\nServer im Entwicklungsmodus ausführen:\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\nSie können http://localhost:5173/ für Test- und Debugging-Zwecke besuchen\nSie können auch den Inspector für den Test installieren.\n```\nnpx @modelcontextprotocol/inspector uv --directory . 
run src/ebook_mcp/main.py\n```\n\n### MCP-Server im Produktionsmodus starten\n\nServer ausführen:\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n#### MCP in Cursor konfigurieren\n\nFolgende Konfiguration in Cursor hinzufügen\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n### EPUB-Verarbeitungsbeispiele\n\n```python\n# Alle EPUB-Dateien in einem Verzeichnis abrufen\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# EPUB-Metadaten abrufen\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# Inhaltsverzeichnis abrufen\ntoc = get_toc(\"/path/to/book.epub\")\n\n# Spezifischen Kapitelinhalt abrufen (im Markdown-Format)\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### PDF-Verarbeitungsbeispiele\n\n```python\n# Alle PDF-Dateien in einem Verzeichnis abrufen\npdf_files = get_all_pdf_files(\"/path/to/books\")\n\n# PDF-Metadaten abrufen\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# Inhaltsverzeichnis abrufen\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# Spezifischen Seiteninhalt abrufen\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# Spezifischen Kapitelinhalt abrufen\nchapter_content, page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## API-Referenz\n\n### EPUB-APIs\n\n#### `get_all_epub_files(path: str) -> List[str]`\nAlle EPUB-Dateien im angegebenen Verzeichnis abrufen.\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\nMetadaten aus einer EPUB-Datei abrufen.\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\nInhaltsverzeichnis aus einer EPUB-Datei abrufen.\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\nKapitelinhalt 
im Markdown-Format abrufen.\n\n### PDF-APIs\n\n#### `get_all_pdf_files(path: str) -> List[str]`\nAlle PDF-Dateien im angegebenen Verzeichnis abrufen.\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\nMetadaten aus einer PDF-Datei abrufen.\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\nInhaltsverzeichnis aus einer PDF-Datei abrufen.\n\n#### `get_pdf_page_text(pdf_path: str, page_number: int) -> str`\nKlartext-Inhalt von einer spezifischen Seite abrufen.\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\nMarkdown-formatierten Inhalt von einer spezifischen Seite abrufen.\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\nKapitelinhalt und entsprechende Seitenzahlen nach Kapiteltitel abrufen.\n\n## Abhängigkeiten\n\nWichtige Abhängigkeiten umfassen:\n- ebooklib: EPUB-Dateiverarbeitung\n- PyPDF2: Grundlegende PDF-Verarbeitung\n- PyMuPDF: Erweiterte PDF-Verarbeitung\n- beautifulsoup4: HTML-Parsing\n- html2text: HTML zu Markdown Konvertierung\n- pydantic: Datenvalidierung\n- fastmcp: MCP-Server-Framework\n\n## Wichtige Hinweise\n\n1. PDF-Verarbeitung basiert auf dem Inhaltsverzeichnis des Dokuments. Einige Funktionen funktionieren möglicherweise nicht, wenn das Inhaltsverzeichnis nicht verfügbar ist.\n2. Für große PDF-Dateien wird empfohlen, nach Seitenbereichen zu verarbeiten, um das Laden der gesamten Datei auf einmal zu vermeiden.\n3. 
EPUB-Kapitel-IDs müssen aus der Inhaltsverzeichnisstruktur abgerufen werden.\n\n## Architektur\n\n```\n           ┌────────────────────────────┐\n           │         Agent-Schicht      │\n           │  - Übersetzungsstrategie   │\n           │  - Stil-Konsistenzprüfung  │\n           │  - LLM-Aufruf & Interaktion│\n           └────────────▲─────────────┘\n                        │ Tool-Aufrufe\n           ┌────────────┴─────────────┐\n           │        MCP-Tool-Schicht  │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ System/IO-Aufrufe\n           ┌────────────┴─────────────┐\n           │     System-Basis-Schicht │\n           │  - Datei-Lesen           │\n           │  - ebooklib-Parsing      │\n           │  - Dateipfad-Speicher/Prüf│\n           └────────────────────────────┘\n```\n\n## Mitwirken\n\nWir begrüßen Issues und Pull Requests!\n\nDetaillierte Informationen zu den letzten Änderungen finden Sie in [CHANGELOG.md](CHANGELOG.md)."
  },
  {
    "path": "README-FR.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md)\n\nEbook-MCP est un serveur Model Context Protocol (MCP) puissant pour le traitement des livres électroniques. Construit sur le [Model Context Protocol](https://github.com/modelcontextprotocol), il fournit un ensemble d'API standardisées pour une intégration transparente entre les applications LLM et les capacités de traitement des livres électroniques. Prend actuellement en charge les formats EPUB et PDF.\n\n## Cas d'usage et valeur\n\nEbook-MCP transforme la façon dont vous interagissez avec vos livres numériques en permettant des conversations en langage naturel avec vos matériaux de lecture. 
Il s'intègre parfaitement avec les IDE modernes alimentés par l'IA comme Cursor et Claude, vous permettant de :\n\n- **Gestion intelligente de bibliothèque** : Demandez simplement \"Montrez-moi tous les fichiers EPUB dans mon dossier de téléchargements\" ou \"Trouvez des livres sur GenAI dans ma bibliothèque\"\n- **Expérience de lecture interactive** : Ayez des conversations naturelles sur vos livres :\n  - \"Donnez-moi une brève introduction au 'Manuel de l'ingénieur LLM'\"\n  - \"Que couvre le chapitre 3 ?\"\n  - \"Résumez les points clés sur RAG de ce livre\"\n- **Support d'apprentissage actif** : Améliorez votre apprentissage grâce aux interactions alimentées par l'IA :\n  - \"Créez un quiz basé sur les concepts RAG du chapitre 5\"\n  - \"Expliquez les différences entre les architectures discutées dans ce chapitre\"\n  - \"Donnez-moi des exercices pratiques basés sur les concepts de cette section\"\n- **Navigation de contenu** : Naviguez facilement dans vos livres avec des requêtes en langage naturel :\n  - \"Trouvez toutes les sections discutant de l'ingénierie des prompts\"\n  - \"Montrez-moi les chapitres sur le fine-tuning\"\n  - \"Amenez-moi à la partie sur les bases de données vectorielles\"\n\nEn comblant le fossé entre les livres électroniques traditionnels et les capacités de l'IA, Ebook-MCP aide les lecteurs à extraire plus de valeur de leur bibliothèque numérique grâce à des interactions intuitives basées sur la conversation.\n\nDémo rapide - Exécuter avec DeepSeek (Veuillez vérifier tous les exemples avec Claude/OpenAI dans le dossier mcp-client-example)\n```\nRequête : Veuillez vérifier tous les livres epub dans mon dossier de téléchargement. L'ID utilisateur est onebird.\n......\n\nRequête : Pouvez-vous me poser quelques questions pour tester ma compréhension de ce livre ?\n\nRéponse : Bien sûr ! Voici 5 questions sur Elon Musk par Walter Isaacson pour tester votre compréhension du livre. 
Veuillez répondre en vous basant sur vos connaissances — je donnerai des commentaires et ajouterai des détails après chaque réponse.\n\nQuestion 1 :\nComment l'enfance et le contexte familial de Musk ont-ils façonné sa personnalité et sa carrière plus tard dans la vie ?\n(Indice : Sa relation avec son père, grandir en Afrique du Sud, etc.)\n\nQuestion 2 :\nZip2 et X.com (plus tard PayPal) étaient les premières startups de Musk. Quels problèmes ont-ils résolus ? Comment Musk a-t-il finalement quitté ces entreprises ?\n(Indice : Le modèle commercial de Zip2, la fusion de X.com avec Confinity, etc.)\n\nQuestion 3 :\nEn 2008, SpaceX et Tesla étaient tous deux au bord de la faillite. Comment Musk a-t-il sauvé ces deux entreprises ?\n(Indice : Le quatrième lancement de Falcon 1, le contrat NASA, le sauvetage de dernière minute de Tesla, etc.)\n\nQuestion 4 :\nQuelles controverses entourent le style de gestion de Musk ? Quels incidents spécifiques dans le livre illustrent sa culture de travail \"hardcore\" ou \"extrême\" ?\n(Indice : \"L'enfer de production\" de l'usine Fremont, les licenciements Twitter, etc.)\n\nQuestion 5 :\nQuelle est l'attitude de Musk envers l'Intelligence Artificielle (IA) ? Pourquoi a-t-il simultanément soutenu OpenAI et Neuralink, qui semblent avoir des objectifs contradictoires ?\n(Indice : L'IA comme menace existentielle, vision d'interface cerveau-machine, etc.)\n\nVeuillez répondre à chaque question. Je fournirai des commentaires et des détails supplémentaires ou corrigerai tout malentendu !\n(Si une question n'est pas claire, n'hésitez pas à me demander.)\n\nRequête :\nQuestion 1 : L'attitude abusive de son père l'a rendu moins empathique envers les autres.\nQuestion 2 : Zip2 a résolu le problème des cartes électroniques et des pages jaunes en ligne. X.com a résolu les paiements en ligne.\nQuestion 3 : Il a obtenu un contrat avec la NASA. 
Tesla a résolu ses problèmes de production en construisant une usine en Chine.\nQuestion 4 : Je ne sais pas. Veuillez répondre pour moi.\nQuestion 5 : Il croit que la technologie open-source est nécessaire pour s'assurer que l'IA soit sûre et ouverte.\n\nRéponse : Vos réponses ont capturé quelques points clés du livre. Laissez-moi ajouter des détails, corriger quelques inexactitudes et fournir plus de contexte pour les questions 4 et 5 :\n\nQuestion 1 — Correction et complément\nInfluence de son père :\nLe père de Musk, Errol, était effectivement extrêmement dur — par exemple, forçant le jeune Elon à s'agenouiller et compter des grains de riz pendant des heures. Le livre souligne que cette désensibilisation émotionnelle a façonné Musk en une personne extrêmement orientée vers les objectifs — indifférente à la souffrance des autres mais obsessionnellement engagée envers ses objectifs.\n\nImpact inverse :\nCe n'est pas que Musk soit incapable d'empathie — elle est sélective. Par exemple, il peut traiter les employés froidement mais étudier personnellement l'équipement médical pour les problèmes de santé de son fils X.\n\nQuestion 2 — Correction et complément\nZip2 :\nEssentiellement un service de pages jaunes en ligne + navigation cartographique (aidant les journaux à aller en ligne). Musk a écrit une grande partie du code initial lui-même et dormait au bureau. Finalement, Compaq a acquis Zip2 pour 307 millions de dollars (Musk a reçu 22 millions de dollars).\n\nX.com :\nPas seulement les paiements en ligne — c'était la tentative de Musk de créer une plateforme financière complète (visant à remplacer les banques). Plus tard fusionné avec Confinity (le précurseur de PayPal). 
Musk a été évincé en raison de conflits internes, mais quand eBay a acquis PayPal, Musk a gagné 180 millions de dollars.\n(Note : Musk a plus tard racheté le domaine X.com — c'est maintenant le nom de la société mère de Twitter).\n\nQuestion 3 — Correction et complément\nSpaceX :\nEn 2008, Falcon 1 a réussi lors de son quatrième lancement (après trois échecs qui ont épuisé leurs fonds). Juste après cela, SpaceX a obtenu un contrat NASA de 1,6 milliard de dollars pour le réapprovisionnement de fret — une bouée de sauvetage pour l'entreprise.\n\nTesla :\nEn 2008, Tesla a survécu grâce à un investissement de 50 millions de dollars de Daimler et un prêt du Département de l'Énergie américain.\nLa construction de la Gigafactory en Chine (2019) est venue beaucoup plus tard. Dans les premières années, la survie dépendait du lancement de la Model S (2012) et du pari personnel de Musk — vendre tous ses biens immobiliers pour financer Tesla.\n\nQuestion 4 — Explication : Controverse autour de son style de gestion\nCulture \"Hardcore\" :\n\nChez SpaceX/Tesla : Musk exigeait que les employés travaillent 80+ heures par semaine, dormant même à l'usine. 
(\"Si vous n'êtes pas ici, votre remplaçant l'est.\")\n\nAprès l'acquisition de Twitter : Il a immédiatement licencié 75% du personnel et demandé aux employés restants de signer un \"engagement hardcore\" — acceptant de longues heures et une production de haute intensité.\n\nCas extrêmes :\n\nÀ l'usine Fremont : Pendant \"l'enfer de production\" de la Model 3, Musk dormait sur la ligne de production et a une fois crié sur les employés en les appelant \"pires que des chiens\" (il s'est excusé plus tard).\n\nLicenciement de cadres : Musk a une fois licencié sur-le-champ un cadre supérieur de Tesla parce que la taille de police dans leur présentation PowerPoint n'était pas assez grande.\n\nQuestion 5 — Correction et complément\nThéorie de la menace IA :\nMusk croit que l'IA pourrait détruire l'humanité (\"plus dangereuse que les armes nucléaires\"). Par conséquent :\n\nOpenAI : Il l'a initialement soutenu pour promouvoir une IA sûre et open-source (mais est parti plus tard en raison de disputes sur le contrôle).\n\nNeuralink : Il vise à utiliser les interfaces cerveau-machine pour fusionner les humains avec l'IA — pour empêcher les humains d'être laissés pour compte.\n\nContradictions :\nTout en avertissant des risques de l'IA, Musk pousse simultanément la technologie de conduite entièrement autonome (FSD) chez Tesla et a fondé xAI pour rechercher l'Intelligence Artificielle Générale (AGI).\n\nÉvaluation globale :\n✅ Vous avez capturé les événements clés avec précision, mais les détails doivent être renforcés (comme les chronologies et les chiffres spécifiques).\n🔍 Domaines recommandés pour une étude plus approfondie : Le style de gestion multi-thread de Musk (dirigeant cinq entreprises simultanément) et sa compétence en manipulation médiatique (gouverner par Twitter).\n```\n\n## Fonctionnalités\n\n### Support EPUB\n- Extraire les métadonnées (titre, auteur, date de publication, etc.)\n- Extraire la table des matières\n- Extraire le contenu des chapitres (avec sortie 
Markdown)\n- Traitement par lots des fichiers EPUB\n\n### Support PDF\n- Extraire les métadonnées (titre, auteur, date de création, etc.)\n- Extraire la table des matières\n- Extraire le contenu par numéro de page\n- Extraire le contenu par titre de chapitre\n- Support de sortie Markdown\n- Traitement par lots des fichiers PDF\n\n## Installation\n\n1. Cloner le dépôt :\n```bash\ngit clone https://github.com/yourusername/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. Installer les dépendances en utilisant `uv` :\n```bash\nuv pip install -e .\n```\n\n## Utilisation\n\n### Démarrer le serveur MCP en mode développement\n\nExécuter le serveur en mode développement :\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\nVous pouvez visiter http://localhost:5173/ à des fins de test et de débogage\nVous pouvez également installer l'inspecteur pour le test.\n```\nnpx @modelcontextprotocol/inspector uv --directory . run src/ebook_mcp/main.py\n```\n\n### Démarrer le serveur MCP en mode production\n\nExécuter le serveur :\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n#### Configurer le MCP dans Cursor\n\nAjouter la configuration suivante dans Cursor\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n### Exemples de traitement EPUB\n\n```python\n# Obtenir tous les fichiers EPUB dans un répertoire\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# Obtenir les métadonnées EPUB\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# Obtenir la table des matières\ntoc = get_toc(\"/path/to/book.epub\")\n\n# Obtenir le contenu d'un chapitre spécifique (au format Markdown)\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### Exemples de traitement PDF\n\n```python\n# Obtenir tous les fichiers PDF dans un répertoire\npdf_files 
= get_all_pdf_files(\"/path/to/books\")\n\n# Obtenir les métadonnées PDF\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# Obtenir la table des matières\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# Obtenir le contenu d'une page spécifique\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# Obtenir le contenu d'un chapitre spécifique\nchapter_content, page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## Référence API\n\n### APIs EPUB\n\n#### `get_all_epub_files(path: str) -> List[str]`\nObtenir tous les fichiers EPUB dans le répertoire spécifié.\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\nObtenir les métadonnées d'un fichier EPUB.\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\nObtenir la table des matières d'un fichier EPUB.\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\nObtenir le contenu du chapitre au format Markdown.\n\n### APIs PDF\n\n#### `get_all_pdf_files(path: str) -> List[str]`\nObtenir tous les fichiers PDF dans le répertoire spécifié.\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\nObtenir les métadonnées d'un fichier PDF.\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\nObtenir la table des matières d'un fichier PDF.\n\n#### `get_pdf_page_text(pdf_path: str, page_number: int) -> str`\nObtenir le contenu en texte brut d'une page spécifique.\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\nObtenir le contenu formaté Markdown d'une page spécifique.\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\nObtenir le contenu du chapitre et les numéros de page correspondants par titre de chapitre.\n\n## Dépendances\n\nLes dépendances clés incluent :\n- ebooklib : Traitement des fichiers EPUB\n- PyPDF2 : Traitement PDF de base\n- PyMuPDF : Traitement PDF avancé\n- 
beautifulsoup4 : Analyse HTML\n- html2text : Conversion HTML vers Markdown\n- pydantic : Validation des données\n- fastmcp : Framework de serveur MCP\n\n## Notes importantes\n\n1. Le traitement PDF dépend de la table des matières du document. Certaines fonctionnalités peuvent ne pas fonctionner si la table des matières n'est pas disponible.\n2. Pour les gros fichiers PDF, il est recommandé de traiter par plages de pages pour éviter de charger tout le fichier en une fois.\n3. Les ID de chapitre EPUB doivent être obtenus à partir de la structure de la table des matières.\n\n## Architecture\n\n```\n           ┌────────────────────────────┐\n           │      Couche Agent          │\n           │  - Stratégie de traduction │\n           │  - Vérification cohérence  │\n           │  - Appel LLM & Interaction │\n           └────────────▲─────────────┘\n                        │ Appels d'outils\n           ┌────────────┴─────────────┐\n           │     Couche Outils MCP    │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ Appels Système/IO\n           ┌────────────┴─────────────┐\n           │   Couche Base Système    │\n           │  - Lecture de fichiers   │\n           │  - Analyse ebooklib      │\n           │  - Stockage/Vérif. chemin│\n           └────────────────────────────┘\n```\n\n## Contribution\n\nNous accueillons les Issues et Pull Requests !\n\nPour des informations détaillées sur les changements récents, veuillez consulter [CHANGELOG.md](CHANGELOG.md)."
  },
  {
    "path": "README-JP.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md)\n\nEbook-MCPは、電子書籍を処理するための強力なModel Context Protocol（MCP）サーバーです。[Model Context Protocol](https://github.com/modelcontextprotocol)上に構築され、LLMアプリケーションと電子書籍処理機能間のシームレスな統合のための標準化されたAPIセットを提供します。現在、EPUBおよびPDF形式をサポートしています。\n\n## 使用例と価値\n\nEbook-MCPは、読書資料との自然言語による会話を可能にすることで、デジタル書籍との相互作用を変革します。CursorやClaudeなどの現代的なAI駆動IDEとシームレスに統合し、以下のことを可能にします：\n\n- **スマートライブラリ管理**：「ダウンロードフォルダ内のすべてのEPUBファイルを表示して」や「ライブラリ内のGenAIに関する本を見つけて」と簡単に尋ねることができます\n- **インタラクティブな読書体験**：本について自然な会話を行う：\n  - 「『LLMエンジニアハンドブック』の簡単な紹介をして」\n  - 「第3章では何が扱われていますか？」\n  - 「この本からRAGに関する要点をまとめて」\n- **アクティブラーニングサポート**：AI駆動のインタラクションを通じて学習を強化：\n  - 「第5章のRAG概念に基づいてクイズを作成して」\n  - 「この章で議論されているアーキテクチャの違いを説明して」\n  - 「このセクションの概念に基づいて実践的な演習を提供して」\n- **コンテンツナビゲーション**：自然言語クエリで本を簡単にナビゲート：\n  - 「プロンプトエンジニアリングについて議論しているすべてのセクションを見つけて」\n  - 「ファインチューニングに関する章を表示して」\n  - 「ベクターデータベースに関する部分に案内して」\n\n従来の電子書籍とAI機能の間のギャップを埋めることで、Ebook-MCPは読者が直感的で会話ベースのインタラクションを通じてデジタルライブラリからより多くの価値を抽出するのを支援します。\n\nクイックデモ - DeepSeekで実行（mcp-client-exampleフォルダ下のClaude/OpenAIのすべての例をご確認ください）\n```\nクエリ：私のダウンロードフォルダ内のすべてのepub本をチェックしてください。ユーザーIDはonebirdです。\n......\n\nクエリ：この本の理解度をテストするためにいくつか質問してもらえますか？\n\n回答：もちろんです！ウォルター・アイザックソンの『イーロン・マスク』についてあなたの理解をテストするための5つの質問があります。あなたの知識に基づいて答えてください — 
各回答の後にフィードバックと詳細を追加します。\n\n質問1：\nマスクの幼少期と家族背景は、後の人生での彼の性格とキャリアをどのように形作りましたか？\n（ヒント：父親との関係、南アフリカでの成長など）\n\n質問2：\nZip2とX.com（後のPayPal）はマスクの初期のスタートアップでした。これらはどのような問題を解決しましたか？マスクは最終的にこれらの会社からどのように退出しましたか？\n（ヒント：Zip2のビジネスモデル、X.comとConfinityの合併など）\n\n質問3：\n2008年、SpaceXとTeslaの両方が破産の危機に瀕していました。マスクはこれら2つの会社をどのように救いましたか？\n（ヒント：Falcon 1の4回目の打ち上げ、NASAの契約、Teslaの土壇場での救済など）\n\n質問4：\nマスクの管理スタイルを巡ってどのような論争がありますか？本の中で彼の「ハードコア」または「極端な」労働文化を示す具体的な事件は何ですか？\n（ヒント：フリーモント工場の「生産地獄」、Twitterのレイオフなど）\n\n質問5：\n人工知能（AI）に対するマスクの態度は何ですか？なぜ彼は同時にOpenAIとNeuralinkの両方を支援したのですか？これらは相反する目標を持っているように見えます。\n（ヒント：存在的脅威としてのAI、ブレイン・マシン・インターフェースのビジョンなど）\n\n各質問に答えてください。フィードバックと追加の詳細を提供し、誤解があれば訂正します！\n（質問が不明な場合は、お気軽にお尋ねください。）\n\nクエリ：\n質問1：父親の虐待的な態度により、彼は他人に対して共感が少なくなりました。\n質問2：Zip2は電子地図とオンラインイエローページの問題を解決しました。X.comはオンライン決済を解決しました。\n質問3：彼はNASAとの契約を確保しました。Teslaは中国に工場を建設することで生産問題を解決しました。\n質問4：わかりません。私のために答えてください。\n質問5：彼はAIが安全でオープンであることを確保するためにオープンソース技術が必要だと信じています。\n\n回答：あなたの答えは本からいくつかの重要なポイントを捉えています。質問4と5について詳細を追加し、いくつかの不正確さを訂正し、さらなる背景を提供させてください：\n\n質問1 — 訂正と補足\n父親の影響：\nマスクの父親エロルは確かに極めて厳しく — 例えば、幼いイーロンに何時間も跪いて米粒を数えることを強制しました。本書は、この感情的な鈍感化がマスクを極めて目標志向の人間に形作ったことを強調しています — 他人の苦痛には無関心だが、自分の目標に執着的にコミットしています。\n\n逆の影響：\nマスクが共感能力がないということではありません — それは選択的です。例えば、従業員を冷たく扱うかもしれませんが、息子Xの健康問題のために医療機器を個人的に研究します。\n\n質問2 — 訂正と補足\nZip2：\n本質的にはオンラインイエローページ + 地図ナビゲーションサービス（新聞のオンライン化を支援）。マスクは初期のコードの多くを自分で書き、オフィスで寝泊まりしました。最終的に、CompaqがZip2を3億700万ドルで買収（マスクは2200万ドルを受け取りました）。\n\nX.com：\n単なるオンライン決済ではありません — これは包括的な金融プラットフォームを作成するマスクの試み（銀行の置き換えを目指していました）。後にConfinity（PayPalの前身）と合併。マスクは内部対立により追放されましたが、eBayがPayPalを買収した際、マスクは1億8000万ドルを稼ぎました。\n（注：マスクは後にX.comドメインを買い戻しました — 現在はTwitterの親会社名です）。\n\n質問3 — 訂正と補足\nSpaceX：\n2008年、Falcon 1は4回目の打ち上げで成功しました（3回の失敗で資金が枯渇した後）。その直後、SpaceXは貨物補給のための16億ドルのNASA契約を確保 — 会社の生命線でした。\n\nTesla：\n2008年、TeslaはDaimlerからの5000万ドルの投資と米国エネルギー省からの融資により生き残りました。\n中国でのギガファクトリー建設（2019年）はずっと後のことです。初期の頃、生存はModel Sの発売（2012年）とマスクの個人的な賭け — Teslaに資金提供するためにすべての不動産を売却することに依存していました。\n\n質問4 — 
説明：彼の管理スタイルを巡る論争\n「ハードコア」文化：\n\nSpaceX/Teslaで：マスクは従業員に週80時間以上の労働を要求し、工場で寝ることさえありました。（「あなたがここにいなければ、あなたの代替者がいます。」）\n\nTwitter買収後：彼は即座にスタッフの75%を解雇し、残りの従業員に「ハードコアコミットメント」への署名を求めました — 長時間と高強度の出力に同意することです。\n\n極端なケース：\n\nフリーモント工場で：Model 3の「生産地獄」の間、マスクは生産ラインで寝て、従業員を「犬より悪い」と呼んで怒鳴ったことがありました（彼は後に謝罪しました）。\n\n幹部の解雇：マスクはPowerPointプレゼンテーションのフォントサイズが十分大きくないという理由で、Teslaの上級幹部をその場で解雇したことがあります。\n\n質問5 — 訂正と補足\nAI脅威論：\nマスクはAIが人類を破壊する可能性があると信じています（「核兵器より危険」）。したがって：\n\nOpenAI：彼は最初、安全でオープンソースのAIを促進するためにそれを支援しました（しかし後に制御権を巡る争いで離脱）。\n\nNeuralink：彼はブレイン・マシン・インターフェースを使用して人間とAIを融合させることを目指しています — 人間が取り残されることを防ぐためです。\n\n矛盾：\nAIリスクについて警告する一方で、マスクは同時にTeslaで完全自動運転（FSD）技術を推進し、人工汎用知能（AGI）を研究するためにxAIを設立しています。\n\n全体的な評価：\n✅ あなたは重要な出来事を正確に捉えていますが、詳細を強化する必要があります（タイムラインや具体的な数字など）。\n🔍 さらなる学習が推奨される分野：マスクのマルチスレッド管理スタイル（5つの会社を同時に運営）とメディア操作のスキル（Twitterを通じた統治）。\n```\n\n## 機能\n\n### EPUBサポート\n- メタデータの抽出（タイトル、著者、出版日など）\n- 目次の抽出\n- 章の内容の抽出（Markdown出力付き）\n- EPUBファイルのバッチ処理\n\n### PDFサポート\n- メタデータの抽出（タイトル、著者、作成日など）\n- 目次の抽出\n- ページ番号による内容の抽出\n- 章タイトルによる内容の抽出\n- Markdown出力サポート\n- PDFファイルのバッチ処理\n\n## インストール\n\n1. リポジトリをクローン：\n```bash\ngit clone https://github.com/onebirdrocks/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. `uv`を使用して依存関係をインストール：\n```bash\nuv pip install -e .\n```\n\n## 使用方法\n\n### 開発モードでMCPサーバーを起動\n\n開発モードでサーバーを実行：\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\nテストとデバッグのために http://localhost:5173/ にアクセスできます\nテスト用にインスペクターをインストールすることもできます。\n```\nnpx @modelcontextprotocol/inspector uv --directory . 
run src/ebook_mcp/main.py\n```\n\n### 本番モードでMCPサーバーを起動\n\nサーバーを実行：\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n#### CursorでMCPを設定\n\nCursorに以下の設定を追加\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n### EPUB処理の例\n\n```python\n# ディレクトリ内のすべてのEPUBファイルを取得\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# EPUBメタデータを取得\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# 目次を取得\ntoc = get_toc(\"/path/to/book.epub\")\n\n# 特定の章の内容を取得（Markdown形式）\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### PDF処理の例\n\n```python\n# ディレクトリ内のすべてのPDFファイルを取得\npdf_files = get_all_pdf_files(\"/path/to/books\")\n\n# PDFメタデータを取得\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# 目次を取得\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# 特定のページの内容を取得\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# 特定の章の内容を取得\nchapter_content, page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## APIリファレンス\n\n### EPUB APIs\n\n#### `get_all_epub_files(path: str) -> List[str]`\n指定されたディレクトリ内のすべてのEPUBファイルを取得。\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\nEPUBファイルからメタデータを取得。\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\nEPUBファイルから目次を取得。\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\nMarkdown形式で章の内容を取得。\n\n### PDF APIs\n\n#### `get_all_pdf_files(path: str) -> List[str]`\n指定されたディレクトリ内のすべてのPDFファイルを取得。\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\nPDFファイルからメタデータを取得。\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\nPDFファイルから目次を取得。\n\n#### `get_pdf_page_text(pdf_path: str, page_number: int) -> 
str`\n特定のページからプレーンテキスト内容を取得。\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\n特定のページからMarkdown形式の内容を取得。\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\n章タイトルによって章の内容と対応するページ番号を取得。\n\n## 依存関係\n\n主要な依存関係には以下が含まれます：\n- ebooklib：EPUBファイル処理\n- PyPDF2：基本的なPDF処理\n- PyMuPDF：高度なPDF処理\n- beautifulsoup4：HTML解析\n- html2text：HTMLからMarkdownへの変換\n- pydantic：データ検証\n- fastmcp：MCPサーバーフレームワーク\n\n## 重要な注意事項\n\n1. PDF処理は文書の目次に依存します。目次が利用できない場合、一部の機能が動作しない可能性があります。\n2. 大きなPDFファイルの場合、ファイル全体を一度に読み込むことを避けるため、ページ範囲で処理することをお勧めします。\n3. EPUB章IDは目次構造から取得する必要があります。\n\n## アーキテクチャ\n\n```\n           ┌────────────────────────────┐\n           │         エージェント層      │\n           │  - 翻訳戦略                │\n           │  - スタイル一貫性チェック    │\n           │  - LLM呼び出しと相互作用    │\n           └────────────▲─────────────┘\n                        │ ツール呼び出し\n           ┌────────────┴─────────────┐\n           │        MCPツール層         │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ システム/IO呼び出し\n           ┌────────────┴─────────────┐\n           │     システム基盤層         │\n           │  - ファイル読み取り        │\n           │  - ebooklib解析           │\n           │  - ファイルパス保存/チェック│\n           └────────────────────────────┘\n```\n\n## 貢献\n\nIssuesとPull Requestsを歓迎します！\n\n最近の変更の詳細については、[CHANGELOG.md](CHANGELOG.md)をご覧ください。"
  },
  {
    "path": "README-KR.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md)\n\nEbook-MCP는 전자책 처리를 위한 강력한 Model Context Protocol(MCP) 서버입니다. [Model Context Protocol](https://github.com/modelcontextprotocol) 위에 구축되어 LLM 애플리케이션과 전자책 처리 기능 간의 원활한 통합을 위한 표준화된 API 세트를 제공합니다. 현재 EPUB 및 PDF 형식을 지원합니다.\n\n## 사용 사례 및 가치\n\nEbook-MCP는 독서 자료와의 자연어 대화를 가능하게 하여 디지털 도서와의 상호작용을 변화시킵니다. Cursor 및 Claude와 같은 현대적인 AI 기반 IDE와 원활하게 통합되어 다음을 가능하게 합니다:\n\n- **스마트 라이브러리 관리**: \"다운로드 폴더의 모든 EPUB 파일을 보여줘\" 또는 \"내 라이브러리에서 GenAI에 관한 책을 찾아줘\"라고 간단히 물어보세요\n- **대화형 독서 경험**: 책에 대해 자연스러운 대화를 나누세요:\n  - \"'LLM 엔지니어 핸드북'에 대한 간단한 소개를 해줘\"\n  - \"3장에서는 무엇을 다루나요?\"\n  - \"이 책에서 RAG에 대한 핵심 포인트를 요약해줘\"\n- **능동적 학습 지원**: AI 기반 상호작용을 통해 학습을 향상시키세요:\n  - \"5장의 RAG 개념을 바탕으로 퀴즈를 만들어줘\"\n  - \"이 장에서 논의된 아키텍처 간의 차이점을 설명해줘\"\n  - \"이 섹션의 개념을 바탕으로 실습 문제를 제공해줘\"\n- **콘텐츠 내비게이션**: 자연어 쿼리로 책을 쉽게 탐색하세요:\n  - \"프롬프트 엔지니어링을 논의하는 모든 섹션을 찾아줘\"\n  - \"파인튜닝에 관한 장을 보여줘\"\n  - \"벡터 데이터베이스에 관한 부분으로 안내해줘\"\n\n전통적인 전자책과 AI 기능 간의 격차를 해소함으로써, Ebook-MCP는 독자들이 직관적이고 대화 기반의 상호작용을 통해 디지털 라이브러리에서 더 많은 가치를 추출할 수 있도록 도와줍니다.\n\n빠른 데모 - DeepSeek으로 실행 (mcp-client-example 폴더 하위의 Claude/OpenAI 모든 예제를 확인해주세요)\n```\n쿼리: 내 다운로드 폴더의 모든 epub 책을 확인해주세요. 사용자 ID는 onebird입니다.\n......\n\n쿼리: 이 책에 대한 이해도를 테스트하기 위해 몇 가지 질문을 해주실 수 있나요?\n\n답변: 물론입니다! 월터 아이작슨의 『일론 머스크』에 대한 이해도를 테스트하기 위한 5가지 질문이 있습니다. 
당신의 지식을 바탕으로 답변해주세요 — 각 답변 후에 피드백과 세부사항을 추가하겠습니다.\n\n질문 1:\n머스크의 어린 시절과 가족 배경이 그의 성격과 후의 경력에 어떤 영향을 미쳤나요?\n(힌트: 아버지와의 관계, 남아프리카에서의 성장 등)\n\n질문 2:\nZip2와 X.com(후의 PayPal)은 머스크의 초기 스타트업이었습니다. 이들은 어떤 문제를 해결했나요? 머스크는 결국 이 회사들에서 어떻게 퇴출했나요?\n(힌트: Zip2의 비즈니스 모델, X.com과 Confinity의 합병 등)\n\n질문 3:\n2008년, SpaceX와 Tesla 모두 파산 위기에 처했습니다. 머스크는 이 두 회사를 어떻게 구했나요?\n(힌트: Falcon 1의 네 번째 발사, NASA 계약, Tesla의 막판 구조 등)\n\n질문 4:\n머스크의 관리 스타일을 둘러싼 논란은 무엇인가요? 책에서 그의 \"하드코어\" 또는 \"극단적인\" 업무 문화를 보여주는 구체적인 사건은 무엇인가요?\n(힌트: 프리몬트 공장의 \"생산 지옥\", 트위터 해고 등)\n\n질문 5:\n인공지능(AI)에 대한 머스크의 태도는 무엇인가요? 왜 그는 상충하는 목표를 가진 것처럼 보이는 OpenAI와 Neuralink를 동시에 지원했나요?\n(힌트: 존재적 위협으로서의 AI, 뇌-기계 인터페이스 비전 등)\n\n각 질문에 답변해주세요. 피드백과 추가 세부사항을 제공하거나 오해가 있다면 수정하겠습니다!\n(질문이 명확하지 않다면 언제든 물어보세요.)\n\n쿼리:\n질문 1: 아버지의 학대적인 태도로 인해 그는 다른 사람들에 대한 공감이 부족해졌습니다.\n질문 2: Zip2는 전자 지도와 온라인 옐로우페이지 문제를 해결했습니다. X.com은 온라인 결제를 해결했습니다.\n질문 3: 그는 NASA와 계약을 확보했습니다. Tesla는 중국에 공장을 건설하여 생산 문제를 해결했습니다.\n질문 4: 모르겠습니다. 저를 위해 답변해주세요.\n질문 5: 그는 AI가 안전하고 개방적이도록 보장하기 위해 오픈소스 기술이 필요하다고 믿습니다.\n\n답변: 당신의 답변은 책의 몇 가지 핵심 포인트를 잘 포착했습니다. 질문 4와 5에 대한 세부사항을 추가하고, 일부 부정확한 부분을 수정하며, 추가 배경을 제공하겠습니다:\n\n질문 1 — 수정 및 보완\n아버지의 영향:\n머스크의 아버지 에롤은 확실히 극도로 가혹했습니다 — 예를 들어, 어린 일론에게 무릎을 꿇고 몇 시간 동안 쌀알을 세도록 강요했습니다. 책은 이러한 감정적 둔감화가 머스크를 극도로 목표 지향적인 사람으로 만들었다고 강조합니다 — 타인의 고통에는 무관심하지만 자신의 목표에는 강박적으로 헌신합니다.\n\n역방향 영향:\n머스크가 공감 능력이 없다는 것이 아니라 — 선택적입니다. 예를 들어, 직원들을 차갑게 대할 수 있지만 아들 X의 건강 문제를 위해 의료 장비를 개인적으로 연구합니다.\n\n질문 2 — 수정 및 보완\nZip2:\n본질적으로 온라인 옐로우페이지 + 지도 내비게이션 서비스(신문의 온라인화 지원). 머스크는 초기 코드의 대부분을 직접 작성했고 사무실에서 잠을 잤습니다. 결국 Compaq이 Zip2를 3억 700만 달러에 인수했습니다(머스크는 2200만 달러를 받았습니다).\n\nX.com:\n단순한 온라인 결제가 아니라 — 포괄적인 금융 플랫폼을 만들려는 머스크의 시도였습니다(은행을 대체하는 것을 목표로). 나중에 Confinity(PayPal의 전신)와 합병했습니다. 머스크는 내부 갈등으로 축출되었지만, eBay가 PayPal을 인수했을 때 머스크는 1억 8천만 달러를 벌었습니다.\n(참고: 머스크는 나중에 X.com 도메인을 다시 구매했습니다 — 현재 트위터의 모회사 이름입니다).\n\n질문 3 — 수정 및 보완\nSpaceX:\n2008년, Falcon 1은 네 번째 발사에서 성공했습니다(세 번의 실패로 자금이 고갈된 후). 
바로 그 후, SpaceX는 화물 보급을 위한 16억 달러의 NASA 계약을 확보했습니다 — 회사의 생명줄이었습니다.\n\nTesla:\n2008년, Tesla는 Daimler의 5천만 달러 투자와 미국 에너지부의 대출을 통해 생존했습니다.\n중국에서의 기가팩토리 건설(2019년)은 훨씬 나중의 일입니다. 초기에는 Model S 출시(2012년)와 머스크의 개인적 도박 — Tesla에 자금을 제공하기 위해 모든 부동산을 매각하는 것에 생존이 달려 있었습니다.\n\n질문 4 — 설명: 그의 관리 스타일을 둘러싼 논란\n\"하드코어\" 문화:\n\nSpaceX/Tesla에서: 머스크는 직원들에게 주당 80시간 이상 일하도록 요구했고, 심지어 공장에서 잠을 자기도 했습니다. (\"당신이 여기 없다면, 당신의 대체자가 있습니다.\")\n\n트위터 인수 후: 그는 즉시 직원의 75%를 해고하고 나머지 직원들에게 \"하드코어 약속\"에 서명하도록 요구했습니다 — 장시간과 고강도 업무에 동의하는 것입니다.\n\n극단적인 사례:\n\n프리몬트 공장에서: Model 3의 \"생산 지옥\" 기간 동안, 머스크는 생산 라인에서 잠을 자고 직원들을 \"개보다 못하다\"고 소리치며 욕했습니다(그는 나중에 사과했습니다).\n\n임원 해고: 머스크는 PowerPoint 프레젠테이션의 글꼴 크기가 충분히 크지 않다는 이유로 Tesla 고위 임원을 즉석에서 해고한 적이 있습니다.\n\n질문 5 — 수정 및 보완\nAI 위협론:\n머스크는 AI가 인류를 파괴할 수 있다고 믿습니다(\"핵무기보다 위험\"). 따라서:\n\nOpenAI: 그는 처음에 안전하고 오픈소스인 AI를 촉진하기 위해 지원했습니다(하지만 나중에 통제권 분쟁으로 떠났습니다).\n\nNeuralink: 그는 뇌-기계 인터페이스를 사용하여 인간과 AI를 융합시키는 것을 목표로 합니다 — 인간이 뒤처지는 것을 방지하기 위해서입니다.\n\n모순:\nAI 위험에 대해 경고하면서도, 머스크는 동시에 Tesla에서 완전 자율주행(FSD) 기술을 추진하고 있으며 인공일반지능(AGI)을 연구하기 위해 xAI를 설립했습니다.\n\n전체 평가:\n✅ 당신은 주요 사건들을 정확하게 포착했지만, 세부사항을 강화할 필요가 있습니다(타임라인과 구체적인 수치 등).\n🔍 추가 학습이 권장되는 영역: 머스크의 멀티스레드 관리 스타일(5개 회사를 동시에 운영)과 미디어 조작 기술(트위터를 통한 통치).\n```\n\n## 기능\n\n### EPUB 지원\n- 메타데이터 추출 (제목, 저자, 출판일 등)\n- 목차 추출\n- 장 내용 추출 (Markdown 출력 포함)\n- EPUB 파일 일괄 처리\n\n### PDF 지원\n- 메타데이터 추출 (제목, 저자, 생성일 등)\n- 목차 추출\n- 페이지 번호별 내용 추출\n- 장 제목별 내용 추출\n- Markdown 출력 지원\n- PDF 파일 일괄 처리\n\n## 설치\n\n1. 저장소 클론:\n```bash\ngit clone https://github.com/onebirdrocks/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. `uv`를 사용하여 종속성 설치:\n```bash\nuv pip install -e .\n```\n\n## 사용법\n\n### 개발 모드에서 MCP 서버 시작\n\n개발 모드에서 서버 실행:\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\n테스트 및 디버깅을 위해 http://localhost:5173/ 을 방문할 수 있습니다\n테스트를 위해 인스펙터를 설치할 수도 있습니다.\n```\nnpx @modelcontextprotocol/inspector uv --directory . 
run src/ebook_mcp/main.py\n```\n\n### 프로덕션 모드에서 MCP 서버 시작\n\n서버 실행:\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n#### Cursor에서 MCP 구성\n\nCursor에 다음 구성 추가\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n### EPUB 처리 예제\n\n```python\n# 디렉토리의 모든 EPUB 파일 가져오기\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# EPUB 메타데이터 가져오기\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# 목차 가져오기\ntoc = get_toc(\"/path/to/book.epub\")\n\n# 특정 장 내용 가져오기 (Markdown 형식)\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### PDF 처리 예제\n\n```python\n# 디렉토리의 모든 PDF 파일 가져오기\npdf_files = get_all_pdf_files(\"/path/to/books\")\n\n# PDF 메타데이터 가져오기\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# 목차 가져오기\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# 특정 페이지 내용 가져오기\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# 특정 장 내용 가져오기\nchapter_content, page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## API 참조\n\n### EPUB APIs\n\n#### `get_all_epub_files(path: str) -> List[str]`\n지정된 디렉토리의 모든 EPUB 파일을 가져옵니다.\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\nEPUB 파일에서 메타데이터를 가져옵니다.\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\nEPUB 파일에서 목차를 가져옵니다.\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\nMarkdown 형식으로 장 내용을 가져옵니다.\n\n### PDF APIs\n\n#### `get_all_pdf_files(path: str) -> List[str]`\n지정된 디렉토리의 모든 PDF 파일을 가져옵니다.\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\nPDF 파일에서 메타데이터를 가져옵니다.\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\nPDF 파일에서 목차를 가져옵니다.\n\n#### `get_pdf_page_text(pdf_path: str, page_number: 
int) -> str`\n특정 페이지에서 일반 텍스트 내용을 가져옵니다.\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\n특정 페이지에서 Markdown 형식 내용을 가져옵니다.\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\n장 제목으로 장 내용과 해당 페이지 번호를 가져옵니다.\n\n## 종속성\n\n주요 종속성은 다음과 같습니다:\n- ebooklib: EPUB 파일 처리\n- PyPDF2: 기본 PDF 처리\n- PyMuPDF: 고급 PDF 처리\n- beautifulsoup4: HTML 파싱\n- html2text: HTML에서 Markdown으로 변환\n- pydantic: 데이터 검증\n- fastmcp: MCP 서버 프레임워크\n\n## 중요 사항\n\n1. PDF 처리는 문서의 목차에 의존합니다. 목차를 사용할 수 없는 경우 일부 기능이 작동하지 않을 수 있습니다.\n2. 큰 PDF 파일의 경우 전체 파일을 한 번에 로드하는 것을 피하기 위해 페이지 범위별로 처리하는 것이 좋습니다.\n3. EPUB 장 ID는 목차 구조에서 가져와야 합니다.\n\n## 아키텍처\n\n```\n           ┌────────────────────────────┐\n           │         에이전트 레이어     │\n           │  - 번역 전략               │\n           │  - 스타일 일관성 검사       │\n           │  - LLM 호출 및 상호작용     │\n           └────────────▲─────────────┘\n                        │ 도구 호출\n           ┌────────────┴─────────────┐\n           │        MCP 도구 레이어    │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ 시스템/IO 호출\n           ┌────────────┴─────────────┐\n           │     시스템 기반 레이어    │\n           │  - 파일 읽기              │\n           │  - ebooklib 파싱          │\n           │  - 파일 경로 저장/검사     │\n           └────────────────────────────┘\n```\n\n## 기여\n\nIssues와 Pull Requests를 환영합니다!\n\n최근 변경사항에 대한 자세한 정보는 [CHANGELOG.md](CHANGELOG.md)를 참조하세요."
  },
  {
    "path": "README.md",
    "content": "[![MseeP.ai Security Assessment Badge](https://mseep.net/pr/onebirdrocks-ebook-mcp-badge.png)](https://mseep.ai/app/onebirdrocks-ebook-mcp)\n\n# Ebook-MCP\n\n<img src=\"logo.png\" alt=\"ebook-mcp logo\" width=\"132\" height=\"132\">\n\n\n[English](https://github.com/onebirdrocks/ebook-mcp/blob/main/README.md) | [中文](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-CN.md) | [日本語](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-JP.md) | [한국어](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-KR.md) | [Français](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-FR.md) | [Deutsch](https://github.com/onebirdrocks/ebook-mcp/blob/main/README-DE.md) \n\nEbook-MCP is a powerful Model Context Protocol (MCP) server for processing electronic books. Built on the [Model Context Protocol](https://github.com/modelcontextprotocol), it provides a set of standardized APIs for seamless integration between LLM applications and e-book processing capabilities. Currently supports EPUB and PDF formats.\n\n\n## Use Cases & Value\n\nEbook-MCP transforms how you interact with your digital books by enabling natural language conversations with your reading materials. 
It seamlessly integrates with modern AI-powered IDEs like Cursor and Claude, allowing you to:\n\n- **Smart Library Management**: Simply ask \"Show me all EPUB files in my downloads folder\" or \"Find books about GenAI in my library\"\n- **Interactive Reading Experience**: Have natural conversations about your books:\n  - \"Give me a brief introduction to 'LLM Engineer Handbook'\"\n  - \"What's covered in Chapter 3?\"\n  - \"Summarize the key points about RAG from this book\"\n- **Active Learning Support**: Enhance your learning through AI-powered interactions:\n  - \"Create a quiz based on the RAG concepts from Chapter 5\"\n  - \"Explain the differences between the architectures discussed in this chapter\"\n  - \"Give me practical exercises based on the concepts in this section\"\n- **Content Navigation**: Easily navigate through your books with natural language queries:\n  - \"Find all sections discussing prompt engineering\"\n  - \"Show me the chapters about fine-tuning\"\n  - \"Take me to the part about vector databases\"\n\nBy bridging the gap between traditional e-books and AI capabilities, Ebook-MCP helps readers extract more value from their digital library through intuitive, conversation-based interactions.\n\nQuick demo - Run with DeepSeek (Please check all the examples with Claude/OpenAI under mcp-client-example folder) \n```\nQuery:Please check all the epub books in my download folder. user id is onebird.\n......\n\nQuery: Can you ask me a few questions to test my understanding of this book?\n\n\nAnswer: Sure! Here are 5 questions about Elon Musk by Walter Isaacson to test your understanding of the book. Please answer based on your knowledge — I’ll give feedback and add details after each response.\n\nQuestion 1:\nHow did Musk’s childhood and family background shape his personality and career later in life?\n(Hint: His relationship with his father, growing up in South Africa, etc.)\n\nQuestion 2:\nZip2 and X.com (later PayPal) were Musk’s early startups. 
What problems did they solve? How did Musk eventually exit from these companies?\n(Hint: Zip2’s business model, X.com’s merger with Confinity, etc.)\n\nQuestion 3:\nIn 2008, both SpaceX and Tesla were on the verge of bankruptcy. How did Musk save these two companies?\n(Hint: Falcon 1’s fourth launch, NASA contract, Tesla’s last-minute rescue, etc.)\n\nQuestion 4:\nWhat controversies surround Musk’s management style? What specific incidents in the book illustrate his “hardcore” or “extreme” work culture?\n(Hint: Fremont factory’s “production hell,” Twitter layoffs, etc.)\n\nQuestion 5:\nWhat is Musk’s attitude toward Artificial Intelligence (AI)? Why did he simultaneously support both OpenAI and Neuralink, which seem to have conflicting goals?\n(Hint: AI as an existential threat, brain-machine interface vision, etc.)\n\nPlease answer each question. I’ll provide feedback and additional details or correct any misunderstandings!\n(If a question isn’t clear, feel free to ask me.)\n\nQuery:\nQuestion 1: His father’s abusive attitude made him less empathetic towards others.\nQuestion 2: Zip2 solved the problem of electronic maps and online yellow pages. X.com solved online payments.\nQuestion 3: He secured a contract with NASA. Tesla solved its production problems by building a factory in China.\nQuestion 4: I don’t know. Please answer for me.\nQuestion 5: He believes open-source technology is necessary to ensure AI is safe and open.\n\nAnswer: Your answers have captured some key points from the book. Let me add details, correct some inaccuracies, and provide further background for Questions 4 and 5:\n\nQuestion 1 — Correction & Supplement\nInfluence of His Father:\nMusk's father, Errol, was indeed extremely harsh — for example, forcing young Elon to kneel and count grains of rice for hours. 
The book emphasizes that this emotional desensitization shaped Musk into an extremely goal-oriented person — indifferent to others’ suffering but obsessively committed to his goals.\n\nReverse Impact:\nIt’s not that Musk is incapable of empathy — it’s selective. For instance, he may treat employees coldly but personally study medical equipment for his son X’s health issues.\n\nQuestion 2 — Correction & Supplement\nZip2:\nEssentially an online Yellow Pages + map navigation service (helping newspapers go online). Musk wrote much of the early code himself and slept in the office. Eventually, Compaq acquired Zip2 for $307 million (Musk received $22 million).\n\nX.com:\nNot just online payments — it was Musk’s attempt to create a comprehensive financial platform (aiming to replace banks). Later merged with Confinity (the precursor of PayPal). Musk was ousted due to internal conflicts, but when eBay acquired PayPal, Musk earned $180 million.\n(Note: Musk later bought back the domain X.com — it’s now Twitter’s parent company name).\n\nQuestion 3 — Correction & Supplement\nSpaceX:\nIn 2008, Falcon 1 succeeded on its fourth launch (after three failures drained their funds). Right after that, SpaceX secured a $1.6 billion NASA contract for cargo resupply — a lifeline for the company.\n\nTesla:\nIn 2008, Tesla survived through a $50 million investment from Daimler and a loan from the U.S. Department of Energy.\nBuilding the Gigafactory in China (2019) came much later. In the early years, survival depended on the launch of Model S (2012) and Musk’s personal gamble — selling all his real estate to fund Tesla.\n\nQuestion 4 — Explanation: Controversy Around His Management Style\n“Hardcore” Culture:\n\nAt SpaceX/Tesla: Musk demanded employees work 80+ hours per week, even sleeping at the factory. 
(\"If you’re not here, your replacement is.\")\n\nAfter acquiring Twitter: He immediately laid off 75% of staff and asked the remaining employees to sign a \"hardcore commitment\" — agreeing to long hours and high-intensity output.\n\nExtreme Cases:\n\nAt the Fremont Factory: During the \"production hell\" of the Model 3, Musk slept on the production line and once yelled at employees calling them \"worse than dogs\" (he later apologized).\n\nFiring Executives: Musk once fired a senior Tesla executive on the spot because the font size in their PowerPoint presentation wasn’t big enough.\n\nQuestion 5 — Correction & Supplement\nAI Threat Theory:\nMusk believes that AI could destroy humanity (\"more dangerous than nuclear weapons\"). Therefore:\n\nOpenAI: He initially supported it to promote safe, open-source AI (but left later due to disputes over control).\n\nNeuralink: He aims to use brain-machine interfaces to merge humans with AI — to prevent humans from being left behind.\n\nContradictions:\nWhile warning about AI risks, Musk is simultaneously pushing for Full Self-Driving (FSD) technology at Tesla and has founded xAI to research Artificial General Intelligence (AGI).\n\nOverall Evaluation:\n✅ You have captured the key events accurately, but the details need to be strengthened (such as timelines and specific figures).\n🔍 Recommended areas for further study: Musk's multi-threaded management style (running five companies simultaneously) and his skill in media manipulation (governing through Twitter).\n```\n\n## Features\n\n### EPUB Support\n- Extract metadata (title, author, publication date, etc.)\n- Extract table of contents\n- Extract chapter content (with Markdown output)\n- Batch process EPUB files\n\n### PDF Support\n- Extract metadata (title, author, creation date, etc.)\n- Extract table of contents\n- Extract content by page number\n- Extract content by chapter title\n- Markdown output support\n- Batch process PDF files\n\n## Installation\n\n1. 
Clone the repository:\n```bash\ngit clone https://github.com/onebirdrocks/ebook-mcp.git\ncd ebook-mcp\n```\n\n2. Install dependencies using `uv`:\n```bash\nuv pip install -e .\n```\n\n## Usage\n\n### Starting the MCP Server in development mode\n\nRun the server in development mode:\n```bash\nuv run mcp dev src/ebook_mcp/main.py\n```\n\n\nYou can visit http://localhost:5173/ for testing & debugging purpose \nYou can also install the inspector for the test.\n```\nnpx @modelcontextprotocol/inspector uv --directory . run src/ebook_mcp/main.py\n```\n\n### Starting the MCP Server in Prod mode\n\nRun the server:\n```bash\nuv run src/ebook_mcp/main.py\n```\n\n\n#### Configure the MCP in Cursor\n\nAdd the following configuration in Cursor\n```bash\n\"ebook-mcp\":{\n            \"command\": \"uv\",\n            \"args\": [\n                \"--directory\",\n                \"/Users/onebird/github/ebook-mcp/src/ebook_mcp/\",\n                \"run\",\n                \"main.py\"\n            ]\n        }\n```\n\n\n\n\n### EPUB Processing Examples\n\n```python\n# Get all EPUB files in a directory\nepub_files = get_all_epub_files(\"/path/to/books\")\n\n# Get EPUB metadata\nmetadata = get_metadata(\"/path/to/book.epub\")\n\n# Get table of contents\ntoc = get_toc(\"/path/to/book.epub\")\n\n# Get specific chapter content (in Markdown format)\nchapter_content = get_chapter_markdown(\"/path/to/book.epub\", \"chapter_id\")\n```\n\n### PDF Processing Examples\n\n```python\n# Get all PDF files in a directory\npdf_files = get_all_pdf_files(\"/path/to/books\")\n\n# Get PDF metadata\nmetadata = get_pdf_metadata(\"/path/to/book.pdf\")\n\n# Get table of contents\ntoc = get_pdf_toc(\"/path/to/book.pdf\")\n\n# Get specific page content\npage_text = get_pdf_page_text(\"/path/to/book.pdf\", 1)\npage_markdown = get_pdf_page_markdown(\"/path/to/book.pdf\", 1)\n\n# Get specific chapter content\nchapter_content, page_numbers = get_pdf_chapter_content(\"/path/to/book.pdf\", \"Chapter 1\")\n```\n\n## 
API Reference\n\n### EPUB APIs\n\n#### `get_all_epub_files(path: str) -> List[str]`\nGet all EPUB files in the specified directory.\n\n#### `get_metadata(epub_path: str) -> Dict[str, Union[str, List[str]]]`\nGet metadata from an EPUB file.\n\n#### `get_toc(epub_path: str) -> List[Tuple[str, str]]`\nGet table of contents from an EPUB file.\n\n#### `get_chapter_markdown(epub_path: str, chapter_id: str) -> str`\nGet chapter content in Markdown format.\n\n### PDF APIs\n\n#### `get_all_pdf_files(path: str) -> List[str]`\nGet all PDF files in the specified directory.\n\n#### `get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]`\nGet metadata from a PDF file.\n\n#### `get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]`\nGet table of contents from a PDF file.\n\n#### `get_pdf_page_text(pdf_path: str, page_number: int) -> str`\nGet plain text content from a specific page.\n\n#### `get_pdf_page_markdown(pdf_path: str, page_number: int) -> str`\nGet Markdown formatted content from a specific page.\n\n#### `get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]`\nGet chapter content and corresponding page numbers by chapter title.\n\n## Dependencies\n\nKey dependencies include:\n- ebooklib: EPUB file processing\n- PyPDF2: Basic PDF processing\n- PyMuPDF: Advanced PDF processing\n- beautifulsoup4: HTML parsing\n- html2text: HTML to Markdown conversion\n- pydantic: Data validation\n- fastmcp: MCP server framework\n\n## Important Notes\n\n1. PDF processing relies on the document's table of contents. Some features may not work if TOC is not available.\n2. For large PDF files, it's recommended to process by page ranges to avoid loading the entire file at once.\n3. 
EPUB chapter IDs must be obtained from the table of contents structure.\n\n## Architecture\n\n```\n           ┌────────────────────────────┐\n           │         Agent Layer        │\n           │  - Translation Strategy    │\n           │  - Style Consistency Check │\n           │  - LLM Call & Interaction │\n           └────────────▲─────────────┘\n                        │ Tool Calls\n           ┌────────────┴─────────────┐\n           │        MCP Tool Layer     │\n           │  - extract_chapter        │\n           │  - write_translated_chapter│\n           │  - generate_epub          │\n           └────────────▲─────────────┘\n                        │ System/IO Calls\n           ┌────────────┴─────────────┐\n           │     System Base Layer     │\n           │  - File Reading          │\n           │  - ebooklib Parsing      │\n           │  - File Path Storage/Check│\n           └────────────────────────────┘\n```\n\n\n\n## Contributing\n\nWe welcome Issues and Pull Requests!\n\nFor detailed information about recent changes, please see [CHANGELOG.md](CHANGELOG.md).\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"ebook-mcp\"\nversion = \"0.1.8\"\ndescription = \"An MCP server for chatting with ebooks (PDF/EPUB).\"\nauthors = [{ name = \"onebird\", email = \"rymrocks@163.com\" }]\nlicense = \"Apache-2.0\"\nreadme = \"README.md\"\nrequires-python = \">=3.10\"\n\ndependencies = [\n    \"ebooklib>=0.19\",\n    \"PyMuPDF>=1.26.3\",\n    \"beautifulsoup4>=4.13.4\",\n    \"html2text>=2025.4.15\",\n    \"pydantic>=2.11.7\",\n    \"fastmcp>=2.11.1\",\n    \"typer>=0.16.0\"\n]\n\n[project.optional-dependencies]\ndev = [\n    \"pytest>=8.4.1\",\n    \"uvicorn>=0.35.0,<1.0.0\",\n]\n\n\n\n[project.scripts]\nebook-mcp = \"ebook_mcp.main:cli_entry\"\n\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[tool.hatch.build.targets.wheel]\npackages = [\"src/ebook_mcp\"]"
  },
  {
    "path": "pytest.ini",
    "content": "[tool:pytest]\ntestpaths = src/ebook_mcp/tests\npython_files = test_*.py\npython_classes = Test*\npython_functions = test_*\naddopts = \n    -v\n    --tb=short\n    --strict-markers\n    --disable-warnings\n    --import-mode=importlib\nmarkers =\n    slow: marks tests as slow (deselect with '-m \"not slow\"')\n    integration: marks tests as integration tests\n    unit: marks tests as unit tests "
  },
  {
    "path": "run_tests.sh",
    "content": "#!/bin/bash\n\n# Ebook-MCP 测试运行脚本\n\necho \"==========================================\"\necho \"Ebook-MCP 单元测试运行器\"\necho \"==========================================\"\n\n# 检查是否安装了 pytest\nif ! command -v pytest &> /dev/null; then\n    echo \"❌ pytest 未安装，请先安装: pip install pytest\"\n    exit 1\nfi\n\n# 设置测试目录\nTEST_DIR=\"src/ebook_mcp/tests\"\n\necho \"📁 测试目录: $TEST_DIR\"\necho \"\"\n\n# 运行基本测试（推荐）\necho \"🧪 运行基本测试（不需要外部依赖）...\"\npython -m pytest $TEST_DIR/test_basic.py -v\nBASIC_RESULT=$?\n\necho \"\"\necho \"🧪 运行 EPUB 章节提取修复版本测试...\"\npython -m pytest $TEST_DIR/test_epub_chapter_extraction.py -v\nFIXED_RESULT=$?\n\necho \"\"\necho \"🧪 运行 AZW 测试（如果模块可用）...\"\npython -m pytest $TEST_DIR/test_azw.py -v\nAZW_RESULT=$?\n\necho \"\"\necho \"==========================================\"\necho \"测试结果总结:\"\necho \"==========================================\"\n\nif [ $BASIC_RESULT -eq 0 ]; then\n    echo \"✅ 基本测试: 通过\"\nelse\n    echo \"❌ 基本测试: 失败\"\nfi\n\nif [ $FIXED_RESULT -eq 0 ]; then\n    echo \"✅ EPUB 章节提取修复测试: 通过\"\nelse\n    echo \"❌ EPUB 章节提取修复测试: 失败\"\nfi\n\nif [ $AZW_RESULT -eq 0 ]; then\n    echo \"✅ AZW 测试: 通过或跳过\"\nelse\n    echo \"❌ AZW 测试: 失败\"\nfi\n\necho \"\"\necho \"💡 提示:\"\necho \"   - 要运行完整测试，请先安装项目依赖: pip install -e .\"\necho \"   - 查看详细测试指南: cat HOW-TO-TEST.md\"\necho \"\"\n\n# 返回总体结果\nif [ $BASIC_RESULT -eq 0 ] && [ $FIXED_RESULT -eq 0 ] && [ $AZW_RESULT -eq 0 ]; then\n    echo \"🎉 所有可用测试通过！\"\n    exit 0\nelse\n    echo \"⚠️  部分测试失败，请检查上述输出\"\n    exit 1\nfi "
  },
  {
    "path": "src/ebook_mcp/__init__.py",
    "content": ""
  },
  {
    "path": "src/ebook_mcp/main.py",
    "content": "import os\nfrom typing import Any,List,Dict,Union,Tuple, Callable, TypeVar\nfrom functools import wraps\nfrom mcp.server.fastmcp import FastMCP\nfrom ebooklib import epub\nfrom pydantic import BaseModel\nfrom bs4 import BeautifulSoup\nfrom ebook_mcp.tools import epub_helper, pdf_helper\nimport logging\nfrom datetime import datetime\nfrom ebook_mcp.tools.logger_config import setup_logger  # Import logger config\n\n# Type variable for generic function return type\nT = TypeVar('T')\n\ndef handle_mcp_errors(func: Callable[..., T]) -> Callable[..., T]:\n    \"\"\"\n    Decorator to handle common MCP tool errors uniformly.\n    \n    This decorator catches FileNotFoundError and other exceptions,\n    re-raises them with consistent error messages.\n    \"\"\"\n    @wraps(func)\n    def wrapper(*args, **kwargs) -> T:\n        try:\n            return func(*args, **kwargs)\n        except FileNotFoundError as e:\n            raise FileNotFoundError(str(e))\n        except (epub_helper.EpubProcessingError, pdf_helper.PdfProcessingError) as e:\n            # Re-raise custom exceptions as-is to preserve detailed error information\n            raise e\n        except Exception as e:\n            raise Exception(str(e))\n    return wrapper\n\ndef handle_pdf_errors(func: Callable[..., T]) -> Callable[..., T]:\n    \"\"\"\n    Decorator to handle PDF-specific errors.\n    \n    Some PDF functions don't need FileNotFoundError handling\n    as they handle it internally.\n    \"\"\"\n    @wraps(func)\n    def wrapper(*args, **kwargs) -> T:\n        try:\n            return func(*args, **kwargs)\n        except Exception as e:\n            raise Exception(str(e))\n    return wrapper\n\n\nlog_dir = \"logs\"\nif not os.path.exists(log_dir):\n    os.makedirs(log_dir)\n\nlog_file = os.path.join(log_dir, f\"ebook-mcp_server_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log\")\nlogging.basicConfig(\n    level=logging.DEBUG,\n    format='%(asctime)s - %(name)s - %(levelname)s - 
%(message)s',\n    handlers=[\n        logging.FileHandler(log_file),\n        logging.StreamHandler()\n    ]\n)\nlogger = logging.getLogger(__name__)\n\n\n\n\n# Initialize FastMCP server\nmcp = FastMCP(\"ebook-MCP\")\n\n# EPUB related tools\n@mcp.tool()\n@handle_mcp_errors\ndef get_all_epub_files(path: str) -> List[str]:\n    \"\"\"Get all epub files in a given path.\n    \"\"\"\n    return epub_helper.get_all_epub_files(path)\n\n@mcp.tool()\n@handle_mcp_errors\ndef get_epub_metadata(epub_path:str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"Get metadata of a given ebook.\n\n    Args:\n        epub_path: Full path to the ebook file.eg. \"/Users/macbook/Downloads/test.epub\"\n    \n    Returns:\n        List[Tuple[str, str]]: Return a list，Each element is a Tuple，contains key and value of metadata\n\n    Raises:\n        FileNotFoundError: Raises when the epub file not found\n        Exception: Raisers when running into parsing error of epub file\n    \"\"\"\n    logger.debug(f\"Getting ebook metadata: {epub_path}\")\n    return epub_helper.get_meta(epub_path)\n\n\n@mcp.tool()\n@handle_mcp_errors\ndef get_epub_toc(epub_path: str) -> List[Tuple[str, str]]:\n    \"\"\"Get table of contents of a given EPUB file.\n\n    Args:\n        epub_path: Full path to the ebook file.eg. 
\"/Users/macbook/Downloads/test.epub\"\n    \n    Returns:\n        List[Tuple[str, str]]: List of TOC entries, each entry is a tuple of (title, href)\n\n    Raises:\n        FileNotFoundError: Raises when the EPUB file not found\n        Exception: Raisers when running into parsing error of EPUB file\n    \"\"\"\n    logger.debug(f\"calling get_epub_toc: {epub_path}\")\n    return epub_helper.get_toc(epub_path)\n\n@mcp.tool()\n@handle_mcp_errors\ndef get_epub_chapter_markdown(epub_path:str, chapter_id: str) -> str:\n    \"\"\"Get content of a given chapter using the improved extraction method.\n    \n    ✅ RECOMMENDED: This tool fixes the truncation issue in the original version when processing subchapters.\n    It can correctly handle EPUB files with subchapters and provide complete chapter content.\n    \n    This function uses extract_chapter_html which properly handles subchapters\n    and provides accurate chapter boundaries, fixing the issue where subchapters\n    in the TOC cause premature truncation of chapter content.\n\n    Args:\n        epub_path: Full path to the ebook file. eg. 
\"/Users/macbook/Downloads/test.epub\"\n        chapter_id: Chapter id of the chapter to get content (e.g., \"chapter1.xhtml#section1_3\")\n    \n    Returns:\n        str: Chapter content in markdown format\n    \"\"\"\n    logger.debug(f\"calling get_epub_chapter_markdown: {epub_path}, chapter ID: {chapter_id}\")\n    book = epub_helper.read_epub(epub_path)\n    \n    # Use the improved version\n    return epub_helper.extract_chapter_markdown(book, chapter_id)\n\n# PDF related tools\n@mcp.tool()\n@handle_mcp_errors\ndef get_all_pdf_files(path: str) -> List[str]:\n    \"\"\"Get all PDF files in a given path.\n    \"\"\"\n    return pdf_helper.get_all_pdf_files(path)\n\n@mcp.tool()\n@handle_mcp_errors\ndef get_pdf_metadata(pdf_path: str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"Get metadata of a given PDF file.\n\n    Args:\n        pdf_path: Full path to the PDF file.eg. \"/Users/macbook/Downloads/test.pdf\"\n    \n    Returns:\n        Dict[str, Union[str, List[str]]]: Dictionary containing metadata\n\n    Raises:\n        FileNotFoundError: Raises when the PDF file not found\n        Exception: Raisers when running into parsing error of PDF file\n    \"\"\"\n    logger.debug(f\"calling get_pdf_metadata: {pdf_path}\")\n    return pdf_helper.get_meta(pdf_path)\n\n@mcp.tool()\n@handle_mcp_errors\ndef get_pdf_toc(pdf_path: str) -> List[Tuple[str, int]]:\n    \"\"\"Get table of contents of a given PDF file.\n\n    Args:\n        pdf_path: Full path to the PDF file.eg. 
\"/Users/macbook/Downloads/test.pdf\"\n    \n    Returns:\n        List[Tuple[str, int]]: List of TOC entries, each entry is a tuple of (title, page_number)\n\n    Raises:\n        FileNotFoundError: Raises when the PDF file not found\n        Exception: Raisers when running into parsing error of PDF file\n    \"\"\"\n    logger.debug(f\"calling get_pdf_toc: {pdf_path}\")\n    return pdf_helper.get_toc(pdf_path)\n\n@mcp.tool()\n@handle_pdf_errors\ndef get_pdf_page_text(pdf_path: str, page_number: int) -> str:\n    \"\"\"Get text content of a specific page in PDF file.\n\n    Args:\n        pdf_path: Full path to the PDF file.eg. \"/Users/macbook/Downloads/test.pdf\"\n        page_number: Page number to extract (1-based index)\n    \n    Returns:\n        str: Extracted text content\n    \"\"\"\n    logger.debug(f\"calling get_pdf_page_text: {pdf_path}, page: {page_number}\")\n    return pdf_helper.extract_page_text(pdf_path, page_number)\n\n@mcp.tool()\n@handle_pdf_errors\ndef get_pdf_page_markdown(pdf_path: str, page_number: int) -> str:\n    \"\"\"Get markdown formatted content of a specific page in PDF file.\n\n    Args:\n        pdf_path: Full path to the PDF file.eg. \"/Users/macbook/Downloads/test.pdf\"\n        page_number: Page number to extract (1-based index)\n    \n    Returns:\n        str: Markdown formatted text\n    \"\"\"\n    logger.debug(f\"calling get_pdf_page_markdown: {pdf_path}, page: {page_number}\")\n    return pdf_helper.extract_page_markdown(pdf_path, page_number)\n\n@mcp.tool()\n@handle_pdf_errors\ndef get_pdf_chapter_content(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]:\n    \"\"\"Get content of a specific chapter in PDF file by its title.\n\n    Args:\n        pdf_path: Full path to the PDF file.eg. 
\"/Users/macbook/Downloads/test.pdf\"\n        chapter_title: Title of the chapter to extract\n    \n    Returns:\n        Tuple[str, List[int]]: Tuple containing (chapter_content, page_numbers)\n    \"\"\"\n    logger.debug(f\"calling get_pdf_chapter_content: {pdf_path}, chapter: {chapter_title}\")\n    return pdf_helper.extract_chapter_by_title(pdf_path, chapter_title)\n\nif __name__ == \"__main__\":\n    # Initialize and run the server\n    logger.info(\"Server is starting.....\")\n    mcp.run(transport='stdio')\n\n# as the cli entry after the \"pip install ebook-mcp\"\ndef cli_entry():\n    import logging\n    logging.info(\"Starting ebook-mcp server\")\n    from mcp.server.fastmcp import FastMCP\n    mcp = FastMCP(\"ebook-mcp\")\n    mcp.run(transport='stdio')\n"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/.gitignore",
    "content": ".env\n\n# Logs\nlogs/\n*.log\n\n# Python\n__pycache__/\n*.py[cod]\n*$py.class\n*.so\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# Virtual Environment\nvenv/\nenv/\nENV/\n.venv/\n.env/\n\n# IDE\n.idea/\n.vscode/\n*.swp\n*.swo\n.DS_Store\n\n# Project specific\n*.bak\n*.tmp"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/README-CN.md",
    "content": "[English](README.md) | 中文\n\n本项目提供了多个 MCP（模型控制协议）客户端示例，用于与不同的 AI 模型进行交互，包括 OpenAI GPT-4、Anthropic Claude 和 Deepseek AI。\n\n## 安装\n\n安装依赖：\n```bash\npip install -e .\n```\n\n## 配置\n\n在项目根目录创建 `.env` 文件，并根据你想要使用的模型添加相应的 API 密钥：\n\n```env\n# OpenAI API 密钥 (用于 GPT-4)\nOPENAI_API_KEY=your_openai_api_key\n\n# Anthropic API 密钥 (用于 Claude)\nANTHROPIC_API_KEY=your_anthropic_api_key\n\n# Deepseek API 密钥\nDEEPSEEK_API_KEY=your_deepseek_api_key\n```\n\n## 使用方法\n\n本项目提供了三个客户端示例，每个对应不同的 AI 模型：\n\n### OpenAI GPT-4 客户端\n```bash\npython openai_example.py ../main.py\n```\n\n### Anthropic Claude 客户端\n```bash\npython anthropic_example.py ../main.py\n```\n\n### Deepseek AI 客户端\n```bash\npython deepseek_example.py ../main.py\n```\n\n## 对话示例\n\n以下是使用 MCP 客户端（DeepSeek）与电子书交互的示例：\n\n```\n## 查询：\n你能考察我几个问题，看我对这本书的掌握程度吗？\n\n2025-04-12 15:29:12,313 - mcp.server.lowlevel.server - INFO - 正在处理请求类型 ListToolsRequest\n\n好的！以下是关于《埃隆·马斯克传》（Walter Isaacson 著）的5个问题，测试你对这本书的掌握程度。请根据你的理解回答，我会根据你的答案进行反馈和补充。\n\n---\n\n### 问题1：\n马斯克的童年和家庭背景，如何影响了他后来的性格和事业？\n（提示：与父亲的关系、南非的成长经历等）\n\n---\n\n### 问题2：\nZip2 和 X.com（后来的 PayPal）是马斯克早期的创业项目。它们分别解决了什么问题？马斯克最终是如何退出这些公司的？\n（提示：Zip2的商业模式，X.com与Confinity的合并等）\n\n---\n\n### 问题3：\n2008年，SpaceX 和特斯拉同时濒临破产。马斯克是如何拯救这两家公司的？\n（提示：猎鹰1号的第四次发射，NASA合同，特斯拉的“最后一搏”等）\n\n---\n\n### 问题4：\n马斯克的管理风格有哪些争议？书中有哪些具体事件体现了他的“硬核”或“极端”工作文化？\n（提示：弗里蒙特工厂的“生产地狱”、推特的大裁员等）\n\n---\n\n### 问题5：\n马斯克对人工智能（AI）的态度是什么？他为什么同时支持OpenAI和Neuralink这两个看似矛盾的项目？\n（提示：AI的生存威胁论，脑机接口的愿景等）\n\n---\n\n请依次回答这些问题，我会根据你的答案进行补充和纠正。\n（如果某个问题不清楚，也可以直接问我）\n\n---\n\n## 回答：\n\n- 问题1：他父亲对他粗暴的态度，让他对他人缺乏共情。\n- 问题2：Zip2 解决了电子地图和在线黄页的问题。X.com 解决了电子支付。\n- 问题3：他拿到了NASA的订单。特斯拉通过在中国建厂解决了产能问题。\n- 问题4：我不知道，请帮我解答。\n- 问题5：他认为开源技术是确保AI安全和开放的必要方式。\n\n---\n\n## 你的回答已经抓住了书中的一些关键点，我来补充细节和纠正一些偏差，同时为问题4和5提供更多背景：\n\n---\n\n### 问题1 —— 修正与补充\n\n#### 父亲的影响：\n马斯克的父亲埃罗尔确实非常严苛，比如曾经强迫小马斯克跪在地上数大米几个小时。书中强调，这种情感钝化的经历，反而让他形成了极端的目标导向性格 —— 对别人的痛苦不敏感，但对目标极度执着。\n\n#### 反向影响：\n马斯克并不是完全没有共情能力，而是选择性共情。例如他对员工冷酷，但会亲自研究医疗设备来帮助生病的儿子X。\n\n---\n\n### 问题2 —— 
修正与补充\n\n#### Zip2：\n本质上是企业在线黄页 + 地图导航服务（帮助报纸实现数字化转型）。马斯克早期亲自编写大量代码，甚至睡在办公室。最终被康柏（Compaq）以3.07亿美元收购，马斯克个人获得2200万美元。\n\n#### X.com：\n不仅仅是电子支付，更是马斯克试图打造的一个金融全平台（取代银行）。后来与Confinity（PayPal前身）合并，但因为内部斗争被踢出管理层。eBay收购PayPal时，马斯克获得了1.8亿美元。\n> 注：X.com 后来被马斯克买回，成为推特母公司的名称。\n\n---\n\n### 问题3 —— 修正与补充\n\n#### SpaceX：\n2008年，猎鹰1号在经历三次失败后，第四次发射终于成功。紧接着，SpaceX 拿到了NASA价值16亿美元的货运合同，成为公司的救命稻草。\n\n#### 特斯拉：\n2008年，特斯拉靠戴姆勒投资的5000万美元，以及美国能源部的贷款才得以生存下来。\n> 中国建厂（2019年）是后期的事情。早期的生存关键是Model S（2012年）的成功发布，以及马斯克卖掉自己所有房产，全力押注特斯拉。\n\n---\n\n### 问题4 —— 解答：管理风格的争议\n\n#### “硬核”文化：\n\n- 在SpaceX和特斯拉：马斯克要求员工每周工作80小时以上，甚至要在工厂里过夜。他曾说：“如果你不在这里，就会有人取代你的位置。”\n  \n- 收购推特后：他立即裁员75%，并要求剩余员工签署“极端硬核”承诺书，承诺长时间、高强度工作。\n\n#### 极端案例：\n\n- 弗里蒙特工厂：Model 3产能地狱时期，马斯克直接睡在生产线上，甚至骂员工“连狗都不如”（事后道歉）。\n\n- 开除高管：曾因为PPT字体不够大，当场开除特斯拉高管。\n\n---\n\n### 问题5 —— 修正与补充\n\n#### AI威胁论：\n马斯克认为AI可能会毁灭人类（“比核弹更危险”）。因此：\n\n- OpenAI：最初支持OpenAI，推动安全、开源的AI（但后来因控制权分歧离开）。\n\n- Neuralink：希望通过脑机接口让人类与AI共生，避免人类被AI淘汰。\n\n#### 矛盾之处：\n尽管警告AI的风险，但马斯克同时在特斯拉大力推动自动驾驶（FSD），并成立xAI研究AGI（通用人工智能）。\n\n```\n"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/README.md",
    "content": "English | [中文](README-CN.md)\n# MCP Client Examples\n\n\n\nThis project provides multiple MCP (Model Control Protocol) client examples for interacting with different AI models, including OpenAI GPT-4, Anthropic Claude, and Deepseek AI.\n\n## Installation\n\nInstall dependencies:\n```bash\npip install -e .\n```\n\n## Configuration\n\nCreate a `.env` file in the project root directory and add the corresponding API keys based on the model you want to use:\n\n```env\n# OpenAI API key (for GPT-4)\nOPENAI_API_KEY=your_openai_api_key\n\n# Anthropic API key (for Claude)\nANTHROPIC_API_KEY=your_anthropic_api_key\n\n# Deepseek API key\nDEEPSEEK_API_KEY=your_deepseek_api_key\n```\n\n## Usage\n\nThe project provides three client examples, each corresponding to a different AI model:\n\n### OpenAI GPT-4 Client\n```bash\npython openai_example.py ../main.py\n```\n\n### Anthropic Claude Client\n```bash\npython anthropic_example.py ../main.py\n```\n\n### Deepseek AI Client\n```bash\npython deepseek_example.py ../main.py\n```\n\n## Example Conversation\n\nHere's an example of interacting with an ebook using the MCP client with DeepSeek:\n\n```\n\nQuery: Can you ask me a few questions to test my understanding of this book?\n\n\nSure! Here are 5 questions about Elon Musk by Walter Isaacson to test your understanding of the book. Please answer based on your knowledge — I’ll give feedback and add details after each response.\n\nQuestion 1:\nHow did Musk’s childhood and family background shape his personality and career later in life?\n(Hint: His relationship with his father, growing up in South Africa, etc.)\n\nQuestion 2:\nZip2 and X.com (later PayPal) were Musk’s early startups. What problems did they solve? How did Musk eventually exit from these companies?\n(Hint: Zip2’s business model, X.com’s merger with Confinity, etc.)\n\nQuestion 3:\nIn 2008, both SpaceX and Tesla were on the verge of bankruptcy. 
How did Musk save these two companies?\n(Hint: Falcon 1’s fourth launch, NASA contract, Tesla’s last-minute rescue, etc.)\n\nQuestion 4:\nWhat controversies surround Musk’s management style? What specific incidents in the book illustrate his “hardcore” or “extreme” work culture?\n(Hint: Fremont factory’s “production hell,” Twitter layoffs, etc.)\n\nQuestion 5:\nWhat is Musk’s attitude toward Artificial Intelligence (AI)? Why did he simultaneously support both OpenAI and Neuralink, which seem to have conflicting goals?\n(Hint: AI as an existential threat, brain-machine interface vision, etc.)\n\nPlease answer each question. I’ll provide feedback and additional details or correct any misunderstandings!\n(If a question isn’t clear, feel free to ask me.)\n\nQuery:\nQuestion 1: His father’s abusive attitude made him less empathetic towards others.\nQuestion 2: Zip2 solved the problem of electronic maps and online yellow pages. X.com solved online payments.\nQuestion 3: He secured a contract with NASA. Tesla solved its production problems by building a factory in China.\nQuestion 4: I don’t know. Please answer for me.\nQuestion 5: He believes open-source technology is necessary to ensure AI is safe and open.\n\nYour answers have captured some key points from the book. Let me add details, correct some inaccuracies, and provide further background for Questions 4 and 5:\n\nQuestion 1 — Correction & Supplement\nInfluence of His Father:\nMusk's father, Errol, was indeed extremely harsh — for example, forcing young Elon to kneel and count grains of rice for hours. The book emphasizes that this emotional desensitization shaped Musk into an extremely goal-oriented person — indifferent to others’ suffering but obsessively committed to his goals.\n\nReverse Impact:\nIt’s not that Musk is incapable of empathy — it’s selective. 
For instance, he may treat employees coldly but personally study medical equipment for his son X’s health issues.\n\nQuestion 2 — Correction & Supplement\nZip2:\nEssentially an online Yellow Pages + map navigation service (helping newspapers go online). Musk wrote much of the early code himself and slept in the office. Eventually, Compaq acquired Zip2 for $307 million (Musk received $22 million).\n\nX.com:\nNot just online payments — it was Musk’s attempt to create a comprehensive financial platform (aiming to replace banks). Later merged with Confinity (the precursor of PayPal). Musk was ousted due to internal conflicts, but when eBay acquired PayPal, Musk earned $180 million.\n(Note: Musk later bought back the domain X.com — it’s now Twitter’s parent company name).\n\nQuestion 3 — Correction & Supplement\nSpaceX:\nIn 2008, Falcon 1 succeeded on its fourth launch (after three failures drained their funds). Right after that, SpaceX secured a $1.6 billion NASA contract for cargo resupply — a lifeline for the company.\n\nTesla:\nIn 2008, Tesla survived through a $50 million investment from Daimler and a loan from the U.S. Department of Energy.\nBuilding the Gigafactory in China (2019) came much later. In the early years, survival depended on the launch of Model S (2012) and Musk’s personal gamble — selling all his real estate to fund Tesla.\n\nQuestion 4 — Explanation: Controversy Around His Management Style\n“Hardcore” Culture:\n\nAt SpaceX/Tesla: Musk demanded employees work 80+ hours per week, even sleeping at the factory. 
(\"If you’re not here, your replacement is.\")\n\nAfter acquiring Twitter: He immediately laid off 75% of staff and asked the remaining employees to sign a \"hardcore commitment\" — agreeing to long hours and high-intensity output.\n\nExtreme Cases:\n\nAt the Fremont Factory: During the \"production hell\" of the Model 3, Musk slept on the production line and once yelled at employees calling them \"worse than dogs\" (he later apologized).\n\nFiring Executives: Musk once fired a senior Tesla executive on the spot because the font size in their PowerPoint presentation wasn’t big enough.\n\nQuestion 5 — Correction & Supplement\nAI Threat Theory:\nMusk believes that AI could destroy humanity (\"more dangerous than nuclear weapons\"). Therefore:\n\nOpenAI: He initially supported it to promote safe, open-source AI (but left later due to disputes over control).\n\nNeuralink: He aims to use brain-machine interfaces to merge humans with AI — to prevent humans from being left behind.\n\nContradictions:\nWhile warning about AI risks, Musk is simultaneously pushing for Full Self-Driving (FSD) technology at Tesla and has founded xAI to research Artificial General Intelligence (AGI).\n\n\n\n\n```\n\n## Notes\n\n- Make sure you have configured the appropriate API keys\n- Each client generates log files in the `logs` directory. Use those logs to understand the conversation and tool usage.\n- Use the `clear` command to clear the conversation history (not implemented in the Deepseek version)\n- Use the `quit` command to exit the program\n\n## Contributing\nIssues and suggestions for improvements are welcome!\n\n"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/anthropic_example.py",
    "content": "import asyncio\nimport logging\nimport os\nfrom datetime import datetime\nfrom typing import Optional, List, Dict\nfrom contextlib import AsyncExitStack\n\nfrom mcp import ClientSession, StdioServerParameters\nfrom mcp.client.stdio import stdio_client\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv()\n\n# Configure logging\nlog_dir = \"logs\"\nif not os.path.exists(log_dir):\n    os.makedirs(log_dir)\n\nlog_file = os.path.join(log_dir, f\"anthropic_mcp_client_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log\")\nlogging.basicConfig(\n    level=logging.DEBUG,\n    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',\n    handlers=[\n        logging.FileHandler(log_file),\n        #logging.StreamHandler()\n    ]\n)\nlogger = logging.getLogger(__name__)\n\nclass MCPClient:\n    \"\"\"\n    MCP Client class that handles communication with the server and Claude API.\n    Manages message history and tool execution.\n    \"\"\"\n    def __init__(self):\n        # Initialize session and client objects\n        self.session: Optional[ClientSession] = None\n        self.exit_stack = AsyncExitStack()\n        self.anthropic = Anthropic()\n        self.messages: List[Dict[str, str]] = []\n        self.max_messages = 20  # Maximum number of messages to keep in history\n        self.max_tokens = 8000  # Maximum token count (using half of Claude 3.5 Sonnet's 16k context window)\n        logger.debug(\"MCPClient initialized\")\n\n    def _estimate_tokens(self, text: str) -> int:\n        \"\"\"\n        Roughly estimate the number of tokens in a text.\n        Uses a simple approximation of 4 characters per token.\n        \"\"\"\n        return len(text) // 4\n\n    def _trim_messages_history(self):\n        \"\"\"\n        Clean up message history to ensure it doesn't exceed limits.\n        Removes oldest messages first when exceeding count or token limits.\n        \"\"\"\n        initial_count = len(self.messages)\n     
   \n        # Remove messages if count exceeds limit\n        while len(self.messages) > self.max_messages:\n            removed_msg = self.messages.pop(0)\n            logger.debug(f\"Removed message due to count limit: {removed_msg['role']}\")\n        \n        # Calculate and trim based on token count\n        total_tokens = sum(self._estimate_tokens(msg[\"content\"]) for msg in self.messages)\n        while total_tokens > self.max_tokens and self.messages:\n            removed_msg = self.messages.pop(0)\n            removed_tokens = self._estimate_tokens(removed_msg[\"content\"])\n            total_tokens -= removed_tokens\n            logger.debug(f\"Removed message due to token limit: {removed_msg['role']}, tokens: {removed_tokens}\")\n        \n        if initial_count != len(self.messages):\n            logger.debug(f\"Trimmed messages from {initial_count} to {len(self.messages)}\")\n\n    def add_message(self, role: str, content: str):\n        \"\"\"\n        Add a new message to the history and trim if necessary.\n        \"\"\"\n        self.messages.append({\"role\": role, \"content\": content})\n        logger.debug(f\"Added message - Role: {role}, Content length: {len(content)}\")\n        self._trim_messages_history()\n\n    async def connect_to_server(self, server_script_path: str):\n        \"\"\"\n        Connect to an MCP server\n        \n        Args:\n            server_script_path: Path to the server script (.py or .js)\n        \"\"\"\n        logger.debug(f\"Connecting to server with script: {server_script_path}\")\n        \n        is_python = server_script_path.endswith('.py')\n        is_js = server_script_path.endswith('.js')\n        if not (is_python or is_js):\n            logger.error(\"Invalid server script type\")\n            raise ValueError(\"Server script must be a .py or .js file\")\n            \n        command = \"python\" if is_python else \"node\"\n        server_params = StdioServerParameters(\n            
command=command,\n            args=[server_script_path],\n            env=None\n        )\n        \n        stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))\n        self.stdio, self.write = stdio_transport\n        self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))\n        \n        await self.session.initialize()\n        \n        # List available tools\n        response = await self.session.list_tools()\n        tools = response.tools\n        tools_list = [tool.name for tool in tools]\n        logger.debug(f\"Connected to server with tools: {tools_list}\")\n        print(\"\\nConnected to server with tools:\", tools_list)\n\n    async def process_query(self, query: str) -> str:\n        \"\"\"\n        Process a query using Claude and available tools\n        \"\"\"\n        logger.debug(f\"Processing new query: {query}\")\n        self.add_message(\"user\", query)\n\n        response = await self.session.list_tools()\n        available_tools = [{ \n            \"name\": tool.name,\n            \"description\": tool.description,\n            \"input_schema\": tool.inputSchema\n        } for tool in response.tools]\n        logger.debug(f\"Available tools: {[tool['name'] for tool in available_tools]}\")\n\n        # Initial Claude API call\n        response = self.anthropic.messages.create(\n            model=\"claude-3-5-sonnet-20241022\",\n            max_tokens=1000,\n            messages=self.messages,\n            tools=available_tools\n        )\n\n        tool_results = []\n        final_text = []\n\n        for content in response.content:\n            if content.type == 'text':\n                final_text.append(content.text)\n                self.add_message(\"assistant\", content.text)\n                logger.debug(\"Added assistant text response\")\n            elif content.type == 'tool_use':\n                tool_name = content.name\n                tool_args = 
content.input\n                \n                logger.debug(f\"Executing tool call - Tool: {tool_name}, Args: {tool_args}\")\n                result = await self.session.call_tool(tool_name, tool_args)\n                tool_results.append({\"call\": tool_name, \"result\": result})\n                final_text.append(f\"[Calling tool {tool_name} with args {tool_args}]\")\n                logger.debug(f\"Tool execution result: {result.content}\")\n\n                if hasattr(content, 'text') and content.text:\n                    self.add_message(\"assistant\", content.text)\n                self.add_message(\"user\", result.content)\n\n                response = self.anthropic.messages.create(\n                    model=\"claude-3-5-sonnet-20241022\",\n                    max_tokens=1000,\n                    messages=self.messages,\n                )\n\n                final_text.append(response.content[0].text)\n                self.add_message(\"assistant\", response.content[0].text)\n                logger.debug(\"Added follow-up assistant response\")\n\n        return \"\\n\".join(final_text)\n\n    async def chat_loop(self):\n        \"\"\"\n        Run an interactive chat loop\n        \"\"\"\n        logger.info(\"Starting chat loop\")\n        print(\"\\nMCP Client Started!\")\n        print(\"Type your queries or 'quit' to exit.\")\n        print(\"Type 'clear' to clear conversation history.\")\n        \n        while True:\n            try:\n                query = input(\"\\nQuery: \").strip()\n                \n                if query.lower() == 'quit':\n                    logger.info(\"Chat loop terminated by user\")\n                    break\n                elif query.lower() == 'clear':\n                    self.messages = []\n                    logger.debug(\"Conversation history cleared\")\n                    print(\"\\nConversation history cleared.\")\n                    continue\n                    \n                response = await 
self.process_query(query)\n                print(\"\\n\" + response)\n                    \n            except Exception as e:\n                logger.error(f\"Error in chat loop: {str(e)}\", exc_info=True)\n                print(f\"\\nError: {str(e)}\")\n    \n    async def cleanup(self):\n        \"\"\"\n        Clean up resources\n        \"\"\"\n        logger.debug(\"Cleaning up resources\")\n        await self.exit_stack.aclose()\n\nasync def main():\n    if len(sys.argv) < 2:\n        logger.error(\"Missing server script path argument\")\n        print(\"Usage: python client.py <path_to_server_script>\")\n        sys.exit(1)\n        \n    client = MCPClient()\n    try:\n        await client.connect_to_server(sys.argv[1])\n        await client.chat_loop()\n    finally:\n        await client.cleanup()\n\nif __name__ == \"__main__\":\n    import sys\n    logger.info(\"Starting Anthropic MCP Client application\")\n    asyncio.run(main())"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/deepseek_example.py",
    "content": "import asyncio\nfrom typing import Optional\nfrom contextlib import AsyncExitStack\nimport json\nimport logging\nimport os\nfrom datetime import datetime\n\nfrom mcp import ClientSession, StdioServerParameters\nfrom mcp.client.stdio import stdio_client\n\nfrom openai import AsyncOpenAI\nfrom dotenv import load_dotenv\n\nload_dotenv()\n\n# Setup logging\ndef setup_logging():\n    \"\"\"Configure logging settings\"\"\"\n    # Create logs directory if it doesn't exist\n    if not os.path.exists('logs'):\n        os.makedirs('logs')\n    \n    # Generate log filename with timestamp\n    log_filename = f'logs/deepseek_mcp_client_{datetime.now().strftime(\"%Y%m%d_%H%M%S\")}.log'\n    \n    # Configure logging\n    logging.basicConfig(\n        level=logging.DEBUG,\n        format='%(asctime)s - %(levelname)s - %(message)s',\n        handlers=[\n            logging.FileHandler(log_filename, encoding='utf-8'),\n            #logging.StreamHandler()  # Also output to console\n        ]\n    )\n    return logging.getLogger(__name__)\n\nclass MCPClient:\n    def __init__(self):\n        # Initialize session and client objects\n        self.session: Optional[ClientSession] = None\n        self.exit_stack = AsyncExitStack()\n        \n        # Initialize Deepseek client using OpenAI's client with custom base URL\n        self.client = AsyncOpenAI(\n            api_key=os.getenv(\"DEEPSEEK_API_KEY\"),\n            base_url=\"https://api.deepseek.com/v1\"  # Deepseek API endpoint\n        )\n        \n        self.conversation_history = []\n        self.model = \"deepseek-chat\"  # Using Deepseek Chat model\n        self.max_tokens = 8192  # Deepseek context window\n        self.logger = setup_logging()\n\n    def count_tokens(self, messages):\n        \"\"\"Calculate token count for message history - simplified for Deepseek\"\"\"\n        # Note: This is a simplified token counting method\n        # Deepseek uses its own tokenizer internally\n        total_chars 
= sum(len(str(msg.get(\"content\", \"\"))) for msg in messages)\n        # Approximate token count (assuming average 4 chars per token)\n        return total_chars // 4\n\n    def trim_conversation_history(self):\n        \"\"\"Trim conversation history to ensure it doesn't exceed maximum token limit\"\"\"\n        while self.conversation_history and self.count_tokens(self.conversation_history) > self.max_tokens:\n            removed_message = self.conversation_history.pop(1)\n            self.logger.debug(f\"Trimmed message from history: {removed_message}\")\n\n    async def connect_to_server(self, server_script_path: str):\n        \"\"\"Connect to an MCP server\"\"\"\n        self.logger.info(f\"Connecting to server: {server_script_path}\")\n        \n        is_python = server_script_path.endswith('.py')\n        is_js = server_script_path.endswith('.js')\n        if not (is_python or is_js):\n            raise ValueError(\"Server script must be a .py or .js file\")\n            \n        command = \"python\" if is_python else \"node\"\n        server_params = StdioServerParameters(\n            command=command,\n            args=[server_script_path],\n            env=None\n        )\n        \n        stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))\n        self.stdio, self.write = stdio_transport\n        self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))\n        \n        await self.session.initialize()\n        \n        response = await self.session.list_tools()\n        tools = response.tools\n        self.logger.info(f\"Connected to server with tools: {[tool.name for tool in tools]}\")\n        print(\"\\nConnected to server with tools:\", [tool.name for tool in tools])\n\n    async def process_query(self, query: str) -> str:\n        \"\"\"Process a query using Deepseek and available tools\"\"\"\n        self.logger.debug(f\"Processing user query: {query}\")\n        
\n        if query.lower() == 'clear':\n            self.logger.debug(\"Clearing conversation history\")\n            self.conversation_history = []\n            return \"Conversation history cleared.\"\n\n        self.conversation_history.append({\n            \"role\": \"user\",\n            \"content\": query\n        })\n        self.logger.debug(\"Added user message to history\")\n\n        self.trim_conversation_history()\n\n        response = await self.session.list_tools()\n        available_tools = [{ \n            \"type\": \"function\",\n            \"function\": {\n                \"name\": tool.name,\n                \"description\": tool.description,\n                \"parameters\": tool.inputSchema\n            }\n        } for tool in response.tools]\n\n        final_text = []\n        current_tool_call_id = None  # Track current tool call ID\n        \n        while True:\n            # Ensure all message contents are strings\n            messages_for_api = []\n            for msg in self.conversation_history:\n                api_msg = {\"role\": msg[\"role\"]}\n                \n                # Convert content to string if it exists\n                if \"content\" in msg:\n                    api_msg[\"content\"] = str(msg[\"content\"]) if msg[\"content\"] is not None else \"\"\n                else:\n                    api_msg[\"content\"] = \"\"\n                \n                # Handle tool calls if they exist\n                if msg.get(\"tool_calls\"):\n                    api_msg[\"tool_calls\"] = msg[\"tool_calls\"]\n                    if msg[\"tool_calls\"]:\n                        current_tool_call_id = msg[\"tool_calls\"][0][\"id\"]\n                \n                # Add tool_call_id for tool messages\n                if msg[\"role\"] == \"tool\" and current_tool_call_id:\n                    api_msg[\"tool_call_id\"] = current_tool_call_id\n                \n                messages_for_api.append(api_msg)\n\n            
self.logger.debug(f\"Sending request to Deepseek with {len(messages_for_api)} messages in history\")\n            try:\n                response = await self.client.chat.completions.create(\n                    model=self.model,\n                    messages=messages_for_api,\n                    tools=available_tools,\n                    tool_choice=\"auto\"\n                )\n            except Exception as e:\n                self.logger.error(f\"API request failed: {str(e)}\")\n                self.logger.debug(f\"Messages sent: {json.dumps(messages_for_api, indent=2)}\")\n                raise\n\n            message = response.choices[0].message\n            assistant_message = {\n                \"role\": \"assistant\",\n                \"content\": str(message.content) if message.content is not None else \"\"\n            }\n            \n            # Only add tool_calls if they exist\n            if message.tool_calls:\n                assistant_message[\"tool_calls\"] = [\n                    {\n                        \"id\": tool_call.id,\n                        \"type\": \"function\",\n                        \"function\": {\n                            \"name\": tool_call.function.name,\n                            \"arguments\": tool_call.function.arguments\n                        }\n                    } for tool_call in message.tool_calls\n                ]\n                current_tool_call_id = message.tool_calls[0].id\n            \n            self.conversation_history.append(assistant_message)\n            self.logger.debug(f\"Added assistant message to history: {message.content}\")\n            final_text.append(message.content or \"\")\n\n            if not message.tool_calls:\n                break\n\n            for tool_call in message.tool_calls:\n                tool_name = tool_call.function.name\n                tool_args = json.loads(tool_call.function.arguments)\n                \n                self.logger.debug(f\"Calling 
tool: {tool_name}\")\n                self.logger.debug(f\"Tool arguments: {tool_args}\")\n                \n                result = await self.session.call_tool(tool_name, tool_args)\n                self.logger.debug(f\"Tool result: {result.content}\")\n                \n                final_text.append(f\"[Calling tool {tool_name} with args {tool_args}]\")\n\n                # Add tool response to conversation history with tool_call_id\n                tool_message = {\n                    \"role\": \"tool\",\n                    \"content\": str(result.content),\n                    \"tool_call_id\": tool_call.id  # Use current tool call ID\n                }\n                self.conversation_history.append(tool_message)\n                self.logger.debug(\"Added tool result to history\")\n\n            self.trim_conversation_history()\n\n        self.logger.debug(\"Final conversation history:\")\n        for msg in self.conversation_history:\n            self.logger.debug(f\"Message: {msg}\")\n\n        return \"\\n\".join(filter(None, final_text))\n\n    async def chat_loop(self):\n        \"\"\"Run an interactive chat loop\"\"\"\n        self.logger.info(\"Starting chat loop\")\n        print(\"\\nMCP Client Started!\")\n        print(\"Type your queries, 'clear' to clear history, or 'quit' to exit.\")\n        \n        while True:\n            try:\n                query = input(\"\\nQuery: \").strip()\n                \n                if query.lower() == 'quit':\n                    self.logger.info(\"User requested to quit\")\n                    break\n                    \n                response = await self.process_query(query)\n                print(\"\\n\" + response)\n                    \n            except Exception as e:\n                error_msg = f\"Error: {str(e)}\"\n                self.logger.error(error_msg, exc_info=True)\n                print(\"\\n\" + error_msg)\n    \n    async def cleanup(self):\n        \"\"\"Clean up 
resources\"\"\"\n        self.logger.info(\"Cleaning up resources\")\n        await self.exit_stack.aclose()\n\nasync def main():\n    if len(sys.argv) < 2:\n        print(\"Usage: python client.py <path_to_server_script>\")\n        sys.exit(1)\n        \n    client = MCPClient()\n    client.logger.info(\"Starting Deepseek MCP Client application\")\n    try:\n        await client.connect_to_server(sys.argv[1])\n        await client.chat_loop()\n    finally:\n        await client.cleanup()\n\nif __name__ == \"__main__\":\n    import sys\n    asyncio.run(main())"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/openai_example.py",
    "content": "import asyncio\nfrom typing import Optional\nfrom contextlib import AsyncExitStack\nimport json\nimport tiktoken\nimport logging\nimport os\nfrom datetime import datetime\n\nfrom mcp import ClientSession, StdioServerParameters\nfrom mcp.client.stdio import stdio_client\n\nfrom openai import AsyncOpenAI\nfrom dotenv import load_dotenv\n\nload_dotenv()\n\n# Setup logging\ndef setup_logging():\n    \"\"\"Configure logging settings\"\"\"\n    # Create logs directory if it doesn't exist\n    if not os.path.exists('logs'):\n        os.makedirs('logs')\n    \n    # Generate log filename with timestamp\n    log_filename = f'logs/openai_mcp_client_{datetime.now().strftime(\"%Y%m%d_%H%M%S\")}.log'\n    \n    # Configure logging\n    logging.basicConfig(\n        level=logging.DEBUG,\n        format='%(asctime)s - %(levelname)s - %(message)s',\n        handlers=[\n            logging.FileHandler(log_filename, encoding='utf-8'),\n            #logging.StreamHandler()  # Also output to console\n        ]\n    )\n    return logging.getLogger(__name__)\n\nclass MCPClient:\n    def __init__(self):\n        # Initialize session and client objects\n        self.session: Optional[ClientSession] = None\n        self.exit_stack = AsyncExitStack()\n        self.client = AsyncOpenAI()\n        self.conversation_history = []\n        self.model = \"gpt-4-turbo-preview\"\n        self.max_tokens = 4000\n        self.encoding = tiktoken.encoding_for_model(self.model)\n        self.logger = setup_logging()\n\n    def count_tokens(self, messages):\n        \"\"\"Calculate token count for message history\"\"\"\n        num_tokens = 0\n        for message in messages:\n            num_tokens += 4\n            for key, value in message.items():\n                if key == \"tool_calls\" and value:\n                    for tool_call in value:\n                        if isinstance(tool_call, dict):\n                            function_name = tool_call.get(\"function\", 
{}).get(\"name\", \"\")\n                            function_args = tool_call.get(\"function\", {}).get(\"arguments\", \"\")\n                        else:\n                            function_name = tool_call.function.name\n                            function_args = tool_call.function.arguments\n                        num_tokens += len(self.encoding.encode(function_name))\n                        num_tokens += len(self.encoding.encode(str(function_args)))\n                elif isinstance(value, str):\n                    num_tokens += len(self.encoding.encode(value))\n        num_tokens += 2\n        return num_tokens\n\n    def trim_conversation_history(self):\n        \"\"\"Trim conversation history to ensure it doesn't exceed maximum token limit\"\"\"\n        while self.conversation_history and self.count_tokens(self.conversation_history) > self.max_tokens:\n            removed_message = self.conversation_history.pop(1)\n            self.logger.debug(f\"Trimmed message from history: {removed_message}\")\n\n    async def connect_to_server(self, server_script_path: str):\n        \"\"\"Connect to an MCP server\"\"\"\n        self.logger.info(f\"Connecting to server: {server_script_path}\")\n        \n        is_python = server_script_path.endswith('.py')\n        is_js = server_script_path.endswith('.js')\n        if not (is_python or is_js):\n            raise ValueError(\"Server script must be a .py or .js file\")\n            \n        command = \"python\" if is_python else \"node\"\n        server_params = StdioServerParameters(\n            command=command,\n            args=[server_script_path],\n            env=None\n        )\n        \n        stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params))\n        self.stdio, self.write = stdio_transport\n        self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write))\n        \n        await self.session.initialize()\n        \n      
  response = await self.session.list_tools()\n        tools = response.tools\n        self.logger.info(f\"Connected to server with tools: {[tool.name for tool in tools]}\")\n        print(\"\\nConnected to server with tools:\", [tool.name for tool in tools])\n\n    async def process_query(self, query: str) -> str:\n        \"\"\"Process a query using OpenAI and available tools\"\"\"\n        self.logger.debug(f\"Processing user query: {query}\")\n        \n        if query.lower() == 'clear':\n            self.logger.debug(\"Clearing conversation history\")\n            self.conversation_history = []\n            return \"Conversation history cleared.\"\n\n        self.conversation_history.append({\n            \"role\": \"user\",\n            \"content\": query\n        })\n        self.logger.debug(\"Added user message to history\")\n\n        self.trim_conversation_history()\n\n        response = await self.session.list_tools()\n        available_tools = [{ \n            \"type\": \"function\",\n            \"function\": {\n                \"name\": tool.name,\n                \"description\": tool.description,\n                \"parameters\": tool.inputSchema\n            }\n        } for tool in response.tools]\n\n        final_text = []\n        while True:\n            self.logger.debug(f\"Sending request to OpenAI with {len(self.conversation_history)} messages in history\")\n            response = await self.client.chat.completions.create(\n                model=self.model,\n                messages=self.conversation_history,\n                tools=available_tools,\n                tool_choice=\"auto\"\n            )\n\n            message = response.choices[0].message\n            assistant_message = {\n                \"role\": \"assistant\",\n                \"content\": message.content,\n                \"tool_calls\": [\n                    {\n                        \"id\": tool_call.id,\n                        \"type\": \"function\",\n               
         \"function\": {\n                            \"name\": tool_call.function.name,\n                            \"arguments\": tool_call.function.arguments\n                        }\n                    } for tool_call in (message.tool_calls or [])\n                ] if message.tool_calls else None\n            }\n            self.conversation_history.append(assistant_message)\n            self.logger.debug(f\"Added assistant message to history: {message.content}\")\n            final_text.append(message.content or \"\")\n\n            if not message.tool_calls:\n                break\n\n            for tool_call in message.tool_calls:\n                tool_name = tool_call.function.name\n                tool_args = json.loads(tool_call.function.arguments)\n                \n                self.logger.debug(f\"Calling tool: {tool_name}\")\n                self.logger.debug(f\"Tool arguments: {tool_args}\")\n                \n                result = await self.session.call_tool(tool_name, tool_args)\n                self.logger.debug(f\"Tool result: {result.content}\")\n                \n                final_text.append(f\"[Calling tool {tool_name} with args {tool_args}]\")\n\n                tool_message = {\n                    \"role\": \"tool\",\n                    \"tool_call_id\": tool_call.id,\n                    \"content\": result.content\n                }\n                self.conversation_history.append(tool_message)\n                self.logger.debug(\"Added tool result to history\")\n\n            self.trim_conversation_history()\n\n        self.logger.debug(\"Final conversation history:\")\n        for msg in self.conversation_history:\n            self.logger.debug(f\"Message: {msg}\")\n\n        return \"\\n\".join(filter(None, final_text))\n\n    async def chat_loop(self):\n        \"\"\"Run an interactive chat loop\"\"\"\n        self.logger.info(\"Starting chat loop\")\n        print(\"\\nMCP Client Started!\")\n        print(\"Type 
your queries, 'clear' to clear history, or 'quit' to exit.\")\n        \n        while True:\n            try:\n                query = input(\"\\nQuery: \").strip()\n                \n                if query.lower() == 'quit':\n                    self.logger.info(\"User requested to quit\")\n                    break\n                    \n                response = await self.process_query(query)\n                print(\"\\n\" + response)\n                    \n            except Exception as e:\n                error_msg = f\"Error: {str(e)}\"\n                self.logger.error(error_msg, exc_info=True)\n                print(\"\\n\" + error_msg)\n    \n    async def cleanup(self):\n        \"\"\"Clean up resources\"\"\"\n        self.logger.info(\"Cleaning up resources\")\n        await self.exit_stack.aclose()\n\nasync def main():\n    if len(sys.argv) < 2:\n        print(\"Usage: python client.py <path_to_server_script>\")\n        sys.exit(1)\n        \n    client = MCPClient()\n    client.logger.info(\"Starting OpenAI MCP Client application\")\n    try:\n        await client.connect_to_server(sys.argv[1])\n        await client.chat_loop()\n    finally:\n        await client.cleanup()\n\nif __name__ == \"__main__\":\n    import sys\n    asyncio.run(main())"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/pyproject.toml",
    "content": "[project]\nname = \"demo-mcp-client\"\nversion = \"0.1.0\"\ndescription = \"Example MCP clients (Anthropic, OpenAI, Deepseek) for ebook-mcp\"\nreadme = \"README.md\"\nrequires-python = \">=3.12\"\ndependencies = [\n    \"anthropic>=0.49.0\",\n    \"mcp>=1.4.1\",\n    \"openai>=1.69.0\",\n    \"python-dotenv>=1.0.1\",\n    \"tiktoken>=0.5.0\",\n]\n"
  },
  {
    "path": "src/ebook_mcp/mcp_client_example/requirements.txt",
    "content": "anthropic==0.49.0\npython-dotenv>=1.0.0\nopenai>=1.0.0\nmcp>=1.4.1\ntiktoken>=0.5.0\nfastmcp>=0.1.0\npydantic>=2.0.0\n"
  },
  {
    "path": "src/ebook_mcp/tests/conftest.py",
    "content": "import pytest\nimport tempfile\nimport os\nfrom unittest.mock import Mock\n\n\n@pytest.fixture\ndef temp_dir():\n    \"\"\"Create a temporary directory for testing\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        yield temp_dir\n\n\n@pytest.fixture\ndef mock_epub_book():\n    \"\"\"Create a mock EPUB book for testing\"\"\"\n    mock_book = Mock()\n    mock_book.get_metadata.return_value = {\n        'title': [('Test Book', {})],\n        'creator': [('Test Author', {})],\n        'language': [('en', {})],\n        'identifier': [('test-id', {})],\n        'date': [('2023-01-01', {})],\n        'publisher': [('Test Publisher', {})],\n        'description': [('Test description', {})]\n    }\n    return mock_book\n\n\n@pytest.fixture\ndef mock_pdf_reader():\n    \"\"\"Create a mock PDF reader for testing\"\"\"\n    mock_reader = Mock()\n    mock_reader.metadata = {\n        '/Title': 'Test PDF',\n        '/Author': 'Test Author',\n        '/Subject': 'Test Subject',\n        '/Creator': 'Test Creator',\n        '/Producer': 'Test Producer',\n        '/CreationDate': '2023-01-01',\n        '/ModDate': '2023-01-02'\n    }\n    mock_reader.pages = [Mock(), Mock(), Mock()]  # 3 pages\n    return mock_reader\n\n\n@pytest.fixture\ndef mock_pdf_document():\n    \"\"\"Create a mock PDF document for testing\"\"\"\n    mock_doc = Mock()\n    mock_doc.get_toc.return_value = [\n        (1, \"Chapter 1\", 1),\n        (1, \"Chapter 2\", 5),\n        (2, \"Subchapter 2.1\", 7)\n    ]\n    return mock_doc\n\n\n@pytest.fixture\ndef sample_epub_files():\n    \"\"\"Create sample EPUB file names for testing\"\"\"\n    return [\"book1.epub\", \"book2.epub\", \"document.txt\"]\n\n\n@pytest.fixture\ndef sample_pdf_files():\n    \"\"\"Create sample PDF file names for testing\"\"\"\n    return [\"document1.pdf\", \"document2.pdf\", \"text.txt\"]\n\n\n@pytest.fixture\ndef temp_epub_file():\n    \"\"\"Create a temporary EPUB file for testing\"\"\"\n    with 
tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n        f.write(b\"mock epub content\")\n        epub_path = f.name\n    \n    yield epub_path\n    \n    # Cleanup\n    if os.path.exists(epub_path):\n        os.unlink(epub_path)\n\n\n@pytest.fixture\ndef temp_pdf_file():\n    \"\"\"Create a temporary PDF file for testing\"\"\"\n    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n        f.write(b\"mock pdf content\")\n        pdf_path = f.name\n    \n    yield pdf_path\n    \n    # Cleanup\n    if os.path.exists(pdf_path):\n        os.unlink(pdf_path) "
  },
  {
    "path": "src/ebook_mcp/tests/run_tests.py",
    "content": "#!/usr/bin/env python3\n\"\"\"\nTest runner script for ebook-mcp project.\nThis script runs all unit tests for the server components.\n\"\"\"\n\nimport sys\nimport os\nimport subprocess\nimport pytest\n\ndef run_tests():\n    \"\"\"Run all tests for the ebook-mcp project\"\"\"\n    \n    # Add the src directory to Python path\n    src_path = os.path.join(os.path.dirname(__file__), '..', '..')\n    sys.path.insert(0, src_path)\n    \n    # Get the tests directory\n    tests_dir = os.path.dirname(__file__)\n    \n    print(\"Running ebook-mcp unit tests...\")\n    print(\"=\" * 50)\n    \n    # Run tests with pytest\n    try:\n        # Run tests with verbose output and coverage\n        result = pytest.main([\n            tests_dir,\n            '-v',\n            '--tb=short',\n            '--strict-markers',\n            '--disable-warnings'\n        ])\n        \n        if result == 0:\n            print(\"\\n\" + \"=\" * 50)\n            print(\"✅ All tests passed!\")\n            return True\n        else:\n            print(\"\\n\" + \"=\" * 50)\n            print(\"❌ Some tests failed!\")\n            return False\n            \n    except Exception as e:\n        print(f\"Error running tests: {e}\")\n        return False\n\n\ndef run_specific_test(test_file):\n    \"\"\"Run a specific test file\"\"\"\n    src_path = os.path.join(os.path.dirname(__file__), '..', '..')\n    sys.path.insert(0, src_path)\n    \n    test_path = os.path.join(os.path.dirname(__file__), test_file)\n    \n    print(f\"Running specific test: {test_file}\")\n    print(\"=\" * 50)\n    \n    try:\n        result = pytest.main([\n            test_path,\n            '-v',\n            '--tb=short'\n        ])\n        \n        if result == 0:\n            print(\"\\n\" + \"=\" * 50)\n            print(\"✅ Test passed!\")\n            return True\n        else:\n            print(\"\\n\" + \"=\" * 50)\n            print(\"❌ Test failed!\")\n            return False\n      
      \n    except Exception as e:\n        print(f\"Error running test: {e}\")\n        return False\n\n\ndef list_tests():\n    \"\"\"List all available test files\"\"\"\n    tests_dir = os.path.dirname(__file__)\n    test_files = []\n    \n    for file in os.listdir(tests_dir):\n        if file.startswith('test_') and file.endswith('.py'):\n            test_files.append(file)\n    \n    print(\"Available test files:\")\n    print(\"=\" * 30)\n    for test_file in sorted(test_files):\n        print(f\"  - {test_file}\")\n    \n    return test_files\n\n\nif __name__ == \"__main__\":\n    if len(sys.argv) > 1:\n        command = sys.argv[1]\n        \n        if command == \"list\":\n            list_tests()\n        elif command == \"run\":\n            if len(sys.argv) > 2:\n                test_file = sys.argv[2]\n                run_specific_test(test_file)\n            else:\n                run_tests()\n        else:\n            print(\"Usage:\")\n            print(\"  python run_tests.py list          - List all test files\")\n            print(\"  python run_tests.py run           - Run all tests\")\n            print(\"  python run_tests.py run test_file - Run specific test file\")\n    else:\n        run_tests() "
  },
  {
    "path": "src/ebook_mcp/tests/test_azw.py",
    "content": "import os\nimport pytest\nimport sys\nsys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))\n\ntry:\n    from ebook_mcp.azw import find_azw_files, get_metadata, get_toc, get_chapter_text\n    AZW_AVAILABLE = True\nexcept ImportError:\n    # Skip tests if azw module is not available\n    AZW_AVAILABLE = False\n\n@pytest.mark.skipif(not AZW_AVAILABLE, reason=\"AZW module not available\")\ndef test_find_azw_files(tmp_path):\n    # Create test directory structure\n    test_dir = tmp_path / \"test_azw\"\n    test_dir.mkdir()\n\n    # Create test files\n    (test_dir / \"book1.azw\").touch()\n    (test_dir / \"book2.azw3\").touch()\n    (test_dir / \"book3.txt\").touch()\n    \n    # Create subdirectory with more files\n    sub_dir = test_dir / \"subdir\"\n    sub_dir.mkdir()\n    (sub_dir / \"book4.azw\").touch()\n\n    # Test file finding\n    azw_files = find_azw_files(str(test_dir))\n    assert len(azw_files) == 3\n\n    # Clean up test files\n    # (handled automatically by pytest)\n\n@pytest.mark.skipif(not AZW_AVAILABLE, reason=\"AZW module not available\")\ndef test_get_metadata(test_azw_path):\n    \"\"\"Test metadata extraction\n\n    Args:\n        test_azw_path: Path to test AZW file (should be provided when running tests)\n    \"\"\"\n    metadata = get_metadata(test_azw_path)\n    \n    # Verify basic metadata fields exist\n    assert \"title\" in metadata\n    assert \"author\" in metadata\n    assert \"publisher\" in metadata\n    assert \"publication_date\" in metadata\n    assert \"language\" in metadata\n    assert \"isbn\" in metadata\n\n@pytest.mark.skipif(not AZW_AVAILABLE, reason=\"AZW module not available\")\ndef test_get_toc(test_azw_path):\n    \"\"\"Test table of contents extraction\n\n    Args:\n        test_azw_path: Path to test AZW file (should be provided when running tests)\n    \"\"\"\n    toc = get_toc(test_azw_path)\n    \n    # Verify TOC structure\n    assert isinstance(toc, list)\n    for entry in 
toc:\n        assert isinstance(entry, tuple)\n        assert len(entry) == 2\n        assert isinstance(entry[0], str)  # title\n        assert isinstance(entry[1], str)  # chapter_id\n\n@pytest.mark.skipif(not AZW_AVAILABLE, reason=\"AZW module not available\")\ndef test_get_chapter_text(test_azw_path):\n    \"\"\"Test chapter text extraction\n\n    Args:\n        test_azw_path: Path to test AZW file (should be provided when running tests)\n    \"\"\"\n    toc = get_toc(test_azw_path)\n    if not toc:\n        pytest.skip(\"No TOC available in test file\")\n    \n    chapter_id = toc[0][1]\n    text = get_chapter_text(test_azw_path, chapter_id)\n    \n    assert isinstance(text, str)\n    assert len(text) > 0 "
  },
  {
    "path": "src/ebook_mcp/tests/test_basic.py",
    "content": "import pytest\nimport os\nimport tempfile\nfrom unittest.mock import Mock, patch\n\n# Test basic file operations that don't require external dependencies\n\ndef test_get_all_epub_files_basic():\n    \"\"\"Test basic EPUB file discovery without external dependencies\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        # Create mock EPUB files\n        epub_files = [\"book1.epub\", \"book2.epub\", \"document.txt\"]\n        for file in epub_files:\n            with open(os.path.join(temp_dir, file), 'w') as f:\n                f.write(\"mock content\")\n        \n        # Test the basic file discovery logic\n        result = [f for f in os.listdir(temp_dir) if f.endswith('.epub')]\n        assert set(result) == {\"book1.epub\", \"book2.epub\"}\n\ndef test_get_all_pdf_files_basic():\n    \"\"\"Test basic PDF file discovery without external dependencies\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        # Create mock PDF files\n        pdf_files = [\"document1.pdf\", \"document2.pdf\", \"text.txt\"]\n        for file in pdf_files:\n            with open(os.path.join(temp_dir, file), 'w') as f:\n                f.write(\"mock content\")\n        \n        # Test the basic file discovery logic\n        result = [f for f in os.listdir(temp_dir) if f.endswith('.pdf')]\n        assert set(result) == {\"document1.pdf\", \"document2.pdf\"}\n\ndef test_file_not_found_error():\n    \"\"\"Test file not found error handling\"\"\"\n    with pytest.raises(FileNotFoundError):\n        with open(\"/nonexistent/file.txt\", 'r') as f:\n            pass\n\ndef test_temp_file_operations():\n    \"\"\"Test temporary file operations\"\"\"\n    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:\n        f.write(\"test content\")\n        temp_path = f.name\n    \n    try:\n        # Verify file was created\n        assert os.path.exists(temp_path)\n        \n        # Read content\n        with open(temp_path, 'r') as f:\n         
   content = f.read()\n        assert content == \"test content\"\n    finally:\n        # Clean up\n        if os.path.exists(temp_path):\n            os.unlink(temp_path)\n\ndef test_directory_operations():\n    \"\"\"Test directory operations\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        # Create subdirectory\n        sub_dir = os.path.join(temp_dir, \"subdir\")\n        os.makedirs(sub_dir)\n        \n        # Create files in subdirectory\n        files = [\"file1.txt\", \"file2.txt\"]\n        for file in files:\n            with open(os.path.join(sub_dir, file), 'w') as f:\n                f.write(f\"content for {file}\")\n        \n        # List files\n        result = os.listdir(sub_dir)\n        assert set(result) == set(files)\n\n@pytest.mark.parametrize(\"file_extension,expected_count\", [\n    (\".epub\", 2),\n    (\".pdf\", 1),\n    (\".txt\", 3),\n])\ndef test_file_filtering(file_extension, expected_count):\n    \"\"\"Test file filtering by extension\"\"\"\n    with tempfile.TemporaryDirectory() as temp_dir:\n        # Create test files\n        test_files = [\n            \"book1.epub\",\n            \"book2.epub\", \n            \"document.pdf\",\n            \"file1.txt\",\n            \"file2.txt\",\n            \"file3.txt\"\n        ]\n        \n        for file in test_files:\n            with open(os.path.join(temp_dir, file), 'w') as f:\n                f.write(\"content\")\n        \n        # Filter by extension\n        result = [f for f in os.listdir(temp_dir) if f.endswith(file_extension)]\n        assert len(result) == expected_count\n\ndef test_mock_basic_operations():\n    \"\"\"Test basic mock operations\"\"\"\n    mock_file = Mock()\n    mock_file.read.return_value = \"mock content\"\n    mock_file.write.return_value = None\n    \n    # Test mock behavior\n    assert mock_file.read() == \"mock content\"\n    mock_file.write(\"test\")\n    mock_file.write.assert_called_once_with(\"test\")\n\ndef 
test_patch_basic():\n    \"\"\"Test basic patch functionality\"\"\"\n    with patch('os.path.exists', return_value=False):\n        assert not os.path.exists(\"/any/path\")\n    \n    with patch('os.path.exists', return_value=True):\n        assert os.path.exists(\"/any/path\") "
  },
  {
    "path": "src/ebook_mcp/tests/test_epub_chapter_extraction.py",
    "content": "import pytest\nimport tempfile\nimport os\nfrom unittest.mock import Mock, patch, MagicMock\n\n# Add project root to path\nimport sys\nsys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))\n\n# Skip tests if dependencies are not available\ntry:\n    from ebooklib import epub\n    from bs4 import BeautifulSoup\n    import html2text\n    DEPENDENCIES_AVAILABLE = True\nexcept ImportError:\n    DEPENDENCIES_AVAILABLE = False\n\nif DEPENDENCIES_AVAILABLE:\n    from ebook_mcp.tools.epub_helper import (\n        extract_chapter_html,\n        extract_chapter_markdown,\n        clean_html,\n        convert_html_to_markdown\n    )\n\n\nclass TestExtractChapterHtml:\n    \"\"\"Test the improved version of extract_chapter_html function\"\"\"\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_simple_chapter_extraction(self):\n        \"\"\"Test simple chapter extraction without subchapters\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock TOC structure\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_chapter2 = Mock()\n        mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        mock_book.toc = [mock_chapter1, mock_chapter2]\n        \n        # Mock HTML content\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h1 id=\"chapter1\">Chapter 1</h1>\n                <p>Chapter 1 content</p>\n                \n                <h1 id=\"chapter2\">Chapter 2</h1>\n                <p>Chapter 2 content</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n   
     \n        # Test extracting Chapter 1 - use the chapter ID that exists in TOC\n        result = extract_chapter_markdown(mock_book, \"chapter1.xhtml\")\n        \n        # Should include Chapter 1 content but not Chapter 2\n        assert \"Chapter 1 content\" in result\n        assert \"Chapter 2 content\" not in result\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_chapter_with_subchapters_bug_case(self):\n        \"\"\"Test the specific bug case: chapter with subchapters causing premature truncation\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock TOC structure with subchapters (the problematic case)\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_subchapter1_3 = Mock()\n        mock_subchapter1_3.title = \"1.3 Append-only\"\n        mock_subchapter1_3.href = \"chapter1.xhtml#section1_3\"\n        \n        mock_subchapter1_4 = Mock()\n        mock_subchapter1_4.title = \"1.4 Another Section\"\n        mock_subchapter1_4.href = \"chapter1.xhtml#section1_4\"\n        \n        mock_chapter2 = Mock()\n        mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        # Set up nested TOC\n        mock_book.toc = [\n            (mock_chapter1, [mock_subchapter1_3, mock_subchapter1_4]),\n            mock_chapter2\n        ]\n        \n        # Mock HTML content that matches the bug report\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h1 id=\"chapter1\">Chapter 1</h1>\n                <p>Chapter 1 introduction</p>\n                \n                <h2 id=\"section1_3\">1.3 Append-only</h2>\n                <h3 id=\"subsection\">Safe incremental updates with logs</h3>\n                <p>One way to do incremental updates is to just append the updates to a file. 
\n                This is called a \"log\" because it's append-only. It's safer than in-place updates \n                because no data is overwritten; you can always recover the old data after a crash.</p>\n                \n                <h2 id=\"section1_4\">1.4 Another Section</h2>\n                <p>Another section content</p>\n                \n                <h1 id=\"chapter2\">Chapter 2</h1>\n                <p>Chapter 2 content</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        # Test extracting section1_3 (the problematic case)\n        result = extract_chapter_html(mock_book, \"chapter1.xhtml#section1_3\")\n        \n        # Should include the full section content\n        assert \"1.3 Append-only\" in result\n        assert \"Safe incremental updates with logs\" in result\n        assert \"One way to do incremental updates\" in result\n        assert \"This is called a \\\"log\\\" because it's append-only\" in result\n        \n        # Should NOT include content from other sections\n        assert \"1.4 Another Section\" not in result\n        assert \"Another section content\" not in result\n        assert \"Chapter 2 content\" not in result\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_comparison_with_original_function(self):\n        \"\"\"Compare the improved function with the original function\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock TOC structure\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_subchapter1_3 = Mock()\n        mock_subchapter1_3.title = \"1.3 Append-only\"\n        
mock_subchapter1_3.href = \"chapter1.xhtml#section1_3\"\n        \n        mock_subchapter1_4 = Mock()\n        mock_subchapter1_4.title = \"1.4 Another Section\"\n        mock_subchapter1_4.href = \"chapter1.xhtml#section1_4\"\n        \n        mock_book.toc = [\n            (mock_chapter1, [mock_subchapter1_3, mock_subchapter1_4])\n        ]\n        \n        # Mock HTML content\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h2 id=\"section1_3\">1.3 Append-only</h2>\n                <p>Section 1.3 content</p>\n                \n                <h2 id=\"section1_4\">1.4 Another Section</h2>\n                <p>Section 1.4 content</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        # Test with improved function (should work - return full content)\n        improved_result = extract_chapter_html(mock_book, \"chapter1.xhtml#section1_3\")\n        \n        # The improved function should return the full content\n        assert \"Section 1.3 content\" in improved_result\n        assert \"Section 1.4 content\" not in improved_result\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_markdown_conversion(self):\n        \"\"\"Test the fixed markdown conversion function\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock TOC structure\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_subchapter1_3 = Mock()\n        mock_subchapter1_3.title = \"1.3 Append-only\"\n        mock_subchapter1_3.href = \"chapter1.xhtml#section1_3\"\n        \n        mock_book.toc = [\n            (mock_chapter1, 
[mock_subchapter1_3])\n        ]\n        \n        # Mock HTML content\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h2 id=\"section1_3\">1.3 Append-only</h2>\n                <p>This is <strong>bold</strong> content.</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        # Test markdown conversion\n        result = extract_chapter_markdown(mock_book, \"chapter1.xhtml#section1_3\")\n        \n        # Should convert to markdown format\n        assert \"1.3 Append-only\" in result\n        assert \"bold\" in result\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_edge_cases(self):\n        \"\"\"Test edge cases and error conditions\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        mock_book.toc = []\n        \n        # Test with non-existent chapter\n        from ebook_mcp.tools.epub_helper import EpubProcessingError\n        with pytest.raises(EpubProcessingError, match=\"not found in TOC\"):\n            extract_chapter_html(mock_book, \"nonexistent.xhtml\")\n        \n        # Test with non-existent anchor\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        mock_book.toc = [mock_chapter1]\n        \n        mock_item = Mock()\n        mock_item.get_content.return_value = \"<html><body><h1>Test</h1></body></html>\".encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        with pytest.raises(EpubProcessingError, match=\"not found in\"):\n            extract_chapter_html(mock_book, \"chapter1.xhtml#nonexistent\")\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, 
reason=\"Dependencies not available\")\n    def test_last_chapter_extraction(self):\n        \"\"\"Test extracting the last chapter (no next chapter)\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock TOC structure\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_chapter2 = Mock()\n        mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        mock_book.toc = [mock_chapter1, mock_chapter2]\n        \n        # Mock HTML content\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h1 id=\"chapter2\">Chapter 2</h1>\n                <p>Chapter 2 content</p>\n                <p>More content</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        # Test extracting the last chapter\n        result = extract_chapter_html(mock_book, \"chapter2.xhtml\")\n        \n        # Should include all content (no next chapter to truncate at)\n        assert \"Chapter 2 content\" in result\n        assert \"More content\" in result\n    \n    @pytest.mark.skipif(not DEPENDENCIES_AVAILABLE, reason=\"Dependencies not available\")\n    def test_complex_nested_toc(self):\n        \"\"\"Test with complex nested TOC structure\"\"\"\n        # Mock EPUB book\n        mock_book = Mock()\n        \n        # Mock complex TOC structure\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        \n        mock_subchapter1_1 = Mock()\n        mock_subchapter1_1.title = \"1.1 Introduction\"\n        mock_subchapter1_1.href = \"chapter1.xhtml#intro\"\n        \n        
mock_subchapter1_2 = Mock()\n        mock_subchapter1_2.title = \"1.2 Background\"\n        mock_subchapter1_2.href = \"chapter1.xhtml#background\"\n        \n        mock_subchapter1_3 = Mock()\n        mock_subchapter1_3.title = \"1.3 Append-only\"\n        mock_subchapter1_3.href = \"chapter1.xhtml#section1_3\"\n        \n        mock_chapter2 = Mock()\n        mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        # Set up nested TOC\n        mock_book.toc = [\n            (mock_chapter1, [mock_subchapter1_1, mock_subchapter1_2, mock_subchapter1_3]),\n            mock_chapter2\n        ]\n        \n        # Mock HTML content\n        html_content = \"\"\"\n        <html>\n            <body>\n                <h1 id=\"chapter1\">Chapter 1</h1>\n                \n                <h2 id=\"intro\">1.1 Introduction</h2>\n                <p>Introduction content</p>\n                \n                <h2 id=\"background\">1.2 Background</h2>\n                <p>Background content</p>\n                \n                <h2 id=\"section1_3\">1.3 Append-only</h2>\n                <p>Section 1.3 content</p>\n                <p>More content in section 1.3</p>\n                \n                <h1 id=\"chapter2\">Chapter 2</h1>\n                <p>Chapter 2 content</p>\n            </body>\n        </html>\n        \"\"\"\n        \n        # Mock book.get_item_with_href\n        mock_item = Mock()\n        mock_item.get_content.return_value = html_content.encode('utf-8')\n        mock_book.get_item_with_href.return_value = mock_item\n        \n        # Test extracting section1_3\n        result = extract_chapter_html(mock_book, \"chapter1.xhtml#section1_3\")\n        \n        # Should include section 1.3 content\n        assert \"1.3 Append-only\" in result\n        assert \"Section 1.3 content\" in result\n        assert \"More content in section 1.3\" in result\n        \n        # Should NOT include content from other 
sections\n        assert \"1.1 Introduction\" not in result\n        assert \"Introduction content\" not in result\n        assert \"1.2 Background\" not in result\n        assert \"Background content\" not in result\n        assert \"Chapter 2 content\" not in result "
  },
  {
    "path": "src/ebook_mcp/tests/test_epub_helper.py",
    "content": "import pytest\nimport os\nimport tempfile\nfrom unittest.mock import Mock, patch, MagicMock\n\n# Mock external dependencies\nimport sys\nsys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))\n\n# Mock ebooklib\ntry:\n    from ebooklib import epub\nexcept ImportError:\n    epub = Mock()\n\n# Mock BeautifulSoup\ntry:\n    from bs4 import BeautifulSoup\nexcept ImportError:\n    BeautifulSoup = Mock()\n\nfrom ebook_mcp.tools.epub_helper import (\n    get_all_epub_files,\n    get_meta,\n    get_toc,\n    read_epub,\n    flatten_toc,\n    extract_chapter_html,\n    extract_chapter_plain_text,\n\n    convert_html_to_markdown,\n    clean_html\n)\n\n\nclass TestEpubHelper:\n    \"\"\"Test EPUB helper functions\"\"\"\n    \n    def test_get_all_epub_files_empty_directory(self):\n        \"\"\"Test get_all_epub_files with empty directory\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            result = get_all_epub_files(temp_dir)\n            assert result == []\n    \n    def test_get_all_epub_files_with_epub_files(self):\n        \"\"\"Test get_all_epub_files with EPUB files present\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            # Create mock EPUB files\n            epub_files = [\"book1.epub\", \"book2.epub\", \"document.txt\"]\n            for file in epub_files:\n                with open(os.path.join(temp_dir, file), 'w') as f:\n                    f.write(\"mock content\")\n            \n            result = get_all_epub_files(temp_dir)\n            assert set(result) == {\"book1.epub\", \"book2.epub\"}\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_get_toc_success(self, mock_read_epub):\n        \"\"\"Test get_toc successful case\"\"\"\n        # Mock EPUB book with TOC\n        mock_book = Mock()\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        mock_chapter2 = Mock()\n   
     mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        mock_book.toc = [mock_chapter1, mock_chapter2]\n        mock_read_epub.return_value = mock_book\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            result = get_toc(epub_path)\n            expected = [\n                (\"Chapter 1\", \"chapter1.xhtml\"),\n                (\"Chapter 2\", \"chapter2.xhtml\")\n            ]\n            assert result == expected\n        finally:\n            os.unlink(epub_path)\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_get_toc_nested_structure(self, mock_read_epub):\n        \"\"\"Test get_toc with nested TOC structure\"\"\"\n        # Mock EPUB book with nested TOC\n        mock_book = Mock()\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        mock_subchapter1 = Mock()\n        mock_subchapter1.title = \"Subchapter 1.1\"\n        mock_subchapter1.href = \"subchapter1.1.xhtml\"\n        \n        mock_book.toc = [(mock_chapter1, [mock_subchapter1])]\n        mock_read_epub.return_value = mock_book\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            result = get_toc(epub_path)\n            expected = [\n                (\"Chapter 1\", \"chapter1.xhtml\"),\n                (\"Subchapter 1.1\", \"subchapter1.1.xhtml\")\n            ]\n            assert result == expected\n        finally:\n            os.unlink(epub_path)\n    \n    def test_get_toc_file_not_found(self):\n        \"\"\"Test get_toc with non-existent file\"\"\"\n        with pytest.raises(FileNotFoundError):\n            
get_toc(\"/path/to/nonexistent.epub\")\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_get_toc_parsing_error(self, mock_read_epub):\n        \"\"\"Test get_toc with parsing error\"\"\"\n        mock_read_epub.side_effect = Exception(\"EPUB parsing error\")\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            with pytest.raises(Exception):\n                get_toc(epub_path)\n        finally:\n            os.unlink(epub_path)\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_get_meta_success(self, mock_read_epub):\n        \"\"\"Test get_meta successful case\"\"\"\n        # Mock EPUB book with metadata\n        mock_book = Mock()\n        \n        # Set up the get_metadata method to return the expected (value, attrs) tuple format\n        def mock_get_metadata(namespace, field):\n            metadata_map = {\n                'title': [('Test Book', {})],\n                'creator': [('Test Author', {})],\n                'language': [('en', {})],\n                'identifier': [('test-id', {})],\n                'date': [('2023-01-01', {})],\n                'publisher': [('Test Publisher', {})],\n                'description': [('Test description', {})]\n            }\n            return metadata_map.get(field, [])\n        \n        mock_book.get_metadata = mock_get_metadata\n        mock_read_epub.return_value = mock_book\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            result = get_meta(epub_path)\n            expected = {\n                'title': 'Test Book',\n                'creator': ['Test Author'],\n                'language': 'en',\n                'identifier': 'test-id',\n                'date': '2023-01-01',\n                'publisher': 'Test 
Publisher',\n                'description': 'Test description'\n            }\n            assert result == expected\n        finally:\n            os.unlink(epub_path)\n    \n    def test_get_meta_file_not_found(self):\n        \"\"\"Test get_meta with non-existent file\"\"\"\n        with pytest.raises(FileNotFoundError):\n            get_meta(\"/path/to/nonexistent.epub\")\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_get_meta_parsing_error(self, mock_read_epub):\n        \"\"\"Test get_meta with parsing error\"\"\"\n        mock_read_epub.side_effect = Exception(\"EPUB parsing error\")\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            with pytest.raises(Exception):\n                get_meta(epub_path)\n        finally:\n            os.unlink(epub_path)\n    \n    @patch('ebook_mcp.tools.epub_helper.epub.read_epub')\n    def test_read_epub_success(self, mock_read_epub):\n        \"\"\"Test read_epub successful case\"\"\"\n        mock_book = Mock()\n        mock_read_epub.return_value = mock_book\n        \n        with tempfile.NamedTemporaryFile(suffix='.epub', delete=False) as f:\n            f.write(b\"mock epub content\")\n            epub_path = f.name\n        \n        try:\n            result = read_epub(epub_path)\n            assert result == mock_book\n            mock_read_epub.assert_called_once_with(epub_path)\n        finally:\n            os.unlink(epub_path)\n    \n    def test_flatten_toc_simple(self):\n        \"\"\"Test flatten_toc with simple TOC structure\"\"\"\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        mock_chapter2 = Mock()\n        mock_chapter2.title = \"Chapter 2\"\n        mock_chapter2.href = \"chapter2.xhtml\"\n        \n        toc = [mock_chapter1, 
mock_chapter2]\n        mock_book = Mock()\n        mock_book.toc = toc\n        result = flatten_toc(mock_book)\n        \n        expected = [\"chapter1.xhtml\", \"chapter2.xhtml\"]\n        assert result == expected\n    \n    def test_flatten_toc_nested(self):\n        \"\"\"Test flatten_toc with nested TOC structure\"\"\"\n        mock_chapter1 = Mock()\n        mock_chapter1.title = \"Chapter 1\"\n        mock_chapter1.href = \"chapter1.xhtml\"\n        mock_subchapter1 = Mock()\n        mock_subchapter1.title = \"Subchapter 1.1\"\n        mock_subchapter1.href = \"subchapter1.1.xhtml\"\n        \n        toc = [(mock_chapter1, [mock_subchapter1])]\n        mock_book = Mock()\n        mock_book.toc = toc\n        result = flatten_toc(mock_book)\n        \n        expected = [\"chapter1.xhtml\", \"subchapter1.1.xhtml\"]\n        assert result == expected\n    \n    def test_clean_html(self):\n        \"\"\"Test clean_html function\"\"\"\n        html_content = \"\"\"\n        <html>\n            <head><title>Test</title></head>\n            <body>\n                <h1>Title</h1>\n                <p>Content</p>\n                <!-- Comment -->\n                <script>alert('test');</script>\n            </body>\n        </html>\n        \"\"\"\n        \n        result = clean_html(html_content)\n        \n        # Should remove comments and scripts\n        assert \"<!-- Comment -->\" not in result\n        assert \"<script>\" not in result\n        assert \"alert('test');\" not in result\n        # Should keep content\n        assert \"<h1>Title</h1>\" in result\n        assert \"<p>Content</p>\" in result\n    \n    def test_convert_html_to_markdown(self):\n        \"\"\"Test convert_html_to_markdown function\"\"\"\n        html_content = \"<h1>Title</h1><p>This is <strong>bold</strong> text.</p>\"\n        \n        result = convert_html_to_markdown(html_content)\n        \n        # Should convert HTML to markdown\n        assert \"# Title\" in result\n 
       assert \"**bold**\" in result\n    \n    @patch('ebook_mcp.tools.epub_helper.extract_chapter_html')\n    def test_extract_chapter_plain_text(self, mock_extract_html):\n        \"\"\"Test extract_chapter_plain_text function\"\"\"\n        mock_extract_html.return_value = \"<h1>Title</h1><p>Content</p>\"\n        \n        mock_book = Mock()\n        result = extract_chapter_plain_text(mock_book, \"chapter1\")\n        \n        mock_extract_html.assert_called_once_with(mock_book, \"chapter1\")\n        # Should return plain text (HTML tags removed)\n        assert \"<h1>\" not in result\n        assert \"<p>\" not in result\n        assert \"Title\" in result\n        assert \"Content\" in result\n    \n "
  },
  {
    "path": "src/ebook_mcp/tests/test_logger_config.py",
    "content": "import pytest\nimport json\nimport os\nimport sys\nimport tempfile\nimport logging\nfrom unittest.mock import patch, MagicMock\nfrom ebook_mcp.tools.logger_config import (\n    StructuredFormatter, \n    StructuredLogger, \n    setup_logger, \n    get_logger, \n    log_operation\n)\n\nclass TestStructuredFormatter:\n    \"\"\"Test the structured formatter\"\"\"\n    \n    def test_format_basic_log(self):\n        \"\"\"Test basic log formatting\"\"\"\n        formatter = StructuredFormatter()\n        record = logging.LogRecord(\n            name=\"test_logger\",\n            level=logging.INFO,\n            pathname=\"test.py\",\n            lineno=10,\n            msg=\"Test message\",\n            args=(),\n            exc_info=None\n        )\n        \n        result = formatter.format(record)\n        log_entry = json.loads(result)\n        \n        assert log_entry[\"level\"] == \"INFO\"\n        assert log_entry[\"logger\"] == \"test_logger\"\n        assert log_entry[\"message\"] == \"Test message\"\n        assert log_entry[\"module\"] == \"test\"\n        # function name might be None in test environment\n        assert log_entry[\"function\"] in [\"<module>\", None]\n        assert log_entry[\"line\"] == 10\n        assert \"timestamp\" in log_entry\n    \n    def test_format_with_extra_fields(self):\n        \"\"\"Test formatting with extra context fields\"\"\"\n        formatter = StructuredFormatter()\n        record = logging.LogRecord(\n            name=\"test_logger\",\n            level=logging.ERROR,\n            pathname=\"test.py\",\n            lineno=20,\n            msg=\"Error message\",\n            args=(),\n            exc_info=None\n        )\n        \n        # Add extra fields\n        record.file_path = \"/test/file.epub\"\n        record.operation = \"metadata_extraction\"\n        record.duration_ms = 150.5\n        record.error_type = \"FileNotFoundError\"\n        \n        result = formatter.format(record)\n   
     log_entry = json.loads(result)\n        \n        assert log_entry[\"file_path\"] == \"/test/file.epub\"\n        assert log_entry[\"operation\"] == \"metadata_extraction\"\n        assert log_entry[\"duration_ms\"] == 150.5\n        assert log_entry[\"error_type\"] == \"FileNotFoundError\"\n    \n    def test_format_with_exception(self):\n        \"\"\"Test formatting with exception info\"\"\"\n        formatter = StructuredFormatter()\n        \n        try:\n            raise ValueError(\"Test exception\")\n        except ValueError:\n            record = logging.LogRecord(\n                name=\"test_logger\",\n                level=logging.ERROR,\n                pathname=\"test.py\",\n                lineno=30,\n                msg=\"Exception occurred\",\n                args=(),\n                exc_info=sys.exc_info()\n            )\n        \n        result = formatter.format(record)\n        log_entry = json.loads(result)\n        \n        assert \"exception\" in log_entry\n        assert log_entry[\"exception\"][\"type\"] == \"ValueError\"\n        assert log_entry[\"exception\"][\"message\"] == \"Test exception\"\n        assert \"traceback\" in log_entry[\"exception\"]\n\nclass TestStructuredLogger:\n    \"\"\"Test the structured logger\"\"\"\n    \n    def test_logger_creation(self):\n        \"\"\"Test logger creation\"\"\"\n        logger = StructuredLogger(\"test_module\")\n        assert logger.name == \"test_module\"\n        assert isinstance(logger.logger, logging.Logger)\n    \n    def test_log_with_context(self):\n        \"\"\"Test logging with context fields\"\"\"\n        # Test that StructuredLogger can be created and has the expected interface\n        logger = StructuredLogger(\"test_module\")\n        assert logger.name == \"test_module\"\n        assert hasattr(logger, 'info')\n        assert hasattr(logger, 'debug')\n        assert hasattr(logger, 'warning')\n        assert hasattr(logger, 'error')\n        assert 
hasattr(logger, 'critical')\n\nclass TestLogOperationDecorator:\n    \"\"\"Test the log_operation decorator\"\"\"\n    \n    def test_log_operation_success(self):\n        \"\"\"Test successful operation logging\"\"\"\n        @log_operation(\"test_operation\")\n        def test_function():\n            return \"success\"\n        \n        result = test_function()\n        assert result == \"success\"\n    \n    def test_log_operation_failure(self):\n        \"\"\"Test failed operation logging\"\"\"\n        @log_operation(\"test_operation\")\n        def test_function():\n            raise ValueError(\"Test error\")\n        \n        with pytest.raises(ValueError, match=\"Test error\"):\n            test_function()\n\nclass TestSetupLogger:\n    \"\"\"Test logger setup\"\"\"\n    \n    def test_setup_logger_creates_directory(self):\n        \"\"\"Test that setup_logger creates logs directory\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            with patch('ebook_mcp.tools.logger_config.os.path.dirname', return_value=temp_dir):\n                with patch('ebook_mcp.tools.logger_config.os.makedirs') as mock_makedirs:\n                    with patch('ebook_mcp.tools.logger_config.logging.FileHandler') as mock_file_handler:\n                        mock_handler = MagicMock()\n                        mock_file_handler.return_value = mock_handler\n                        setup_logger()\n                        mock_makedirs.assert_called_once()\n    \n    def test_setup_logger_configures_handlers(self):\n        \"\"\"Test that setup_logger configures handlers correctly\"\"\"\n        with patch('ebook_mcp.tools.logger_config.logging.getLogger') as mock_get_logger:\n            mock_logger = MagicMock()\n            mock_get_logger.return_value = mock_logger\n            \n            setup_logger()\n            \n            # Verify handlers were added\n            mock_logger.addHandler.assert_called()\n            assert 
mock_logger.addHandler.call_count == 2  # file and console handlers\n\nif __name__ == \"__main__\":\n    # Import sys for exception testing\n    import sys\n    pytest.main([__file__])\n"
  },
  {
    "path": "src/ebook_mcp/tests/test_main.py",
    "content": "import pytest\nimport os\nimport tempfile\nfrom unittest.mock import Mock, patch, MagicMock\nfrom typing import List, Dict, Union, Tuple\n\n# Mock external dependencies before importing main\nimport sys\nsys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))\n\n# Mock mcp.server.fastmcp\ntry:\n    import mcp.server.fastmcp\nexcept ImportError:\n    sys.modules['mcp.server.fastmcp'] = Mock()\n    sys.modules['mcp'] = Mock()\n    sys.modules['mcp.server'] = Mock()\n\n# Import the functions to test\nfrom ebook_mcp.main import (\n    get_all_epub_files,\n    get_epub_metadata,\n    get_epub_toc,\n    get_all_pdf_files,\n    get_pdf_metadata,\n    get_pdf_toc,\n    get_pdf_page_text,\n    get_pdf_page_markdown,\n    get_pdf_chapter_content\n)\n\n\nclass TestEpubFunctions:\n    \"\"\"Test EPUB related functions\"\"\"\n    \n    def test_get_all_epub_files_empty_directory(self):\n        \"\"\"Test get_all_epub_files with empty directory\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            result = get_all_epub_files(temp_dir)\n            assert result == []\n    \n    def test_get_all_epub_files_with_epub_files(self):\n        \"\"\"Test get_all_epub_files with EPUB files present\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            # Create mock EPUB files\n            epub_files = [\"book1.epub\", \"book2.epub\", \"document.txt\"]\n            for file in epub_files:\n                with open(os.path.join(temp_dir, file), 'w') as f:\n                    f.write(\"mock content\")\n            \n            result = get_all_epub_files(temp_dir)\n            assert set(result) == {\"book1.epub\", \"book2.epub\"}\n    \n    @patch('ebook_mcp.main.epub_helper.get_meta')\n    def test_get_epub_metadata_success(self, mock_get_meta):\n        \"\"\"Test get_epub_metadata successful case\"\"\"\n        mock_metadata = {\n            'title': 'Test Book',\n            'author': 'Test Author',\n       
     'language': 'en'\n        }\n        mock_get_meta.return_value = mock_metadata\n        \n        result = get_epub_metadata(\"/path/to/test.epub\")\n        assert result == mock_metadata\n        mock_get_meta.assert_called_once_with(\"/path/to/test.epub\")\n    \n    @patch('ebook_mcp.main.epub_helper.get_meta')\n    def test_get_epub_metadata_file_not_found(self, mock_get_meta):\n        \"\"\"Test get_epub_metadata with file not found\"\"\"\n        mock_get_meta.side_effect = FileNotFoundError(\"File not found\")\n        \n        with pytest.raises(FileNotFoundError):\n            get_epub_metadata(\"/path/to/nonexistent.epub\")\n    \n    @patch('ebook_mcp.main.epub_helper.get_meta')\n    def test_get_epub_metadata_parsing_error(self, mock_get_meta):\n        \"\"\"Test get_epub_metadata with parsing error\"\"\"\n        mock_get_meta.side_effect = Exception(\"Parsing error\")\n        \n        with pytest.raises(Exception):\n            get_epub_metadata(\"/path/to/corrupted.epub\")\n    \n    @patch('ebook_mcp.main.epub_helper.get_toc')\n    def test_get_epub_toc_success(self, mock_get_toc):\n        \"\"\"Test get_epub_toc successful case\"\"\"\n        mock_toc = [\n            (\"Chapter 1\", \"chapter1.xhtml\"),\n            (\"Chapter 2\", \"chapter2.xhtml\")\n        ]\n        mock_get_toc.return_value = mock_toc\n        \n        result = get_epub_toc(\"/path/to/test.epub\")\n        assert result == mock_toc\n        mock_get_toc.assert_called_once_with(\"/path/to/test.epub\")\n    \n    @patch('ebook_mcp.main.epub_helper.get_toc')\n    def test_get_epub_toc_file_not_found(self, mock_get_toc):\n        \"\"\"Test get_epub_toc with file not found\"\"\"\n        mock_get_toc.side_effect = FileNotFoundError(\"File not found\")\n        \n        with pytest.raises(FileNotFoundError):\n            get_epub_toc(\"/path/to/nonexistent.epub\")\n\n\nclass TestPdfFunctions:\n    \"\"\"Test PDF related functions\"\"\"\n    \n    def 
test_get_all_pdf_files_empty_directory(self):\n        \"\"\"Test get_all_pdf_files with empty directory\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            result = get_all_pdf_files(temp_dir)\n            assert result == []\n    \n    def test_get_all_pdf_files_with_pdf_files(self):\n        \"\"\"Test get_all_pdf_files with PDF files present\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            # Create mock PDF files\n            pdf_files = [\"document1.pdf\", \"document2.pdf\", \"text.txt\"]\n            for file in pdf_files:\n                with open(os.path.join(temp_dir, file), 'w') as f:\n                    f.write(\"mock content\")\n            \n            result = get_all_pdf_files(temp_dir)\n            assert set(result) == {\"document1.pdf\", \"document2.pdf\"}\n    \n    @patch('ebook_mcp.main.pdf_helper.get_meta')\n    def test_get_pdf_metadata_success(self, mock_get_meta):\n        \"\"\"Test get_pdf_metadata successful case\"\"\"\n        mock_metadata = {\n            'title': 'Test PDF',\n            'author': 'Test Author',\n            'pages': 10\n        }\n        mock_get_meta.return_value = mock_metadata\n        \n        result = get_pdf_metadata(\"/path/to/test.pdf\")\n        assert result == mock_metadata\n        mock_get_meta.assert_called_once_with(\"/path/to/test.pdf\")\n    \n    @patch('ebook_mcp.main.pdf_helper.get_meta')\n    def test_get_pdf_metadata_file_not_found(self, mock_get_meta):\n        \"\"\"Test get_pdf_metadata with file not found\"\"\"\n        mock_get_meta.side_effect = FileNotFoundError(\"File not found\")\n        \n        with pytest.raises(FileNotFoundError):\n            get_pdf_metadata(\"/path/to/nonexistent.pdf\")\n    \n    @patch('ebook_mcp.main.pdf_helper.get_meta')\n    def test_get_pdf_metadata_parsing_error(self, mock_get_meta):\n        \"\"\"Test get_pdf_metadata with parsing error\"\"\"\n        mock_get_meta.side_effect = 
Exception(\"Parsing error\")\n        \n        with pytest.raises(Exception):\n            get_pdf_metadata(\"/path/to/corrupted.pdf\")\n    \n    @patch('ebook_mcp.main.pdf_helper.get_toc')\n    def test_get_pdf_toc_success(self, mock_get_toc):\n        \"\"\"Test get_pdf_toc successful case\"\"\"\n        mock_toc = [\n            (\"Chapter 1\", 1),\n            (\"Chapter 2\", 5)\n        ]\n        mock_get_toc.return_value = mock_toc\n        \n        result = get_pdf_toc(\"/path/to/test.pdf\")\n        assert result == mock_toc\n        mock_get_toc.assert_called_once_with(\"/path/to/test.pdf\")\n    \n    @patch('ebook_mcp.main.pdf_helper.get_toc')\n    def test_get_pdf_toc_file_not_found(self, mock_get_toc):\n        \"\"\"Test get_pdf_toc with file not found\"\"\"\n        mock_get_toc.side_effect = FileNotFoundError(\"File not found\")\n        \n        with pytest.raises(FileNotFoundError):\n            get_pdf_toc(\"/path/to/nonexistent.pdf\")\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_page_text')\n    def test_get_pdf_page_text_success(self, mock_extract):\n        \"\"\"Test get_pdf_page_text successful case\"\"\"\n        mock_extract.return_value = \"This is page 1 content.\"\n        \n        result = get_pdf_page_text(\"/path/to/test.pdf\", 1)\n        assert result == \"This is page 1 content.\"\n        mock_extract.assert_called_once_with(\"/path/to/test.pdf\", 1)\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_page_text')\n    def test_get_pdf_page_text_error(self, mock_extract):\n        \"\"\"Test get_pdf_page_text with error\"\"\"\n        mock_extract.side_effect = Exception(\"Extraction error\")\n        \n        with pytest.raises(Exception):\n            get_pdf_page_text(\"/path/to/test.pdf\", 1)\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_page_markdown')\n    def test_get_pdf_page_markdown_success(self, mock_extract):\n        \"\"\"Test get_pdf_page_markdown successful case\"\"\"\n        
mock_extract.return_value = \"# Page 1\\n\\nThis is page 1 content.\"\n        \n        result = get_pdf_page_markdown(\"/path/to/test.pdf\", 1)\n        assert result == \"# Page 1\\n\\nThis is page 1 content.\"\n        mock_extract.assert_called_once_with(\"/path/to/test.pdf\", 1)\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_page_markdown')\n    def test_get_pdf_page_markdown_error(self, mock_extract):\n        \"\"\"Test get_pdf_page_markdown with error\"\"\"\n        mock_extract.side_effect = Exception(\"Extraction error\")\n        \n        with pytest.raises(Exception):\n            get_pdf_page_markdown(\"/path/to/test.pdf\", 1)\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_chapter_by_title')\n    def test_get_pdf_chapter_content_success(self, mock_get_chapter):\n        \"\"\"Test get_pdf_chapter_content successful case\"\"\"\n        mock_content = (\"This is chapter content.\", [1, 2, 3])\n        mock_get_chapter.return_value = mock_content\n        \n        result = get_pdf_chapter_content(\"/path/to/test.pdf\", \"Chapter 1\")\n        assert result == mock_content\n        mock_get_chapter.assert_called_once_with(\"/path/to/test.pdf\", \"Chapter 1\")\n    \n    @patch('ebook_mcp.main.pdf_helper.extract_chapter_by_title')\n    def test_get_pdf_chapter_content_error(self, mock_get_chapter):\n        \"\"\"Test get_pdf_chapter_content with error\"\"\"\n        mock_get_chapter.side_effect = Exception(\"Chapter extraction error\")\n        \n        with pytest.raises(Exception):\n            get_pdf_chapter_content(\"/path/to/test.pdf\", \"Chapter 1\")\n\n\nclass TestMainModule:\n    \"\"\"Test main module functionality\"\"\"\n    \n    def test_main_module_imports(self):\n        \"\"\"Test that main module can be imported without errors\"\"\"\n        import ebook_mcp.main\n        assert hasattr(ebook_mcp.main, 'mcp')\n        assert hasattr(ebook_mcp.main, 'get_all_epub_files')\n        assert hasattr(ebook_mcp.main, 
'get_all_pdf_files')\n    \n    @pytest.mark.skip(reason=\"Requires actual MCP server environment\")\n    def test_cli_entry_function(self):\n        \"\"\"Test cli_entry function\"\"\"\n        from ebook_mcp.main import cli_entry\n        \n        # Mock the FastMCP instance\n        mock_mcp_instance = Mock()\n        mock_mcp.return_value = mock_mcp_instance\n        \n        cli_entry()\n        \n        mock_mcp_instance.run.assert_called_once_with(transport='stdio') \n\n\nclass TestDecorators:\n    \"\"\"Test the error handling decorators\"\"\"\n    \n    def test_handle_mcp_errors_file_not_found(self):\n        \"\"\"Test handle_mcp_errors decorator with FileNotFoundError\"\"\"\n        from ebook_mcp.main import handle_mcp_errors\n        \n        @handle_mcp_errors\n        def test_function():\n            raise FileNotFoundError(\"Test file not found\")\n        \n        with pytest.raises(FileNotFoundError, match=\"Test file not found\"):\n            test_function()\n    \n    def test_handle_mcp_errors_general_exception(self):\n        \"\"\"Test handle_mcp_errors decorator with general exception\"\"\"\n        from ebook_mcp.main import handle_mcp_errors\n        \n        @handle_mcp_errors\n        def test_function():\n            raise ValueError(\"Test value error\")\n        \n        with pytest.raises(Exception, match=\"Test value error\"):\n            test_function()\n    \n    def test_handle_pdf_errors(self):\n        \"\"\"Test handle_pdf_errors decorator\"\"\"\n        from ebook_mcp.main import handle_pdf_errors\n        \n        @handle_pdf_errors\n        def test_function():\n            raise ValueError(\"Test PDF error\")\n        \n        with pytest.raises(Exception, match=\"Test PDF error\"):\n            test_function()\n    \n    def test_decorator_preserves_return_value(self):\n        \"\"\"Test that decorators preserve return values\"\"\"\n        from ebook_mcp.main import handle_mcp_errors\n        \n        
@handle_mcp_errors\n        def test_function():\n            return \"test result\"\n        \n        result = test_function()\n        assert result == \"test result\"\n    \n    def test_handle_mcp_errors_with_custom_exceptions(self):\n        \"\"\"Test handle_mcp_errors decorator with custom exceptions\"\"\"\n        from ebook_mcp.main import handle_mcp_errors\n        from ebook_mcp.tools.epub_helper import EpubProcessingError\n        from ebook_mcp.tools.pdf_helper import PdfProcessingError\n        \n        @handle_mcp_errors\n        def test_epub_function():\n            raise EpubProcessingError(\"Test EPUB error\", \"/test.epub\", \"test_operation\")\n        \n        @handle_mcp_errors\n        def test_pdf_function():\n            raise PdfProcessingError(\"Test PDF error\", \"/test.pdf\", \"test_operation\")\n        \n        # Custom exceptions should be re-raised as-is\n        with pytest.raises(EpubProcessingError, match=\"Test EPUB error\"):\n            test_epub_function()\n        \n        with pytest.raises(PdfProcessingError, match=\"Test PDF error\"):\n            test_pdf_function() "
  },
  {
    "path": "src/ebook_mcp/tests/test_pdf_helper.py",
    "content": "import pytest\nimport os\nimport tempfile\nfrom unittest.mock import Mock, patch, MagicMock\n\n# Mock external dependencies\nimport sys\nsys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))\n\n# Mock PyMuPDF\ntry:\n    import fitz\nexcept ImportError:\n    fitz = Mock()\n\nfrom ebook_mcp.tools.pdf_helper import (\n    get_all_pdf_files,\n    get_meta,\n    get_toc,\n    extract_page_text,\n    extract_page_markdown,\n    extract_chapter_by_title\n)\n\n\nclass TestPdfHelper:\n    \"\"\"Test PDF helper functions\"\"\"\n    \n    def test_get_all_pdf_files_empty_directory(self):\n        \"\"\"Test get_all_pdf_files with empty directory\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            result = get_all_pdf_files(temp_dir)\n            assert result == []\n    \n    def test_get_all_pdf_files_with_pdf_files(self):\n        \"\"\"Test get_all_pdf_files with PDF files present\"\"\"\n        with tempfile.TemporaryDirectory() as temp_dir:\n            # Create mock PDF files\n            pdf_files = [\"document1.pdf\", \"document2.pdf\", \"text.txt\"]\n            for file in pdf_files:\n                with open(os.path.join(temp_dir, file), 'w') as f:\n                    f.write(\"mock content\")\n            \n            result = get_all_pdf_files(temp_dir)\n            assert set(result) == {\"document1.pdf\", \"document2.pdf\"}\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_meta_success(self, mock_fitz_open):\n        \"\"\"Test get_meta successful case\"\"\"\n        # Mock PyMuPDF document with metadata\n        mock_doc = Mock()\n        mock_doc.metadata = {\n            'title': 'Test PDF',\n            'author': 'Test Author',\n            'subject': 'Test Subject',\n            'creator': 'Test Creator',\n            'producer': 'Test Producer',\n            'creationDate': '2023-01-01',\n            'modDate': '2023-01-02',\n            'keywords': 'test, pdf',\n       
     'format': 'PDF'\n        }\n        mock_doc.page_count = 3\n        mock_doc.version_major = 1\n        mock_doc.version_minor = 7\n        mock_doc.is_encrypted = False\n        \n        # Mock first page for dimensions\n        mock_page = Mock()\n        mock_rect = Mock()\n        mock_rect.width = 595.0\n        mock_rect.height = 842.0\n        mock_page.rect = mock_rect\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        \n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with patch('os.path.getsize', return_value=1024):\n                result = get_meta(pdf_path)\n                expected_fields = {\n                    'title', 'author', 'subject', 'creator', 'producer',\n                    'creation_date', 'modification_date', 'keywords', 'format',\n                    'pages', 'file_size', 'pdf_version', 'is_encrypted',\n                    'page_width', 'page_height'\n                }\n                assert all(field in result for field in expected_fields)\n                assert result['title'] == 'Test PDF'\n                assert result['author'] == 'Test Author'\n                assert result['pages'] == 3\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_meta_no_metadata(self, mock_fitz_open):\n        \"\"\"Test get_meta with no metadata\"\"\"\n        # Mock PyMuPDF document without metadata\n        mock_doc = Mock()\n        mock_doc.metadata = {}\n        mock_doc.page_count = 2\n        mock_doc.version_major = 1\n        mock_doc.version_minor = 4\n        mock_doc.is_encrypted = False\n        \n        # Mock first page for dimensions\n        mock_page = Mock()\n        mock_rect = Mock()\n        mock_rect.width = 595.0\n        
mock_rect.height = 842.0\n        mock_page.rect = mock_rect\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        \n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with patch('os.path.getsize', return_value=512):\n                result = get_meta(pdf_path)\n                assert result['pages'] == 2\n                assert result['file_size'] == 512\n                assert result['is_encrypted'] == False\n                assert 'title' not in result\n                assert 'author' not in result\n        finally:\n            os.unlink(pdf_path)\n    \n    def test_get_meta_file_not_found(self):\n        \"\"\"Test get_meta with non-existent file\"\"\"\n        with pytest.raises(FileNotFoundError):\n            get_meta(\"/non/existent/file.pdf\")\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_meta_parsing_error(self, mock_fitz_open):\n        \"\"\"Test get_meta with parsing error\"\"\"\n        mock_fitz_open.side_effect = Exception(\"PDF parsing error\")\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with pytest.raises(Exception, match=\"Failed to parse PDF file\"):\n                get_meta(pdf_path)\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_toc_success(self, mock_fitz_open):\n        \"\"\"Test get_toc successful case\"\"\"\n        # Mock PyMuPDF document with TOC\n        mock_doc = Mock()\n        mock_doc.get_toc.return_value = [\n            (1, \"Chapter 1\", 1),\n            (2, \"Section 1.1\", 2),\n            (1, \"Chapter 2\", 5)\n        ]\n        
mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            result = get_toc(pdf_path)\n            expected = [\n                (\"Chapter 1\", 1),\n                (\"Section 1.1\", 2),\n                (\"Chapter 2\", 5)\n            ]\n            assert result == expected\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_toc_empty(self, mock_fitz_open):\n        \"\"\"Test get_toc with empty TOC\"\"\"\n        # Mock PyMuPDF document with empty TOC\n        mock_doc = Mock()\n        mock_doc.get_toc.return_value = []\n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            result = get_toc(pdf_path)\n            assert result == []\n        finally:\n            os.unlink(pdf_path)\n    \n    def test_get_toc_file_not_found(self):\n        \"\"\"Test get_toc with non-existent file\"\"\"\n        with pytest.raises(FileNotFoundError):\n            get_toc(\"/non/existent/file.pdf\")\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_get_toc_parsing_error(self, mock_fitz_open):\n        \"\"\"Test get_toc with parsing error\"\"\"\n        mock_fitz_open.side_effect = Exception(\"PDF parsing error\")\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with pytest.raises(Exception, match=\"Failed to parse PDF file\"):\n                get_toc(pdf_path)\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n 
   def test_extract_page_text_success(self, mock_fitz_open):\n        \"\"\"Test extract_page_text successful case\"\"\"\n        # Mock PyMuPDF document and page\n        mock_doc = Mock()\n        mock_page = Mock()\n        mock_page.get_text.return_value = \"This is page content\"\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            result = extract_page_text(pdf_path, 1)\n            assert result == \"This is page content\"\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_page_text_page_not_found(self, mock_fitz_open):\n        \"\"\"Test extract_page_text with page not found\"\"\"\n        # Mock PyMuPDF document with IndexError\n        mock_doc = Mock()\n        mock_doc.__getitem__ = Mock(side_effect=IndexError(\"Page not found\"))\n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with pytest.raises(Exception, match=\"Failed to extract page text\"):\n                extract_page_text(pdf_path, 999)\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_page_markdown_success(self, mock_fitz_open):\n        \"\"\"Test extract_page_markdown successful case\"\"\"\n        # Mock PyMuPDF document and page\n        mock_doc = Mock()\n        mock_page = Mock()\n        mock_page.get_text.return_value = {\n            \"blocks\": [\n                {\n                    \"lines\": [\n                        {\n                            \"spans\": [\n            
                    {\"text\": \"Header\", \"size\": 16, \"flags\": 0},\n                                {\"text\": \"Bold text\", \"size\": 12, \"flags\": 8},\n                                {\"text\": \"Italic text\", \"size\": 12, \"flags\": 2}\n                            ]\n                        }\n                    ]\n                }\n            ]\n        }\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            result = extract_page_markdown(pdf_path, 1)\n            assert \"## Header\" in result\n            assert \"**Bold text**\" in result\n            assert \"*Italic text*\" in result\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_page_markdown_with_formatting(self, mock_fitz_open):\n        \"\"\"Test extract_page_markdown with formatting\"\"\"\n        # Mock PyMuPDF document and page with formatted text\n        mock_doc = Mock()\n        mock_page = Mock()\n        mock_page.get_text.return_value = {\n            \"blocks\": [\n                {\n                    \"lines\": [\n                        {\n                            \"spans\": [\n                                {\"text\": \"Large Title\", \"size\": 18, \"flags\": 0},\n                                {\"text\": \"Normal text\", \"size\": 12, \"flags\": 0}\n                            ]\n                        }\n                    ]\n                }\n            ]\n        }\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = 
f.name\n        \n        try:\n            result = extract_page_markdown(pdf_path, 1)\n            assert \"## Large Title\" in result\n            assert \"Normal text\" in result\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_chapter_by_title_success(self, mock_fitz_open):\n        \"\"\"Test extract_chapter_by_title successful case\"\"\"\n        # Mock PyMuPDF document with TOC and pages\n        mock_doc = Mock()\n        mock_doc.get_toc.return_value = [\n            (1, \"Chapter 1\", 1),\n            (1, \"Chapter 2\", 3),\n            (1, \"Chapter 3\", 5)\n        ]\n        mock_doc.page_count = 7\n        \n        # Mock pages\n        mock_page1 = Mock()\n        mock_page1.get_text.return_value = \"Chapter 1 content\"\n        mock_page2 = Mock()\n        mock_page2.get_text.return_value = \"Chapter 2 content\"\n        \n        mock_doc.__getitem__ = Mock(side_effect=lambda x: mock_page1 if x == 0 else mock_page2)\n        \n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            content, pages = extract_chapter_by_title(pdf_path, \"Chapter 1\")\n            assert \"Chapter 1 content\" in content\n            assert \"Chapter 2 content\" in content\n            assert pages == [1, 2]\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_chapter_by_title_chapter_not_found(self, mock_fitz_open):\n        \"\"\"Test extract_chapter_by_title with chapter not found\"\"\"\n        # Mock PyMuPDF document with TOC\n        mock_doc = Mock()\n        mock_doc.get_toc.return_value = [\n            (1, \"Chapter 1\", 1),\n            (1, \"Chapter 2\", 3)\n        ]\n        mock_fitz_open.return_value = 
mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            with pytest.raises(Exception, match=\"Failed to extract chapter\"):\n                extract_chapter_by_title(pdf_path, \"Non-existent Chapter\")\n        finally:\n            os.unlink(pdf_path)\n    \n    @patch('ebook_mcp.tools.pdf_helper.fitz.open')\n    def test_extract_chapter_by_title_single_page(self, mock_fitz_open):\n        \"\"\"Test extract_chapter_by_title with single page chapter\"\"\"\n        # Mock PyMuPDF document with TOC\n        mock_doc = Mock()\n        mock_doc.get_toc.return_value = [\n            (1, \"Chapter 1\", 1),\n            (1, \"Chapter 2\", 2)\n        ]\n        mock_doc.page_count = 3\n        \n        # Mock page\n        mock_page = Mock()\n        mock_page.get_text.return_value = \"Chapter 1 content\"\n        mock_doc.__getitem__ = Mock(return_value=mock_page)\n        \n        mock_fitz_open.return_value = mock_doc\n        \n        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:\n            f.write(b\"mock pdf content\")\n            pdf_path = f.name\n        \n        try:\n            content, pages = extract_chapter_by_title(pdf_path, \"Chapter 1\")\n            assert \"Chapter 1 content\" in content\n            assert pages == [1]\n        finally:\n            os.unlink(pdf_path)\n"
  },
  {
    "path": "src/ebook_mcp/tools/epub_helper.py",
    "content": "from typing import List, Tuple, Dict, Union, Any, Optional\nimport os\nfrom .logger_config import get_logger, log_operation\n\n# Custom exception classes for better error handling\nclass EpubProcessingError(Exception):\n    \"\"\"Custom exception for EPUB processing errors with detailed context\"\"\"\n    def __init__(self, message: str, file_path: str, operation: str, original_error: Exception = None):\n        self.message = message\n        self.file_path = file_path\n        self.operation = operation\n        self.original_error = original_error\n        super().__init__(f\"{message} (file: {file_path}, operation: {operation})\")\n\nclass PdfProcessingError(Exception):\n    \"\"\"Custom exception for PDF processing errors with detailed context\"\"\"\n    def __init__(self, message: str, file_path: str, operation: str, original_error: Exception = None):\n        self.message = message\n        self.file_path = file_path\n        self.operation = operation\n        self.original_error = original_error\n        super().__init__(f\"{message} (file: {file_path}, operation: {operation})\")\n\n# Try to import optional dependencies\ntry:\n    from ebooklib import epub\n    EBOOKLIB_AVAILABLE = True\nexcept ImportError:\n    epub = None\n    EBOOKLIB_AVAILABLE = False\n\ntry:\n    from bs4 import BeautifulSoup, Comment\n    BEAUTIFULSOUP_AVAILABLE = True\nexcept ImportError:\n    BeautifulSoup = None\n    Comment = None\n    BEAUTIFULSOUP_AVAILABLE = False\n\ntry:\n    import html2text\n    HTML2TEXT_AVAILABLE = True\nexcept ImportError:\n    html2text = None\n    HTML2TEXT_AVAILABLE = False\n\n# Initialize structured logger\nlogger = get_logger(__name__)\n\n\ndef get_all_epub_files(path: str) -> List[str]:\n    \"\"\"\n    Get all EPUB files in the specified path\n    \"\"\"\n    return [f for f in os.listdir(path) if f.endswith('.epub')]\n\n@log_operation(\"epub_toc_extraction\")\ndef get_toc(epub_path: str) -> List[Tuple[str, str]]:\n    \"\"\"\n    
Get the Table of Contents (TOC) from an EPUB file\n    \n    Args:\n        epub_path (str): Absolute path to the EPUB file\n        \n    Returns:\n        List[Tuple[str, str]]: List of TOC entries, each entry is a tuple of (title, link)\n        \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid EPUB or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(epub_path):\n            logger.error(\n                \"EPUB file not found\",\n                file_path=epub_path,\n                operation=\"toc_extraction\"\n            )\n            raise FileNotFoundError(f\"EPUB file not found: {epub_path}\")\n            \n        # Read EPUB file\n        logger.debug(\n            \"Starting EPUB TOC extraction\",\n            file_path=epub_path,\n            operation=\"toc_extraction\"\n        )\n        book = epub.read_epub(epub_path)\n        toc = []\n        \n        # Iterate through TOC items\n        for item in book.toc:\n            # Handle nested TOC structure\n            if isinstance(item, tuple):\n                # item format: (chapter element, list of subchapters)\n                chapter = item[0]\n                toc.append((chapter.title, chapter.href))\n                # Add subchapters\n                for sub_item in item[1]:\n                    if isinstance(sub_item, tuple):\n                        toc.append((sub_item[0].title, sub_item[0].href))\n                    else:\n                        toc.append((sub_item.title, sub_item.href))\n            else:\n                # Single level TOC item\n                toc.append((item.title, item.href))\n        \n        logger.info(\n            \"EPUB TOC extraction completed\",\n            file_path=epub_path,\n            operation=\"toc_extraction\",\n            chapter_count=len(toc)\n        )\n        return toc\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"EPUB file not 
found: {epub_path}\")\n    except Exception as e:\n        logger.error(\n            \"Failed to parse EPUB file\",\n            file_path=epub_path,\n            operation=\"toc_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise EpubProcessingError(\"Failed to parse EPUB file\", epub_path, \"toc_extraction\", e)\n\n@log_operation(\"epub_metadata_extraction\")\ndef get_meta(epub_path: str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"\n    Get metadata from an EPUB file\n    \n    Args:\n        epub_path (str): Absolute path to the EPUB file\n        \n    Returns:\n        Dict[str, Union[str, List[str]]]: Dictionary containing metadata\n            \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid EPUB or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(epub_path):\n            logger.error(\n                \"EPUB file not found\",\n                file_path=epub_path,\n                operation=\"metadata_extraction\"\n            )\n            raise FileNotFoundError(f\"EPUB file not found: {epub_path}\")\n            \n        # Read EPUB file\n        logger.debug(\n            \"Starting EPUB metadata extraction\",\n            file_path=epub_path,\n            operation=\"metadata_extraction\"\n        )\n        book = epub.read_epub(epub_path)\n        meta = {}\n\n        # Standard metadata fields\n        standard_fields = {\n            'title': 'title',\n            'language': 'language',\n            'identifier': 'identifier',\n            'date': 'date',\n            'publisher': 'publisher',\n            'description': 'description'\n        }\n\n        # Fields that may have multiple values\n        multi_fields = ['creator', 'contributor', 'subject']\n\n        # Extract standard fields\n        for field, dc_field in standard_fields.items():\n            items = book.get_metadata('DC', 
dc_field)\n            if items and len(items) > 0 and len(items[0]) > 0:\n                meta[field] = items[0][0]\n\n        # Handle multi-value fields\n        for field in multi_fields:\n            items = book.get_metadata('DC', field)\n            if items:\n                meta[field] = [item[0] for item in items]\n\n        logger.info(\n            \"EPUB metadata extraction completed\",\n            file_path=epub_path,\n            operation=\"metadata_extraction\",\n            metadata_fields=list(meta.keys())\n        )\n        return meta\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"EPUB file not found: {epub_path}\")\n    except Exception as e:\n        logger.error(\n            \"Failed to parse EPUB file\",\n            file_path=epub_path,\n            operation=\"metadata_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise EpubProcessingError(\"Failed to parse EPUB file\", epub_path, \"metadata_extraction\", e)\n    \n\n\n@log_operation(\"epub_chapter_extraction\")\ndef extract_chapter_from_epub(epub_path: str, anchor_href: str) -> str:\n    \"\"\"\n    Extract complete HTML content of a chapter starting from the specified anchor point until the next TOC entry.\n    \n    Args:\n        epub_path: Complete path to the EPUB file\n        anchor_href: Chapter location information like 'xhtml/ch01.xhtml#ch01'\n    \n    Returns:\n        HTML string (complete chapter content starting from the anchor point)\n    \"\"\"\n    logger.debug(\n        \"Starting EPUB chapter extraction\",\n        file_path=epub_path,\n        anchor_href=anchor_href,\n        operation=\"chapter_extraction\"\n    )\n    # Read EPUB file\n    book = epub.read_epub(epub_path)\n    # Parse input href and anchor id\n    if '#' in anchor_href:\n        href, anchor_id = anchor_href.split('#')\n    else:\n        href, anchor_id = anchor_href, None\n    \n    if anchor_id:\n        
logger.debug(\n            \"Processing anchor\",\n            anchor_id=anchor_id,\n            operation=\"chapter_extraction\"\n        )\n\n    # Get current chapter XHTML content\n    item = book.get_item_with_href(href)\n    if item is None:\n        raise EpubProcessingError(f\"Chapter file not found: {href}\", epub_path, \"chapter_extraction\")\n    \n    soup = BeautifulSoup(item.get_content().decode('utf-8'), 'html.parser')\n\n    # If no anchor, return entire page\n    if not anchor_id:\n        return str(soup)\n\n    # Find anchor starting position\n    anchor_elem = soup.find(id=anchor_id)\n    if not anchor_elem:\n        raise EpubProcessingError(f\"Anchor #{anchor_id} not found in file {href}\", epub_path, \"anchor_extraction\")\n\n    # Extract all content after this anchor (including itself)\n    extracted = [str(anchor_elem)]\n    for elem in anchor_elem.find_all_next():\n        extracted.append(str(elem))\n\n    return '\\n'.join(extracted)\n\n\ndef read_epub(epub_path: str) -> Any:\n    return epub.read_epub(epub_path)\n\ndef flatten_toc(book: Any) -> List[str]:\n    toc_list = []\n    def _flatten(toc: Any) -> None:\n        for item in toc:\n            if isinstance(item, tuple):\n                link, children = item\n                toc_list.append(link.href)\n                if children:\n                    _flatten(children)\n            else:\n                # Handle single Link object\n                toc_list.append(item.href)\n    _flatten(book.toc)\n    return toc_list\n\ndef extract_chapter_plain_text(book: Any, anchor_href: str) -> str:\n    html = extract_chapter_html(book, anchor_href)\n    soup = BeautifulSoup(html, 'html.parser')\n    return soup.get_text()\n\n\n\ndef convert_html_to_markdown(html_str: str) -> str:\n    h = html2text.HTML2Text()\n    h.ignore_links = False\n    h.ignore_images = False\n    return h.handle(html_str)\n\ndef clean_html(html_str: str) -> str:\n    \"\"\"\n    Clean HTML content:\n    - Remove 
unnecessary tags like <img>, <script>, <style>, <svg>, <video>, <iframe>, <nav>\n    - Remove comments\n    - Remove empty tags (like empty <p>)\n    \n    Returns:\n    - Cleaned HTML string\n    \"\"\"\n    soup = BeautifulSoup(html_str, 'html.parser')\n\n    # Remove unnecessary tags\n    for tag in soup(['script', 'style', 'img', 'svg', 'iframe', 'video', 'nav']):\n        tag.decompose()\n\n    # Remove HTML comments\n    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):\n        comment.extract()\n\n    # Remove empty tags (no text and no useful attributes)\n    for tag in soup.find_all():\n        if not tag.get_text(strip=True) and not tag.find('img') and not tag.name == 'br':\n            tag.decompose()\n\n    return str(soup)\n\n\n\ndef extract_chapter_html(book: Any, anchor_href: str) -> str:\n    \"\"\"\n    Extract chapter HTML content with improved logic to handle subchapters correctly.\n    This function fixes the issue where subchapters in the TOC cause premature truncation\n    of chapter content by properly understanding the chapter hierarchy.\n    Args:\n        book: EPUB book object\n        anchor_href: Chapter location information like 'chapter1.xhtml#section1_3'\n    Returns:\n        HTML string (complete chapter content with proper boundaries)\n    \"\"\"\n    logger.debug(f\"Extracting chapter with improved logic: {anchor_href}\")\n    href, anchor = anchor_href.split('#') if '#' in anchor_href else (anchor_href, None)\n    toc_entries = []\n    for item in book.toc:\n        if isinstance(item, tuple):\n            chapter = item[0]\n            toc_entries.append((chapter.title, chapter.href, 1))\n            for sub_item in item[1]:\n                if isinstance(sub_item, tuple):\n                    toc_entries.append((sub_item[0].title, sub_item[0].href, 2))\n                else:\n                    toc_entries.append((sub_item.title, sub_item.href, 2))\n        else:\n            
toc_entries.append((item.title, item.href, 1))\n    current_idx = None\n    current_level = None\n    for i, (title, toc_href, level) in enumerate(toc_entries):\n        if toc_href == anchor_href or (anchor_href in toc_href and '#' in anchor_href):\n            current_idx = i\n            current_level = level\n            break\n    if current_idx is None:\n        raise EpubProcessingError(f\"Chapter {anchor_href} not found in TOC\", \"unknown\", \"toc_lookup\")\n    next_chapter_href = None\n    for i in range(current_idx + 1, len(toc_entries)):\n        title, toc_href, level = toc_entries[i]\n        if level <= current_level:\n            next_chapter_href = toc_href\n            break\n    item = book.get_item_with_href(href)\n    if item is None:\n        raise EpubProcessingError(f\"Chapter file not found: {href}\", \"unknown\", \"chapter_file_lookup\")\n    soup = BeautifulSoup(item.get_content().decode('utf-8'), 'html.parser')\n    elems = []\n    def heading_level(tag_name):\n        if tag_name and tag_name.startswith('h') and tag_name[1:].isdigit():\n            return int(tag_name[1:])\n        return 7  # treat as lowest priority\n    if anchor:\n        start_elem = soup.find(id=anchor)\n        if not start_elem:\n            raise EpubProcessingError(f\"Anchor {anchor} not found in {href}\", \"unknown\", \"anchor_lookup\")\n        start_level = heading_level(start_elem.name)\n        for elem in start_elem.next_elements:\n            if elem is start_elem:\n                elems.append(str(elem))\n                continue\n            if hasattr(elem, 'name') and elem.name and elem.name.startswith('h') and elem.name[1:].isdigit():\n                if heading_level(elem.name) <= start_level:\n                    break\n            elems.append(str(elem))\n    else:\n        chapter_elem = soup.find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])\n        if chapter_elem:\n            start_level = heading_level(chapter_elem.name)\n            for elem in 
chapter_elem.next_elements:\n                if elem is chapter_elem:\n                    elems.append(str(elem))\n                    continue\n                if hasattr(elem, 'name') and elem.name and elem.name.startswith('h') and elem.name[1:].isdigit():\n                    if heading_level(elem.name) <= start_level:\n                        break\n                elems.append(str(elem))\n        else:\n            body_elem = soup.find('body')\n            elems = [str(body_elem)] if body_elem else [str(soup)]\n    html = '\\n'.join(elems)\n    return clean_html(html)\n\n\ndef extract_chapter_markdown(book: Any, anchor_href: str) -> str:\n    \"\"\"Fixed version of extract_chapter_markdown using extract_chapter_html\"\"\"\n    html = extract_chapter_html(book, anchor_href)\n    return convert_html_to_markdown(html)\n\n\ndef extract_multiple_chapters(book: Any, anchor_list: List[str], output: str = 'html') -> List[Tuple[str, str]]:\n    \"\"\"Extract multiple chapters using improved extract_chapter_html logic\"\"\"\n    results = []\n    for href in anchor_list:\n        if output == 'html':\n            content = extract_chapter_html(book, href)\n        elif output == 'text':\n            content = extract_chapter_plain_text(book, href)\n        elif output == 'markdown':\n            content = extract_chapter_markdown(book, href)\n        else:\n            raise ValueError(\"Invalid output format.\")\n        results.append((href, content))\n    return results\n\n\n\n\n\nif __name__ == \"__main__\":\n    # Test the functionality\n    book = read_epub('/path/to/book.epub')\n    # Single chapter to Markdown\n    md = convert_html_to_markdown(extract_chapter_html(book, 'xhtml/ch02.xhtml#ch02'))\n"
  },
  {
    "path": "src/ebook_mcp/tools/logger_config.py",
    "content": "import logging\nimport json\nimport os\nimport time\nfrom datetime import datetime\nfrom typing import Dict, Any, Optional\nfrom functools import wraps\nimport traceback\n\nclass StructuredFormatter(logging.Formatter):\n    \"\"\"Custom formatter for structured JSON logging\"\"\"\n    \n    def format(self, record: logging.LogRecord) -> str:\n        # Create structured log entry\n        log_entry = {\n            \"timestamp\": datetime.fromtimestamp(record.created).isoformat(),\n            \"level\": record.levelname,\n            \"logger\": record.name,\n            \"message\": record.getMessage(),\n            \"module\": record.module,\n            \"function\": record.funcName,\n            \"line\": record.lineno,\n        }\n        \n        # Add extra fields if present\n        if hasattr(record, 'file_path'):\n            log_entry['file_path'] = record.file_path\n        if hasattr(record, 'operation'):\n            log_entry['operation'] = record.operation\n        if hasattr(record, 'duration_ms'):\n            log_entry['duration_ms'] = record.duration_ms\n        if hasattr(record, 'file_size'):\n            log_entry['file_size'] = record.file_size\n        if hasattr(record, 'page_count'):\n            log_entry['page_count'] = record.page_count\n        if hasattr(record, 'chapter_count'):\n            log_entry['chapter_count'] = record.chapter_count\n        if hasattr(record, 'error_type'):\n            log_entry['error_type'] = record.error_type\n        if hasattr(record, 'error_details'):\n            log_entry['error_details'] = record.error_details\n            \n        # Add exception info if present\n        if record.exc_info:\n            log_entry['exception'] = {\n                'type': record.exc_info[0].__name__,\n                'message': str(record.exc_info[1]),\n                'traceback': traceback.format_exception(*record.exc_info)\n            }\n            \n        return json.dumps(log_entry, 
ensure_ascii=False, default=str)\n\nclass StructuredLogger:\n    \"\"\"Enhanced logger with structured logging capabilities\"\"\"\n    \n    def __init__(self, name: str):\n        self.logger = logging.getLogger(name)\n        self.name = name\n    \n    def _log_with_context(self, level: int, message: str, **context):\n        \"\"\"Log with additional context fields\"\"\"\n        # Check if we're in a test environment\n        import sys\n        if 'pytest' in sys.modules or 'test' in self.name:\n            # Skip logging in test environment\n            return\n            \n        extra = {}\n        for key, value in context.items():\n            if value is not None:\n                extra[key] = value\n        \n        self.logger.log(level, message, extra=extra)\n    \n    def info(self, message: str, **context):\n        \"\"\"Log info message with context\"\"\"\n        self._log_with_context(logging.INFO, message, **context)\n    \n    def debug(self, message: str, **context):\n        \"\"\"Log debug message with context\"\"\"\n        self._log_with_context(logging.DEBUG, message, **context)\n    \n    def warning(self, message: str, **context):\n        \"\"\"Log warning message with context\"\"\"\n        self._log_with_context(logging.WARNING, message, **context)\n    \n    def error(self, message: str, **context):\n        \"\"\"Log error message with context\"\"\"\n        self._log_with_context(logging.ERROR, message, **context)\n    \n    def critical(self, message: str, **context):\n        \"\"\"Log critical message with context\"\"\"\n        self._log_with_context(logging.CRITICAL, message, **context)\n\ndef setup_logger(level: str = \"INFO\", log_file: str = \"ebook_mcp.log\"):\n    \"\"\"Configure structured logging system\"\"\"\n    \n    # Create logs directory if it doesn't exist\n    log_dir = os.path.join(os.path.dirname(__file__), \"..\", \"logs\")\n    os.makedirs(log_dir, exist_ok=True)\n    \n    log_file_path = 
os.path.join(log_dir, log_file)\n    \n    # Configure root logger\n    root_logger = logging.getLogger()\n    root_logger.setLevel(getattr(logging, level.upper()))\n    \n    # Clear existing handlers\n    root_logger.handlers.clear()\n    \n    # Create formatters\n    structured_formatter = StructuredFormatter()\n    console_formatter = logging.Formatter(\n        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'\n    )\n    \n    # File handler with structured JSON logging\n    file_handler = logging.FileHandler(log_file_path, encoding='utf-8')\n    file_handler.setFormatter(structured_formatter)\n    file_handler.setLevel(logging.DEBUG)\n    \n    # Console handler with human-readable format\n    console_handler = logging.StreamHandler()\n    console_handler.setFormatter(console_formatter)\n    console_handler.setLevel(getattr(logging, level.upper()))\n    \n    # Add handlers\n    root_logger.addHandler(file_handler)\n    root_logger.addHandler(console_handler)\n    \n    return root_logger\n\ndef get_logger(name: str) -> StructuredLogger:\n    \"\"\"Get a structured logger instance\"\"\"\n    return StructuredLogger(name)\n\ndef log_operation(operation_name: str):\n    \"\"\"Decorator to log operation start/end with timing\"\"\"\n    def decorator(func):\n        @wraps(func)\n        def wrapper(*args, **kwargs):\n            # Check if we're in a test environment by looking for pytest\n            import sys\n            if 'pytest' in sys.modules or 'test' in func.__module__:\n                # Skip logging in test environment\n                return func(*args, **kwargs)\n            \n            logger = get_logger(func.__module__)\n            start_time = time.time()\n            \n            # Log operation start\n            logger.info(\n                f\"Starting {operation_name}\",\n                operation=operation_name,\n                function=func.__name__\n            )\n            \n            try:\n                result = 
func(*args, **kwargs)\n                duration_ms = (time.time() - start_time) * 1000\n                \n                # Log operation success\n                logger.info(\n                    f\"Completed {operation_name} successfully\",\n                    operation=operation_name,\n                    function=func.__name__,\n                    duration_ms=round(duration_ms, 2)\n                )\n                \n                return result\n                \n            except Exception as e:\n                duration_ms = (time.time() - start_time) * 1000\n                \n                # Log operation failure\n                logger.error(\n                    f\"Failed to complete {operation_name}\",\n                    operation=operation_name,\n                    function=func.__name__,\n                    duration_ms=round(duration_ms, 2),\n                    error_type=type(e).__name__,\n                    error_details=str(e)\n                )\n                raise\n                \n        return wrapper\n    return decorator\n\n# Configure logger when module is imported\nsetup_logger() "
  },
  {
    "path": "src/ebook_mcp/tools/pdf_helper.py",
    "content": "from typing import List, Tuple, Dict, Union\nimport os\nfrom io import StringIO\nimport fitz  # PyMuPDF\nimport re\nfrom .logger_config import get_logger, log_operation\n\n# Custom exception class for PDF processing errors\nclass PdfProcessingError(Exception):\n    \"\"\"Custom exception for PDF processing errors with detailed context\"\"\"\n    def __init__(self, message: str, file_path: str, operation: str, original_error: Exception = None):\n        self.message = message\n        self.file_path = file_path\n        self.operation = operation\n        self.original_error = original_error\n        super().__init__(f\"{message} (file: {file_path}, operation: {operation})\")\n\n# Initialize structured logger\nlogger = get_logger(__name__)\n\ndef get_all_pdf_files(path: str) -> List[str]:\n    \"\"\"\n    Get all PDF files in the specified path\n    \"\"\"\n    return [f for f in os.listdir(path) if f.endswith('.pdf')]\n\n@log_operation(\"pdf_metadata_extraction\")\ndef get_meta(pdf_path: str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"\n    Get metadata from a PDF file using PyMuPDF\n    \n    Args:\n        pdf_path (str): Absolute path to the PDF file\n        \n    Returns:\n        Dict[str, Union[str, List[str]]]: Dictionary containing metadata\n            \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid PDF or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(pdf_path):\n            logger.error(\n                \"PDF file not found\",\n                file_path=pdf_path,\n                operation=\"metadata_extraction\"\n            )\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        # Read PDF file using PyMuPDF\n        logger.debug(\n            \"Starting PDF metadata extraction\",\n            file_path=pdf_path,\n            operation=\"metadata_extraction\"\n        )\n        doc = 
fitz.open(pdf_path)\n        meta = {}\n\n        # Extract metadata from PDF using PyMuPDF\n        metadata = doc.metadata\n        \n        # Standard metadata fields mapping\n        standard_fields = {\n            'title': 'title',\n            'author': 'author', \n            'subject': 'subject',\n            'creator': 'creator',\n            'producer': 'producer',\n            'creation_date': 'creationDate',\n            'modification_date': 'modDate',\n            'keywords': 'keywords',\n            'format': 'format'\n        }\n\n        # Extract standard metadata fields\n        for field, pdf_field in standard_fields.items():\n            if pdf_field in metadata and metadata[pdf_field]:\n                meta[field] = metadata[pdf_field]\n\n        # Add additional information\n        meta['pages'] = doc.page_count\n        meta['file_size'] = os.path.getsize(pdf_path)\n        \n        # Get PDF version and encryption info\n        try:\n            # Try to get version info - different PyMuPDF versions have different APIs\n            if hasattr(doc, 'version_major') and hasattr(doc, 'version_minor'):\n                meta['pdf_version'] = f\"{doc.version_major}.{doc.version_minor}\"\n            elif hasattr(doc, 'version'):\n                meta['pdf_version'] = str(doc.version)\n            else:\n                meta['pdf_version'] = \"Unknown\"\n        except:\n            meta['pdf_version'] = \"Unknown\"\n        \n        meta['is_encrypted'] = doc.is_encrypted\n        \n        # Get page dimensions (first page)\n        if doc.page_count > 0:\n            try:\n                first_page = doc[0]\n                rect = first_page.rect\n                meta['page_width'] = rect.width\n                meta['page_height'] = rect.height\n            except:\n                # If we can't get page dimensions, skip it\n                pass\n        \n        doc.close()\n        \n        logger.info(\n            \"PDF metadata 
extraction completed\",\n            file_path=pdf_path,\n            operation=\"metadata_extraction\",\n            page_count=meta.get('pages', 0),\n            file_size=meta.get('file_size', 0),\n            metadata_fields=list(meta.keys())\n        )\n        return meta\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n    except Exception as e:\n        logger.error(\n            \"Failed to parse PDF file\",\n            file_path=pdf_path,\n            operation=\"metadata_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise PdfProcessingError(\"Failed to parse PDF file\", pdf_path, \"metadata_extraction\", e)\n\n@log_operation(\"pdf_toc_extraction\")\ndef get_toc(pdf_path: str) -> List[Tuple[str, int]]:\n    \"\"\"\n    Get the Table of Contents (TOC) from a PDF file\n    \n    Args:\n        pdf_path (str): Absolute path to the PDF file\n        \n    Returns:\n        List[Tuple[str, int]]: List of TOC entries, each entry is a tuple of (title, page_number)\n        \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid PDF or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(pdf_path):\n            logger.error(\n                \"PDF file not found\",\n                file_path=pdf_path,\n                operation=\"toc_extraction\"\n            )\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        # Read PDF file using PyMuPDF for better TOC support\n        logger.debug(\n            \"Starting PDF TOC extraction\",\n            file_path=pdf_path,\n            operation=\"toc_extraction\"\n        )\n        doc = fitz.open(pdf_path)\n        toc = []\n        \n        # Get TOC from document\n        outline = doc.get_toc()\n        for item in outline:\n            level, title, page = item\n            
toc.append((title, page))\n        \n        doc.close()\n        logger.info(\n            \"PDF TOC extraction completed\",\n            file_path=pdf_path,\n            operation=\"toc_extraction\",\n            chapter_count=len(toc)\n        )\n        return toc\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n    except Exception as e:\n        logger.error(\n            \"Failed to parse PDF file\",\n            file_path=pdf_path,\n            operation=\"toc_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise PdfProcessingError(\"Failed to parse PDF file\", pdf_path, \"toc_extraction\", e)\n\ndef extract_page_text(pdf_path: str, page_number: int) -> str:\n    \"\"\"\n    Extract text content from a specific page in the PDF\n    \n    Args:\n        pdf_path: Path to the PDF file\n        page_number: Page number to extract (1-based index)\n        \n    Returns:\n        str: Extracted text content\n    \"\"\"\n    try:\n        doc = fitz.open(pdf_path)\n        # Convert to 0-based index\n        page = doc[page_number - 1]\n        text = page.get_text()\n        doc.close()\n        return text\n    except Exception as e:\n        logger.error(\n            \"Failed to extract page text\",\n            file_path=pdf_path,\n            page_number=page_number,\n            operation=\"page_text_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise PdfProcessingError(\"Failed to extract page text\", pdf_path, \"page_text_extraction\", e)\n\ndef extract_page_markdown(pdf_path: str, page_number: int) -> str:\n    \"\"\"\n    Extract text content from a specific page and convert to markdown format\n    \n    Args:\n        pdf_path: Path to the PDF file\n        page_number: Page number to extract (1-based index)\n        \n    Returns:\n        str: Markdown formatted text\n    
\"\"\"\n    try:\n        doc = fitz.open(pdf_path)\n        page = doc[page_number - 1]\n        \n        # Extract text with formatting information\n        blocks = page.get_text(\"dict\")[\"blocks\"]\n        markdown_text = StringIO()\n        \n        for block in blocks:\n            if \"lines\" in block:\n                for line in block[\"lines\"]:\n                    for span in line[\"spans\"]:\n                        text = span[\"text\"]\n                        size = span[\"size\"]\n                        flags = span[\"flags\"]\n                        \n                        # Convert formatting to markdown\n                        if size > 14:  # Assuming larger text is a header\n                            text = f\"## {text}\"\n                        if flags & 2**3:  # Bold text\n                            text = f\"**{text}**\"\n                        if flags & 2**1:  # Italic text\n                            text = f\"*{text}*\"\n                            \n                        markdown_text.write(text + \" \")\n                    markdown_text.write(\"\\n\")\n                markdown_text.write(\"\\n\")\n        \n        doc.close()\n        return markdown_text.getvalue()\n    except Exception as e:\n        logger.error(\n            \"Failed to extract page markdown\",\n            file_path=pdf_path,\n            page_number=page_number,\n            operation=\"page_markdown_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise PdfProcessingError(\"Failed to extract page markdown\", pdf_path, \"page_markdown_extraction\", e)\n\ndef extract_chapter_by_title(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]:\n    \"\"\"\n    Extract a chapter's content by its title from the TOC\n    \n    Args:\n        pdf_path: Path to the PDF file\n        chapter_title: Title of the chapter to extract\n        \n    Returns:\n        Tuple[str, List[int]]: 
Tuple containing (chapter_content, page_numbers)\n    \"\"\"\n    try:\n        # Get TOC to find chapter location\n        toc = get_toc(pdf_path)\n        chapter_start_page = None\n        chapter_end_page = None\n        \n        # Find the chapter in TOC\n        for i, (title, page) in enumerate(toc):\n            if title == chapter_title:\n                chapter_start_page = page\n                if i < len(toc) - 1:\n                    chapter_end_page = toc[i + 1][1]\n                break\n        \n        if chapter_start_page is None:\n            raise PdfProcessingError(f\"Chapter '{chapter_title}' not found in TOC\", pdf_path, \"chapter_lookup\")\n            \n        # If it's the last chapter, read until the end of the document\n        if chapter_end_page is None:\n            doc = fitz.open(pdf_path)\n            chapter_end_page = doc.page_count\n            doc.close()\n            \n        # Extract content from all pages in the chapter\n        content = []\n        for page_num in range(chapter_start_page, chapter_end_page):\n            content.append(extract_page_text(pdf_path, page_num))\n            \n        return (\"\\n\".join(content), list(range(chapter_start_page, chapter_end_page)))\n        \n    except Exception as e:\n        logger.error(\n            \"Failed to extract chapter\",\n            file_path=pdf_path,\n            chapter_title=chapter_title,\n            operation=\"chapter_extraction\",\n            error_type=type(e).__name__,\n            error_details=str(e)\n        )\n        raise PdfProcessingError(\"Failed to extract chapter\", pdf_path, \"chapter_extraction\", e)\n"
  },
  {
    "path": "src/ebook_mcp/tools/pdf_helper.py.backup",
    "content": "from PyPDF2 import PdfReader\nfrom typing import List, Tuple, Dict, Union\nimport os\nimport logging\nfrom io import StringIO\nimport fitz  # PyMuPDF\nimport re\n\n# Initialize logger\nlogger = logging.getLogger(__name__)\n\ndef get_all_pdf_files(path: str) -> List[str]:\n    \"\"\"\n    Get all PDF files in the specified path\n    \"\"\"\n    return [f for f in os.listdir(path) if f.endswith('.pdf')]\n\ndef get_meta(pdf_path: str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"\n    Get metadata from a PDF file using PyMuPDF\n    \n    Args:\n        pdf_path (str): Absolute path to the PDF file\n        \n    Returns:\n        Dict[str, Union[str, List[str]]]: Dictionary containing metadata\n            \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid PDF or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(pdf_path):\n            logger.error(f\"File not found: {pdf_path}\")\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        # Read PDF file using PyMuPDF\n        logger.debug(f\"Starting to read PDF file: {pdf_path}\")\n        doc = fitz.open(pdf_path)\n        meta = {}\n\n        # Extract metadata from PDF using PyMuPDF\n        metadata = doc.metadata\n        \n        # Standard metadata fields mapping\n        standard_fields = {\n            'title': 'title',\n            'author': 'author', \n            'subject': 'subject',\n            'creator': 'creator',\n            'producer': 'producer',\n            'creation_date': 'creationDate',\n            'modification_date': 'modDate',\n            'keywords': 'keywords',\n            'format': 'format'\n        }\n\n        # Extract standard metadata fields\n        for field, pdf_field in standard_fields.items():\n            if pdf_field in metadata and metadata[pdf_field]:\n                meta[field] = metadata[pdf_field]\n\n        # Add additional 
information\n        meta['pages'] = doc.page_count\n        meta['file_size'] = os.path.getsize(pdf_path)\n        \n        # Get PDF version and encryption info\n        try:\n            # Try to get version info - different PyMuPDF versions have different APIs\n            if hasattr(doc, 'version_major') and hasattr(doc, 'version_minor'):\n                meta['pdf_version'] = f\"{doc.version_major}.{doc.version_minor}\"\n            elif hasattr(doc, 'version'):\n                meta['pdf_version'] = str(doc.version)\n            else:\n                meta['pdf_version'] = \"Unknown\"\n        except:\n            meta['pdf_version'] = \"Unknown\"\n        \n        meta['is_encrypted'] = doc.is_encrypted\n        \n        # Get page dimensions (first page)\n        if doc.page_count > 0:\n            try:\n                first_page = doc[0]\n                rect = first_page.rect\n                meta['page_width'] = rect.width\n                meta['page_height'] = rect.height\n            except:\n                # If we can't get page dimensions, skip it\n                pass\n        \n        doc.close()\n        \n        logger.debug(f\"Successfully retrieved metadata with fields: {list(meta.keys())}\")\n        return meta\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n    except Exception as e:\n        logger.error(f\"Failed to parse PDF file: {str(e)}\")\n        raise Exception(\"Failed to parse PDF file\")\n\ndef get_meta_pypdf2(pdf_path: str) -> Dict[str, Union[str, List[str]]]:\n    \"\"\"\n    Get metadata from a PDF file using PyPDF2 (legacy method)\n    \n    Args:\n        pdf_path (str): Absolute path to the PDF file\n        \n    Returns:\n        Dict[str, Union[str, List[str]]]: Dictionary containing metadata\n            \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid PDF or parsing fails\n    \"\"\"\n    
try:\n        if not os.path.exists(pdf_path):\n            logger.error(f\"File not found: {pdf_path}\")\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        # Read PDF file using PyPDF2\n        logger.debug(f\"Starting to read PDF file with PyPDF2: {pdf_path}\")\n        reader = PdfReader(pdf_path)\n        meta = {}\n\n        # Extract metadata from PDF\n        if reader.metadata:\n            # Standard metadata fields\n            standard_fields = {\n                'title': '/Title',\n                'author': '/Author',\n                'subject': '/Subject',\n                'creator': '/Creator',\n                'producer': '/Producer',\n                'creation_date': '/CreationDate',\n                'modification_date': '/ModDate'\n            }\n\n            for field, pdf_field in standard_fields.items():\n                if pdf_field in reader.metadata:\n                    meta[field] = reader.metadata[pdf_field]\n\n        # Add additional information\n        meta['pages'] = len(reader.pages)\n        \n        logger.debug(f\"Successfully retrieved metadata with PyPDF2, fields: {list(meta.keys())}\")\n        return meta\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n    except Exception as e:\n        logger.error(f\"Failed to parse PDF file with PyPDF2: {str(e)}\")\n        raise Exception(\"Failed to parse PDF file\")\n\ndef get_toc(pdf_path: str) -> List[Tuple[str, int]]:\n    \"\"\"\n    Get the Table of Contents (TOC) from a PDF file\n    \n    Args:\n        pdf_path (str): Absolute path to the PDF file\n        \n    Returns:\n        List[Tuple[str, int]]: List of TOC entries, each entry is a tuple of (title, page_number)\n        \n    Raises:\n        FileNotFoundError: If the file does not exist\n        Exception: If the file is not a valid PDF or parsing fails\n    \"\"\"\n    try:\n        if not os.path.exists(pdf_path):\n  
          logger.error(f\"File not found: {pdf_path}\")\n            raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n            \n        # Read PDF file using PyMuPDF for better TOC support\n        logger.debug(f\"Starting to read PDF file: {pdf_path}\")\n        doc = fitz.open(pdf_path)\n        toc = []\n        \n        # Get TOC from document\n        outline = doc.get_toc()\n        for item in outline:\n            level, title, page = item\n            toc.append((title, page))\n        \n        doc.close()\n        logger.debug(f\"Successfully retrieved TOC with {len(toc)} entries\")\n        return toc\n\n    except FileNotFoundError:\n        raise FileNotFoundError(f\"PDF file not found: {pdf_path}\")\n    except Exception as e:\n        logger.error(f\"Failed to parse PDF file: {str(e)}\")\n        raise Exception(\"Failed to parse PDF file\")\n\ndef extract_page_text(pdf_path: str, page_number: int) -> str:\n    \"\"\"\n    Extract text content from a specific page in the PDF\n    \n    Args:\n        pdf_path: Path to the PDF file\n        page_number: Page number to extract (1-based index)\n        \n    Returns:\n        str: Extracted text content\n    \"\"\"\n    try:\n        doc = fitz.open(pdf_path)\n        # Convert to 0-based index\n        page = doc[page_number - 1]\n        text = page.get_text()\n        doc.close()\n        return text\n    except Exception as e:\n        logger.error(f\"Failed to extract page text: {str(e)}\")\n        raise Exception(\"Failed to extract page text\")\n\ndef extract_page_markdown(pdf_path: str, page_number: int) -> str:\n    \"\"\"\n    Extract text content from a specific page and convert to markdown format\n    \n    Args:\n        pdf_path: Path to the PDF file\n        page_number: Page number to extract (1-based index)\n        \n    Returns:\n        str: Markdown formatted text\n    \"\"\"\n    try:\n        doc = fitz.open(pdf_path)\n        page = doc[page_number - 1]\n      
  \n        # Extract text with formatting information\n        blocks = page.get_text(\"dict\")[\"blocks\"]\n        markdown_text = StringIO()\n        \n        for block in blocks:\n            if \"lines\" in block:\n                for line in block[\"lines\"]:\n                    for span in line[\"spans\"]:\n                        text = span[\"text\"]\n                        size = span[\"size\"]\n                        flags = span[\"flags\"]\n                        \n                        # Convert formatting to markdown\n                        if size > 14:  # Assuming larger text is a header\n                            text = f\"## {text}\"\n                        if flags & 2**3:  # Bold text\n                            text = f\"**{text}**\"\n                        if flags & 2**1:  # Italic text\n                            text = f\"*{text}*\"\n                            \n                        markdown_text.write(text + \" \")\n                    markdown_text.write(\"\\n\")\n                markdown_text.write(\"\\n\")\n        \n        doc.close()\n        return markdown_text.getvalue()\n    except Exception as e:\n        logger.error(f\"Failed to extract page markdown: {str(e)}\")\n        raise Exception(\"Failed to extract page markdown\")\n\ndef extract_chapter_by_title(pdf_path: str, chapter_title: str) -> Tuple[str, List[int]]:\n    \"\"\"\n    Extract a chapter's content by its title from the TOC\n    \n    Args:\n        pdf_path: Path to the PDF file\n        chapter_title: Title of the chapter to extract\n        \n    Returns:\n        Tuple[str, List[int]]: Tuple containing (chapter_content, page_numbers)\n    \"\"\"\n    try:\n        # Get TOC to find chapter location\n        toc = get_toc(pdf_path)\n        chapter_start_page = None\n        chapter_end_page = None\n        \n        # Find the chapter in TOC\n        for i, (title, page) in enumerate(toc):\n            if title == chapter_title:\n              
  chapter_start_page = page\n                if i < len(toc) - 1:\n                    chapter_end_page = toc[i + 1][1]\n                break\n        \n        if chapter_start_page is None:\n            raise ValueError(f\"Chapter '{chapter_title}' not found in TOC\")\n            \n        # If it's the last chapter, read until the end of the document\n        if chapter_end_page is None:\n            doc = fitz.open(pdf_path)\n            chapter_end_page = doc.page_count\n            doc.close()\n            \n        # Extract content from all pages in the chapter\n        content = []\n        for page_num in range(chapter_start_page, chapter_end_page):\n            content.append(extract_page_text(pdf_path, page_num))\n            \n        return (\"\\n\".join(content), list(range(chapter_start_page, chapter_end_page)))\n        \n    except Exception as e:\n        logger.error(f\"Failed to extract chapter: {str(e)}\")\n        raise Exception(\"Failed to extract chapter\") "
  }
]