[
  {
    "path": ".github/workflows/docs.yml",
    "content": "name: Publish documentation\n\non:\n  push:\n    branches:\n    - main\n\njobs:\n  docs:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v2\n      - uses: actions/setup-python@v2\n      - name: Install dependencies\n        run: |\n          pip install -r requirements/requirements-docs.txt\n      - name: Sphinx build\n        run: |\n          sphinx-build docs _build\n      - name: Deploy\n        uses: peaceiris/actions-gh-pages@v3\n        if: ${{ github.ref == 'refs/heads/main' }}\n        with:\n          publish_branch: gh-pages\n          github_token: ${{ secrets.GITHUB_TOKEN }}\n          publish_dir: _build/\n          force_orphan: true "
  },
  {
    "path": ".github/workflows/test.yml",
    "content": "name: Test\n\non:\n  push:\n    branches: \n    - \"main\"\n  pull_request:\n\nconcurrency:\n  group: ${{ github.head_ref || github.run_id }}\n  cancel-in-progress: true\n\njobs:\n  test:\n    name: Python ${{ matrix.python-version }} on ${{ matrix.os }}\n    runs-on: ${{ matrix.os }}\n    strategy:\n      max-parallel: 4\n      matrix:\n        os:\n        - ubuntu-24.04\n        - windows-2022\n        - macos-14\n        python-version: \n        - \"3.8\"\n        - \"3.9\"\n        - \"3.10\"\n        - \"3.11\"\n        - \"3.12\" \n        - \"3.13\"\n\n    steps:\n    - uses: actions/checkout@v4\n    \n    - uses: actions/setup-python@v5\n      with:\n        python-version: ${{ matrix.python-version }}\n        allow-prereleases: true\n    \n    - name: Cache pip packages\n      uses: actions/cache@v3\n      with:\n        path: ~/.cache/pip\n        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}\n        restore-keys: |\n          ${{ runner.os }}-pip-\n    \n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install -r requirements.txt\n    \n    - name: Run tests\n      run: |\n        pytest"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\npip-wheel-metadata/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n.python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# MacOS\n.DS_Store"
  },
  {
    "path": ".vscode/settings.json",
    "content": "{\n    \"python.testing.pytestEnabled\": true\n}"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2025 Ernesto González\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "Makefile",
    "content": "#!/usr/bin/env bash\n\nLIGHT_CYAN=\\033[1;36m\nNO_COLOR=\\033[0m\n\n.PHONY: docs\n\nhelp:\n\t@echo \"test - run tests with pytest\"\n\t@echo \"coverage - get code coverage report\"\n\t@echo \"lint - lint the python code\"\n\t@echo \"format - format the python code\"\n\n# Run tests\ntest:\n\t@echo \"${LIGHT_CYAN}Running tests...${NO_COLOR}\"\n\tpytest\n\n# Get code coverage report\ncoverage:\n\t@echo \"${LIGHT_CYAN}Running tests and collecting coverage data...${NO_COLOR}\"\n\tpytest\n\tcoverage combine\n\t@echo \"${LIGHT_CYAN}Reporting code coverage data...${NO_COLOR}\"\n\tcoverage report\n\t@echo \"${LIGHT_CYAN}Creating HTML report...${NO_COLOR}\"\n\tcoverage html\n\t@echo \"${LIGHT_CYAN}Creating coverage badge...${NO_COLOR}\"\n\t@rm ./coverage.svg\n\tcoverage-badge -o coverage.svg\n\n# Lint code\nlint:\n\t@echo \"${LIGHT_CYAN}Linting code...${NO_COLOR}\"\n\truff check\n\n# Format code\nformat:\n\t@echo \"${LIGHT_CYAN}Formatting code...${NO_COLOR}\"\n\truff check --select I --fix\n\truff format"
  },
  {
    "path": "README.md",
    "content": "# epub-utils\n\n[![PyPI](https://img.shields.io/pypi/v/epub-utils.svg)](https://pypi.org/project/epub-utils/)\n[![Changelog](https://img.shields.io/github/v/release/ernestofgonzalez/epub-utils?include_prereleases&label=changelog)](https://ernestofgonzalez.github.io/epub-utils/changelog)\n[![Python 3.x](https://img.shields.io/pypi/pyversions/epub-utils.svg?logo=python&logoColor=white)](https://pypi.org/project/epub-utils/)\n[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/ernestofgonzalez/epub-utils/blob/main/LICENSE)\n\nA Python library and CLI tool for inspecting ePub from the terminal.\n\n## Features\n\n- **Complete EPUB Support** - Parse both EPUB 2.0.1 and EPUB 3.0+ specifications with container, package, manifest, spine, and table of contents inspection\n- **Rich Metadata Extraction** - Extract Dublin Core metadata (title, author, language, publisher) with key-value, XML, and raw output formats for easy scripting\n- **Content Analysis** - Access document content by manifest ID or file path, with plain text extraction for content analysis and word counting\n- **File System Navigation** - Browse and extract any file within EPUB archives (XHTML, CSS, images, fonts) with detailed file information including sizes and compression ratios\n- **Multiple Output Formats** - XML with syntax highlighting, raw content, key-value pairs, plain text, and formatted tables to suit different workflows\n- **CLI and Python API** - Comprehensive command-line tool for terminal workflows plus a clean Python library for programmatic access\n- **Standards Compliance** - Built-in validation capabilities and adherence to W3C/IDPF specifications for reliable EPUB processing\n- **Performance Optimized** - Lazy loading, efficient ZIP parsing, and optional lxml support for handling large EPUB collections\n\n## Installation\n\n`epub-utils` is available as a [PyPI](https://pypi.org/) package\n\n```bash\npip install epub-utils\n```\n\n## 
Use as a CLI tool\n\nThe basic format is:\n\n```bash\nepub-utils EPUB_PATH COMMAND [OPTIONS]\n```\n\n### Commands\n\n- `container` - Display the container.xml contents\n    ```bash\n    # Show container.xml with syntax highlighting\n    epub-utils book.epub container\n\n    # Show container.xml as raw content\n    epub-utils book.epub container --format raw\n    \n    # Show container.xml with pretty formatting\n    epub-utils book.epub container --pretty-print\n    ```\n\n- `package` - Display the package OPF file contents\n    ```bash\n    # Show package.opf with syntax highlighting\n    epub-utils book.epub package\n\n    # Show package.opf as raw content\n    epub-utils book.epub package --format raw\n    ```\n\n- `toc` - Display the table of contents file contents\n    ```bash\n    # Show toc.ncx/nav.xhtml with syntax highlighting (auto-detect)\n    epub-utils book.epub toc\n\n    # Show toc.ncx/nav.xhtml as raw content\n    epub-utils book.epub toc --format raw\n\n    # Force NCX format (EPUB 2 navigation control file)\n    epub-utils book.epub toc --ncx\n\n    # Force Navigation Document (EPUB 3 navigation file)\n    epub-utils book.epub toc --nav\n    ```\n\n- `metadata` - Display the metadata information from the package file\n    ```bash\n    # Show metadata with syntax highlighting\n    epub-utils book.epub metadata\n\n    # Show metadata as key-value pairs\n    epub-utils book.epub metadata --format kv\n    \n    # Show metadata with pretty formatting\n    epub-utils book.epub metadata --pretty-print\n    ```\n\n- `manifest` - Display the manifest information from the package file\n    ```bash\n    # Show manifest with syntax highlighting\n    epub-utils book.epub manifest\n\n    # Show manifest as raw content\n    epub-utils book.epub manifest --format raw\n    ```\n\n- `spine` - Display the spine information from the package file\n    ```bash\n    # Show spine with syntax highlighting\n    epub-utils book.epub spine\n\n    # Show spine as raw 
content\n    epub-utils book.epub spine --format raw\n    ```\n\n- `content` - Display the content of a document by its manifest item ID\n    ```bash\n    # Show content with syntax highlighting\n    epub-utils book.epub content chapter1\n\n    # Show raw HTML/XML content\n    epub-utils book.epub content chapter1 --format raw\n    \n    # Show plain text content (HTML tags stripped)\n    epub-utils book.epub content chapter1 --format plain\n    ```\n\n- `files` - List all files in the EPUB archive or display content of a specific file\n    ```bash\n    # List all files in table format (default)\n    epub-utils book.epub files\n\n    # List all files as simple paths\n    epub-utils book.epub files --format raw\n\n    # Display content of a specific file by path\n    epub-utils book.epub files OEBPS/chapter1.xhtml\n\n    # Display XHTML file content in different formats\n    epub-utils book.epub files OEBPS/chapter1.xhtml --format raw\n    epub-utils book.epub files OEBPS/chapter1.xhtml --format xml --pretty-print\n    epub-utils book.epub files OEBPS/chapter1.xhtml --format plain\n\n    # Display non-XHTML files (CSS, images, etc.)\n    epub-utils book.epub files OEBPS/styles/main.css\n    epub-utils book.epub files META-INF/container.xml\n    ```\n\n### Options\n\n- `-h, --help` - Show help message and exit\n- `-v, --version` - Show program version and exit\n- `-fmt, --format` - Output format (default: xml)\n    - `xml` - Display with XML syntax highlighting (default)\n    - `raw` - Display raw content without formatting\n    - `plain` - Display plain text content (HTML tags stripped; supported by the `content` and `files` commands)\n    - `kv` - Display key-value pairs (where supported)\n- `-pp, --pretty-print` - Pretty-print XML output (applies to xml and raw formats only)\n    \n    ```bash\n    # Display as raw content\n    epub-utils book.epub package --format raw\n    \n    # Display with XML syntax highlighting (default)\n    epub-utils book.epub package --format xml\n    \n    # 
Display as key-value pairs (for supported commands)\n    epub-utils book.epub metadata --format kv\n    \n    # Display plain text content (content command only)\n    epub-utils book.epub content chapter1 --format plain\n    \n    # Pretty-print XML with proper indentation\n    epub-utils book.epub package --pretty-print\n    \n    # Combine format and pretty-print options\n    epub-utils book.epub metadata --format raw --pretty-print\n    ```\n\n## Use as a Python library\n\n```python\nfrom epub_utils import Document\n\n# Load an EPUB document\ndoc = Document(\"path/to/book.epub\")\n```\n\n### Basic Document Access\n\nAccess the main components of an EPUB document:\n\n```python\n# Get container information\ncontainer = doc.container\nprint(container.to_xml())  # Formatted XML with syntax highlighting\nprint(container.to_str())  # Raw XML content\n\n# Get package information  \npackage = doc.package\nprint(package.to_xml())    # Formatted XML with syntax highlighting\nprint(package.to_str())    # Raw XML content\n\n# Get table of contents\ntoc = doc.toc\nif toc:  # TOC might be None if not present\n    print(toc.to_xml())    # Formatted XML with syntax highlighting\n    print(toc.to_str())    # Raw XML content\n\n# Access specific navigation formats\nncx = doc.ncx  # NCX format (EPUB 2 or EPUB 3 with NCX)\nif ncx:\n    print(\"NCX navigation available\")\n    print(ncx.to_xml())\n\nnav = doc.nav  # Navigation Document (EPUB 3 only)\nif nav:\n    print(\"Navigation Document available\")\n    print(nav.to_xml())\n    print(nav.to_str())    # Raw XML content\n```\n\n### Working with Metadata\n\nAccess and format metadata information:\n\n```python\n# Access package metadata\nmetadata = doc.package.metadata\n\n# Basic Dublin Core elements\nprint(f\"Title: {metadata.title}\")\nprint(f\"Creator: {metadata.creator}\")\nprint(f\"Identifier: {metadata.identifier}\")\nprint(f\"Language: {metadata.language}\")\nprint(f\"Publisher: {metadata.publisher}\")\nprint(f\"Date: 
{metadata.date}\")\n\n# Dynamic attribute access for any metadata field\nisbn = getattr(metadata, 'isbn', 'Not available')\nseries = getattr(metadata, 'series', 'Not available')\n\n# Get formatted metadata output\nprint(metadata.to_xml())     # Formatted XML with syntax highlighting\nprint(metadata.to_str())     # Raw XML content  \nprint(metadata.to_kv())      # Key-value format for easy parsing\n```\n\n### Working with Manifest\n\nAccess the manifest to see all files in the EPUB:\n\n```python\n# Get manifest information\nmanifest = doc.package.manifest\n\n# Access all manifest items\nfor item in manifest.items:\n    print(f\"ID: {item['id']}\")\n    print(f\"File: {item['href']}\")\n    print(f\"Type: {item['media_type']}\")\n    print(f\"Properties: {item['properties']}\")\n\n# Find specific items\nnav_item = manifest.find_by_property('nav')\nchapter = manifest.find_by_id('chapter1')\nxhtml_items = manifest.find_by_media_type('application/xhtml+xml')\n\n# Get formatted manifest output\nprint(manifest.to_xml())     # Formatted XML with syntax highlighting\nprint(manifest.to_str())     # Raw XML content\n```\n\n### Working with Spine\n\nAccess the spine to see the reading order:\n\n```python\n# Get spine information\nspine = doc.package.spine\n\n# Access spine properties\nprint(f\"TOC reference: {spine.toc}\")\nprint(f\"Page progression: {spine.page_progression_direction}\")\n\n# Access spine items in reading order\nfor itemref in spine.itemrefs:\n    print(f\"ID: {itemref['idref']}\")\n    print(f\"Linear: {itemref['linear']}\")\n    print(f\"Properties: {itemref['properties']}\")\n\n# Find specific spine item\nspine_item = spine.find_by_idref('chapter1')\n\n# Get formatted spine output\nprint(spine.to_xml())        # Formatted XML with syntax highlighting\nprint(spine.to_str())        # Raw XML content\n```\n\n### Content Extraction\n\nExtract content from specific documents within the EPUB:\n\n```python\n# Access content by manifest item ID\ntry:\n    content = 
doc.find_content_by_id('chapter1')\n    \n    # Get content in different formats\n    print(content.to_xml())      # Formatted XHTML with syntax highlighting\n    print(content.to_str())      # Raw XHTML content\n    print(content.to_plain())    # Plain text with HTML tags stripped\n    \n    # Access the parsed content tree for advanced processing\n    tree = content.tree\n    inner_text = content.inner_text\n    \nexcept ValueError as e:\n    print(f\"Content not found: {e}\")\n\n# Find publication resources by ID (for non-spine items)\ntry:\n    resource = doc.find_pub_resource_by_id('cover-image')\nexcept ValueError as e:\n    print(f\"Resource not found: {e}\")\n```\n\n### File Operations\n\nList and access files directly by their paths in the EPUB archive:\n\n```python\n# Get information about all files\nfiles_info = doc.get_files_info()\nfor file_info in files_info:\n    print(f\"Path: {file_info['path']}\")\n    print(f\"Size: {file_info['size']} bytes\")\n    print(f\"Compressed: {file_info['compressed_size']} bytes\")\n    print(f\"Modified: {file_info['modified']}\")\n\n# Access specific file by path\ntry:\n    # For XHTML files, returns XHTMLContent object\n    xhtml_content = doc.get_file_by_path('OEBPS/chapter1.xhtml')\n    print(xhtml_content.to_xml())\n    print(xhtml_content.to_plain())\n    \n    # For other files, returns raw string content\n    css_content = doc.get_file_by_path('OEBPS/styles/main.css')\n    print(css_content)\n    \nexcept ValueError as e:\n    print(f\"File not found: {e}\")\n```\n\n### Output Formatting Options\n\nAll document components support flexible output formatting:\n\n```python\n# Pretty-printed XML output\nprint(metadata.to_str(pretty_print=True))\nprint(manifest.to_xml(pretty_print=True))\n\n# Syntax highlighting can be controlled\nprint(package.to_xml(highlight_syntax=True))   # With highlighting (default)\nprint(package.to_xml(highlight_syntax=False))  # Without highlighting\n```\n\n## Industry Standards & 
Compliance\n\n`epub-utils` provides comprehensive support for industry-standard EPUB specifications and related technologies, ensuring broad compatibility across the digital publishing ecosystem.\n\n### Supported EPUB Standards\n\n- **EPUB 2.0.1** (IDPF, 2010)\n  - Complete OPF 2.0 package document support\n  - NCX navigation control file support\n  - Dublin Core metadata extraction\n  - Legacy EPUB compatibility\n\n- **EPUB 3.0+** (IDPF/W3C, 2011-present)\n  - EPUB 3.3 specification compliance\n  - HTML5-based content documents\n  - Navigation document (nav.xhtml) support\n  - Enhanced accessibility features\n  - Media overlays and scripting support\n\n### Metadata Standards\n\n- **Dublin Core Metadata Initiative (DCMI)**\n  - Dublin Core Metadata Element Set v1.1\n  - Dublin Core Metadata Terms (DCTERMS)\n\n- **Open Packaging Format (OPF)**\n  - OPF 2.0 specification (EPUB 2.0.1)\n  - OPF 3.0 specification (EPUB 3.0+)\n\nThe library maintains strict adherence to published specifications while providing robust handling of real-world EPUB variations commonly found in commercial and open-source reading applications."
  },
  {
    "path": "docs/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the environment for the first two.\nSPHINXOPTS    ?=\nSPHINXBUILD   ?= sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = _build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "docs/api-reference.rst",
    "content": "API Reference\n=============\n\nThis section provides complete API documentation for all classes and methods in epub-utils.\n\nDocument Class\n--------------\n\n.. py:class:: Document(path)\n\n   Main class for working with EPUB files.\n\n   :param str path: Path to the EPUB file\n\n   **Example**:\n\n   .. code-block:: python\n\n      from epub_utils import Document\n      \n      doc = Document(\"book.epub\")\n      print(doc.package.metadata.title)\n\n   .. py:attribute:: container\n\n      Access to the container information.\n\n      :type: Container\n      :returns: Container object with container.xml information\n\n      **Example**:\n\n      .. code-block:: python\n\n         container = doc.container\n         print(f\"Package path: {container.rootfile_path}\")\n\n   .. py:attribute:: package\n\n      Access to the package (OPF) information.\n\n      :type: Package  \n      :returns: Package object with OPF file information\n\n      **Example**:\n\n      .. code-block:: python\n\n         package = doc.package\n         print(f\"Title: {package.metadata.title}\")\n\n   .. py:attribute:: toc\n\n      Access to the table of contents.\n\n      :type: TableOfContents\n      :returns: Table of contents object\n\n      **Example**:\n\n      .. code-block:: python\n\n         toc = doc.toc\n         toc_xml = toc.to_xml()\n\n   .. py:attribute:: ncx\n\n      Access to the NCX (Navigation Control for XML) table of contents.\n\n      :type: TableOfContents or None\n      :returns: NCX table of contents object for EPUB 2, or for EPUB 3 if NCX is present, None otherwise\n\n      **Example**:\n\n      .. code-block:: python\n\n         ncx = doc.ncx\n         if ncx:\n             ncx_xml = ncx.to_xml()\n\n      **Note**: For EPUB 2, this returns the same as ``toc``. For EPUB 3, this specifically \n      accesses the NCX file if present, which provides backward compatibility.\n\n   .. 
py:attribute:: nav\n\n      Access to the Navigation Document (EPUB 3 only).\n\n      :type: TableOfContents or None\n      :returns: Navigation Document table of contents object for EPUB 3, None for EPUB 2 or if not present\n\n      **Example**:\n\n      .. code-block:: python\n\n         nav = doc.nav\n         if nav:\n             nav_xml = nav.to_xml()\n\n      **Note**: This property specifically accesses EPUB 3 Navigation Documents. \n      Returns None for EPUB 2 documents.\n\n   .. py:method:: get_files_info()\n\n      Get detailed information about all files in the EPUB.\n\n      :returns: List of dictionaries containing file information\n      :rtype: List[Dict[str, Union[str, int]]]\n\n      Each dictionary contains:\n\n      - ``path`` (str): File path within the EPUB\n      - ``size`` (int): Uncompressed size in bytes  \n      - ``compressed_size`` (int): Compressed size in bytes\n      - ``modified`` (str): Last modified date in ISO format\n\n      **Example**:\n\n      .. code-block:: python\n\n         files = doc.get_files_info()\n         for file_info in files:\n             print(f\"{file_info['path']}: {file_info['size']} bytes\")\n\n   .. py:method:: list_files()\n\n      Get basic information about all files in the EPUB.\n\n      :returns: List of dictionaries with basic file information\n      :rtype: List[Dict[str, str]]\n\n      **Example**:\n\n      .. code-block:: python\n\n         files = doc.list_files()\n         print(f\"EPUB contains {len(files)} files\")\n\nContainer Class\n---------------\n\n.. py:class:: Container\n\n   Represents the META-INF/container.xml file information.\n\n   .. py:attribute:: rootfile_path\n\n      Path to the main package file within the EPUB.\n\n      :type: str\n\n   .. py:attribute:: rootfile_media_type\n\n      Media type of the main package file.\n\n      :type: str\n\n   .. 
py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw XML content.\n\n      :returns: Raw XML string\n      :rtype: str\n\nPackage Class\n-------------\n\n.. py:class:: Package\n\n   Represents the main OPF package file.\n\n   .. py:attribute:: metadata\n\n      Package metadata information.\n\n      :type: Metadata\n\n   .. py:attribute:: manifest\n\n      Package manifest information.\n\n      :type: Manifest\n\n   .. py:attribute:: spine\n\n      Package spine information.\n\n      :type: Spine\n\n   .. py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation of the complete package.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw XML content of the complete package.\n\n      :returns: Raw XML string\n      :rtype: str\n\nMetadata Class\n--------------\n\n.. py:class:: Metadata\n\n   Represents Dublin Core and EPUB-specific metadata.\n\n   .. py:attribute:: title\n\n      Book title from dc:title element.\n\n      :type: str\n\n   .. py:attribute:: creator\n\n      Book author/creator from dc:creator element.\n\n      :type: str\n\n   .. py:attribute:: language\n\n      Language code from dc:language element.\n\n      :type: str\n\n   .. py:attribute:: identifier\n\n      Unique identifier from dc:identifier element.\n\n      :type: str\n\n   .. py:attribute:: publisher\n\n      Publisher from dc:publisher element.\n\n      :type: str\n\n   .. py:attribute:: date\n\n      Publication date from dc:date element.\n\n      :type: str\n\n   .. py:attribute:: subject\n\n      Subject/keywords from dc:subject element.\n\n      :type: str\n\n   .. 
py:attribute:: description\n\n      Description from dc:description element.\n\n      :type: str\n\n   .. py:attribute:: contributor\n\n      Contributor from dc:contributor element.\n\n      :type: str\n\n   .. py:attribute:: type\n\n      Resource type from dc:type element.\n\n      :type: str\n\n   .. py:attribute:: format\n\n      Format from dc:format element.\n\n      :type: str\n\n   .. py:attribute:: source\n\n      Source from dc:source element.\n\n      :type: str\n\n   .. py:attribute:: relation\n\n      Relation from dc:relation element.\n\n      :type: str\n\n   .. py:attribute:: coverage\n\n      Coverage from dc:coverage element.\n\n      :type: str\n\n   .. py:attribute:: rights\n\n      Rights information from dc:rights element.\n\n      :type: str\n\n   .. py:method:: __getattr__(name)\n\n      Dynamic attribute access for any metadata field.\n\n      :param str name: Metadata field name\n      :returns: Metadata value or empty string\n      :rtype: str\n\n      **Example**:\n\n      .. code-block:: python\n\n         # Access any metadata field\n         isbn = metadata.isbn if hasattr(metadata, 'isbn') else 'Not available'\n         series = getattr(metadata, 'series', 'Not available')\n\n   .. py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation of metadata.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_kv()\n\n      Get metadata as key-value pairs.\n\n      :returns: Key-value formatted string\n      :rtype: str\n\n      **Example**:\n\n      .. code-block:: python\n\n         kv_data = metadata.to_kv()\n         print(kv_data)\n         # Output:\n         # title: The Great Gatsby\n         # creator: F. Scott Fitzgerald\n         # language: en\n\n   .. 
py:method:: to_str()\n\n      Get raw XML content of metadata.\n\n      :returns: Raw XML string\n      :rtype: str\n\nManifest Class\n--------------\n\n.. py:class:: Manifest\n\n   Represents the package manifest section.\n\n   .. py:attribute:: items\n\n      Dictionary of manifest items.\n\n      :type: Dict[str, Dict[str, str]]\n\n      Each item contains:\n\n      - ``href``: File path\n      - ``media-type``: MIME type\n      - Other attributes as needed\n\n      **Example**:\n\n      .. code-block:: python\n\n         for item_id, item in manifest.items.items():\n             print(f\"ID: {item_id}\")\n             print(f\"  File: {item['href']}\")\n             print(f\"  Type: {item['media-type']}\")\n\n   .. py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw XML content.\n\n      :returns: Raw XML string\n      :rtype: str\n\nSpine Class\n-----------\n\n.. py:class:: Spine\n\n   Represents the package spine section.\n\n   .. py:attribute:: items\n\n      List of spine items in reading order.\n\n      :type: List[Dict[str, str]]\n\n      **Example**:\n\n      .. code-block:: python\n\n         for item in spine.items:\n             print(f\"Reading order item: {item}\")\n\n   .. py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw XML content.\n\n      :returns: Raw XML string\n      :rtype: str\n\nTableOfContents Class\n---------------------\n\n.. py:class:: TableOfContents\n\n   Represents the table of contents (NCX or Navigation Document).\n\n   .. 
py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted XML representation.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted XML string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw XML content.\n\n      :returns: Raw XML string\n      :rtype: str\n\nContent Classes\n---------------\n\n.. py:class:: Content\n\n   Base class for EPUB content documents.\n\n   .. py:method:: to_xml(highlight_syntax=True)\n\n      Get formatted content.\n\n      :param bool highlight_syntax: Whether to apply syntax highlighting\n      :returns: Formatted content string\n      :rtype: str\n\n   .. py:method:: to_str()\n\n      Get raw content.\n\n      :returns: Raw content string\n      :rtype: str\n\n.. py:class:: XHTMLContent\n\n   Specialized class for XHTML content documents.\n\n   Inherits from Content with additional XHTML-specific methods.\n\n   .. py:method:: to_plain()\n\n      Get plain text content with HTML tags stripped.\n\n      :returns: Plain text string\n      :rtype: str\n\n      **Example**:\n\n      .. code-block:: python\n\n         from epub_utils.content import XHTMLContent\n         \n         # This would typically be accessed through Document\n         # content = XHTMLContent(raw_html)\n         # plain_text = content.to_plain()\n\nException Classes\n-----------------\n\n.. py:exception:: ParseError\n\n   Raised when there's an error parsing EPUB content.\n\n   Base class: ``Exception``\n\n   **Example**:\n\n   .. code-block:: python\n\n      from epub_utils import Document\n      from epub_utils.exceptions import ParseError\n\n      try:\n          doc = Document(\"corrupted.epub\")\n          title = doc.package.metadata.title\n      except ParseError as e:\n          print(f\"Failed to parse EPUB: {e}\")\n      except FileNotFoundError:\n          print(\"EPUB file not found\")\n\nUsage Examples\n--------------\n\nBasic Usage\n~~~~~~~~~~~\n\n.. 
code-block:: python\n\n   from epub_utils import Document\n\n   # Load document\n   doc = Document(\"book.epub\")\n\n   # Access metadata\n   metadata = doc.package.metadata\n   print(f\"Title: {metadata.title}\")\n   print(f\"Author: {metadata.creator}\")\n\n   # Check file structure\n   files = doc.get_files_info()\n   print(f\"Contains {len(files)} files\")\n\n   # Get formatted output\n   toc_xml = doc.toc.to_xml()\n   metadata_kv = metadata.to_kv()\n\nError Handling\n~~~~~~~~~~~~~~\n\n.. code-block:: python\n\n   from epub_utils import Document\n   from epub_utils.exceptions import ParseError\n\n   def safe_load_epub(path):\n       try:\n           doc = Document(path)\n           return {\n               'status': 'success',\n               'document': doc,\n               'title': getattr(doc.package.metadata, 'title', 'Unknown')\n           }\n       except ParseError as e:\n           return {\n               'status': 'parse_error',\n               'error': str(e)\n           }\n       except FileNotFoundError:\n           return {\n               'status': 'file_not_found',\n               'error': 'EPUB file not found'\n           }\n       except Exception as e:\n           return {\n               'status': 'unknown_error', \n               'error': str(e)\n           }\n\nBatch Processing\n~~~~~~~~~~~~~~~~\n\n.. 
code-block:: python\n\n   import os\n   from pathlib import Path\n   from epub_utils import Document\n\n   def process_epub_directory(directory):\n       epub_files = Path(directory).glob(\"*.epub\")\n       results = []\n       \n       for epub_path in epub_files:\n           try:\n               doc = Document(str(epub_path))\n               metadata = doc.package.metadata\n               \n               result = {\n                   'file': epub_path.name,\n                   'title': getattr(metadata, 'title', ''),\n                   'author': getattr(metadata, 'creator', ''),\n                   'language': getattr(metadata, 'language', ''),\n                   'file_size': epub_path.stat().st_size,\n                   'epub_files': len(doc.get_files_info())\n               }\n               results.append(result)\n               \n           except Exception as e:\n               results.append({\n                   'file': epub_path.name,\n                   'error': str(e)\n               })\n       \n       return results\n\nType Hints\n----------\n\nFor better IDE support and type checking, here are the main type hints:\n\n.. code-block:: python\n\n   from typing import Dict, List, Union, Optional\n   from epub_utils import Document\n\n   # Function signatures for reference\n   def get_files_info(self) -> List[Dict[str, Union[str, int]]]: ...\n   def list_files(self) -> List[Dict[str, str]]: ...\n   def to_xml(self, highlight_syntax: bool = True) -> str: ...\n   def to_str(self) -> str: ...\n   def to_kv(self) -> str: ...\n\n   # Type-safe usage example\n   doc: Document = Document(\"book.epub\")\n   files_info: List[Dict[str, Union[str, int]]] = doc.get_files_info()\n   title: str = doc.package.metadata.title\n   kv_data: str = doc.package.metadata.to_kv()\n\nModule Structure\n----------------\n\nThe ``epub-utils`` package is organized as follows:\n\n.. 
code-block:: text\n\n   epub_utils/\n   ├── __init__.py          # Main exports (Document, Container)\n   ├── doc.py               # Document class\n   ├── container.py         # Container class\n   ├── package/\n   │   ├── __init__.py      # Package class\n   │   ├── metadata.py      # Metadata class\n   │   ├── manifest.py      # Manifest class\n   │   └── spine.py         # Spine class\n   ├── content/\n   │   ├── __init__.py      # Content classes\n   │   ├── base.py          # Base Content class\n   │   └── xhtml.py         # XHTMLContent class\n   ├── toc.py               # TableOfContents class\n   ├── exceptions.py        # Exception classes\n   ├── highlighters.py      # Syntax highlighting utilities\n   └── cli.py               # Command-line interface\n\nFor detailed implementation examples, see :doc:`api-tutorial` and :doc:`examples`.\n"
  },
  {
    "path": "docs/api-tutorial.rst",
    "content": "Use as a Python library\n=======================\n\nThis guide covers using ``epub-utils`` as a Python library. The API is designed to be intuitive \nand follows Python best practices for ease of use and integration into your projects.\n\nQuick Start\n-----------\n\nThe main entry point is the ``Document`` class:\n\n.. code-block:: python\n\n   from epub_utils import Document\n\n   # Load an EPUB file\n   doc = Document(\"path/to/book.epub\")\n\n   # Access various components\n   print(f\"Title: {doc.package.metadata.title}\")\n   print(f\"Author: {doc.package.metadata.creator}\")\n\nCore Classes\n------------\n\nDocument Class\n~~~~~~~~~~~~~~\n\nThe ``Document`` class is your main interface to an EPUB file:\n\n.. code-block:: python\n\n   from epub_utils import Document\n\n   doc = Document(\"example.epub\")\n\n   # Access major components\n   container = doc.container      # Container information\n   package = doc.package         # Package/OPF file\n   toc = doc.toc                 # Table of contents\n   \n   # Get file information\n   files_info = doc.get_files_info()\n\n**Key Methods**:\n\n- ``get_files_info()``: Returns detailed information about all files in the EPUB\n- ``list_files()``: Returns a simple list of files with basic metadata\n\nContainer Access\n~~~~~~~~~~~~~~~~\n\nThe container provides information from the META-INF/container.xml file:\n\n.. code-block:: python\n\n   # Access container properties\n   print(f\"Package path: {doc.container.rootfile_path}\")\n   print(f\"Media type: {doc.container.rootfile_media_type}\")\n\n   # Get raw XML\n   container_xml = doc.container.to_xml()\n   raw_container = doc.container.to_str()\n\nPackage and Metadata\n~~~~~~~~~~~~~~~~~~~~~\n\nThe package object gives you access to the main OPF file and its metadata:\n\n.. 
code-block:: python\n\n   package = doc.package\n\n   # Access metadata\n   metadata = package.metadata\n   print(f\"Title: {metadata.title}\")\n   print(f\"Author: {metadata.creator}\")\n   print(f\"Language: {metadata.language}\")\n   print(f\"Identifier: {metadata.identifier}\")\n   print(f\"Publisher: {metadata.publisher}\")\n\n   # Get all metadata as key-value pairs\n   kv_metadata = metadata.to_kv()\n   print(kv_metadata)\n\n   # Access manifest and spine\n   manifest = package.manifest\n   spine = package.spine\n\nWorking with Metadata\n----------------------\n\nExtracting Common Fields\n~~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe metadata object provides easy access to Dublin Core and EPUB-specific metadata:\n\n.. code-block:: python\n\n   metadata = doc.package.metadata\n\n   # Basic Dublin Core elements\n   title = metadata.title\n   creator = metadata.creator  # Usually the author\n   subject = metadata.subject  # Keywords/topics\n   description = metadata.description\n   publisher = metadata.publisher\n   contributor = metadata.contributor\n   date = metadata.date\n   type = metadata.type\n   format = metadata.format\n   identifier = metadata.identifier\n   source = metadata.source\n   language = metadata.language\n   relation = metadata.relation\n   coverage = metadata.coverage\n   rights = metadata.rights\n\nDynamic Attribute Access\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe metadata object supports dynamic attribute access for any metadata field:\n\n.. code-block:: python\n\n   # Access any metadata field by name\n   isbn = getattr(metadata, 'isbn', 'Not available')\n   series = getattr(metadata, 'series', 'Not available')\n\n   # Or use the more direct approach\n   try:\n       custom_field = metadata.custom_metadata_field\n   except AttributeError:\n       custom_field = \"Field not found\"\n\nFormatted Output\n~~~~~~~~~~~~~~~~\n\nGet metadata in different formats:\n\n.. 
code-block:: python\n\n   # XML format with syntax highlighting\n   xml_metadata = metadata.to_xml(highlight_syntax=True)\n\n   # Raw XML without highlighting\n   raw_xml = metadata.to_xml(highlight_syntax=False)\n\n   # Key-value format for easy parsing\n   kv_format = metadata.to_kv()\n\nManifest and Spine\n-------------------\n\nWorking with the Manifest\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe manifest lists all files in the EPUB package:\n\n.. code-block:: python\n\n   manifest = doc.package.manifest\n\n   # Get all items\n   items = manifest.items  # Dictionary of manifest items\n\n   # Find specific items\n   for item_id, item in items.items():\n       print(f\"ID: {item_id}\")\n       print(f\"  File: {item['href']}\")\n       print(f\"  Type: {item['media-type']}\")\n\n   # Get formatted output\n   manifest_xml = manifest.to_xml()\n\nUnderstanding the Spine\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe spine defines the reading order:\n\n.. code-block:: python\n\n   spine = doc.package.spine\n\n   # Get spine items in reading order\n   spine_items = spine.items\n\n   # Get formatted output\n   spine_xml = spine.to_xml()\n\nTable of Contents\n-----------------\n\nWorking with TOC\n~~~~~~~~~~~~~~~~\n\nAccess the table of contents (either NCX or Navigation Document):\n\n.. code-block:: python\n\n   toc = doc.toc\n\n   # Get formatted TOC\n   toc_xml = toc.to_xml()\n   raw_toc = toc.to_str()\n\nSpecific TOC Access\n~~~~~~~~~~~~~~~~~~~\n\nFor fine-grained control over which table of contents format to access:\n\n.. 
code-block:: python\n\n   # Access NCX specifically (EPUB 2 or EPUB 3 with NCX)\n   ncx = doc.ncx\n   if ncx:\n       ncx_xml = ncx.to_xml()\n       print(\"NCX navigation available\")\n   else:\n       print(\"No NCX navigation found\")\n\n   # Access Navigation Document specifically (EPUB 3 only)\n   nav = doc.nav\n   if nav:\n       nav_xml = nav.to_xml()\n       print(\"Navigation Document available\")\n   else:\n       print(\"No Navigation Document found (likely EPUB 2)\")\n\n   # Handle different EPUB versions\n   package = doc.package\n   if package.version.major >= 3:\n       # EPUB 3 - prefer Navigation Document, fallback to NCX\n       nav_doc = doc.nav or doc.ncx\n   else:\n       # EPUB 2 - use NCX\n       nav_doc = doc.ncx\n\n   if nav_doc:\n       print(\"Table of contents found:\", nav_doc.to_str()[:100])\n\nContent Extraction\n------------------\n\nAccessing Document Content\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nExtract content from specific documents within the EPUB:\n\n.. code-block:: python\n\n   # First, find content IDs from the manifest\n   manifest = doc.package.manifest\n   content_items = {\n       item_id: item for item_id, item in manifest.items.items()\n       if item['media-type'] == 'application/xhtml+xml'\n   }\n\n   # Access content by ID\n   for content_id in content_items:\n       try:\n           content = doc.get_content(content_id)\n           # Process content as needed\n           print(f\"Content ID {content_id}: {len(content)} characters\")\n       except Exception as e:\n           print(f\"Could not access content {content_id}: {e}\")\n\nFile Information\n----------------\n\nDetailed File Analysis\n~~~~~~~~~~~~~~~~~~~~~~\n\nGet comprehensive information about all files in the EPUB:\n\n.. 
code-block:: python\n\n   files_info = doc.get_files_info()\n\n   for file_info in files_info:\n       print(f\"Path: {file_info['path']}\")\n       print(f\"Size: {file_info['size']} bytes\")\n       print(f\"Compressed: {file_info['compressed_size']} bytes\")\n       print(f\"Modified: {file_info['modified']}\")\n       print(\"---\")\n\n   # Calculate total size\n   total_size = sum(f['size'] for f in files_info)\n   total_compressed = sum(f['compressed_size'] for f in files_info)\n   compression_ratio = (1 - total_compressed / total_size) * 100\n   \n   print(f\"Total size: {total_size} bytes\")\n   print(f\"Compressed size: {total_compressed} bytes\")\n   print(f\"Compression ratio: {compression_ratio:.1f}%\")\n\nError Handling\n--------------\n\nRobust Error Handling\n~~~~~~~~~~~~~~~~~~~~~~\n\nepub-utils provides specific exception types for better error handling:\n\n.. code-block:: python\n\n   from epub_utils import Document\n   from epub_utils.exceptions import ParseError\n\n   try:\n       doc = Document(\"potentially_corrupt.epub\")\n       \n       # Try to access metadata\n       title = doc.package.metadata.title\n       print(f\"Successfully loaded: {title}\")\n       \n   except ParseError as e:\n       print(f\"EPUB parsing error: {e}\")\n   except FileNotFoundError:\n       print(\"EPUB file not found\")\n   except Exception as e:\n       print(f\"Unexpected error: {e}\")\n\nGraceful Degradation\n~~~~~~~~~~~~~~~~~~~~\n\nHandle missing or malformed metadata gracefully:\n\n.. 
code-block:: python\n\n   def safe_get_metadata(doc, field_name, default=\"Unknown\"):\n       \"\"\"Safely extract metadata field with fallback.\"\"\"\n       try:\n           return getattr(doc.package.metadata, field_name, default)\n       except (AttributeError, ParseError):\n           return default\n\n   # Usage\n   title = safe_get_metadata(doc, 'title', 'Untitled')\n   author = safe_get_metadata(doc, 'creator', 'Unknown Author')\n\nNext Steps\n----------\n\n- Explore the complete :doc:`api-reference` for detailed class documentation\n- See more :doc:`examples` for advanced use cases\n- Learn about :doc:`epub-standards` to understand the underlying specifications\n- Check out the :doc:`cli-reference` for command-line equivalents\n"
  },
  {
    "path": "docs/changelog.rst",
    "content": ".. _changelog:\n\n=========\nChangelog\n=========\n\n.. _v_0_1_0a1:\n\n0.1.0a1 (2025-06-14)\n--------------------\n\n* Added ``toc`` retrieval as dictionary (:issue:`4`)\n* Added comprehensive navigation reading support (`#38 <https://github.com/ernestofgonzalez/epub-utils/pull/38>`__, `#39 <https://github.com/ernestofgonzalez/epub-utils/pull/39>`__, `#42 <https://github.com/ernestofgonzalez/epub-utils/pull/42>`__)\n* Added macOS test runner (`#41 <https://github.com/ernestofgonzalez/epub-utils/pull/41>`__)\n* Added support for Python 3.8 and Python 3.9 (`#40 <https://github.com/ernestofgonzalez/epub-utils/pull/40>`__)\n\n.. _v_0_0_0a5:\n\n0.0.0a5 (2025-06-01)\n--------------------\n\n* Added file retrieval by file path. (:issue:`22`)\n* Added pretty printing to XML inspection (:issue:`23`)\n\n.. _v_0_0_0a4:\n\n0.0.0a4 (2025-05-26)\n--------------------\n\n* Added file inspection and ``files`` CLI command. (`#20 <https://github.com/ernestofgonzalez/epub-utils/pull/20>`__)\n* Added content inspection and ``content`` CLI command (:issue:`5`)\n* Added manifest parsing and ``manifest`` CLI command (`#13 <https://github.com/ernestofgonzalez/epub-utils/pull/13>`__)\n* Added spine parsing and ``spine`` CLI command (`#9 <https://github.com/ernestofgonzalez/epub-utils/pull/9>`__)\n* Added key-value support for ``metadata`` CLI command\n* Fixed table of contents parsing for OEBPS 1 (`#11 <https://github.com/ernestofgonzalez/epub-utils/pull/11>`__). Thanks, `Christian Klein <https://github.com/cklein>`__.\n\n.. _v_0_0_0a3:\n\n0.0.0a3 (2025-05-04)\n--------------------\n\n* Fixed ``toc`` command. (:issue:`1`)\n\n.. _v_0_0_0a2:\n\n0.0.0a2 (2025-05-03)\n--------------------\n\n* Added classifiers\n\n.. _v_0_0_0a1:\n\n0.0.0a1 (2025-05-03)\n--------------------\n\n* Initial release to PyPI"
  },
  {
    "path": "docs/cli-reference.rst",
    "content": "CLI Reference\n=============\n\nThis reference documents all available command-line options and commands for ``epub-utils``.\n\nSynopsis\n--------\n\n.. code-block:: text\n\n   epub-utils [GLOBAL_OPTIONS] EPUB_FILE COMMAND [COMMAND_OPTIONS]\n\nGlobal Options\n--------------\n\n``-h, --help``\n   Show help message and exit\n\n``-v, --version``\n   Show program version and exit\n\n``-pp, --pretty-print``\n   Pretty-print XML output with proper indentation (applies to xml and raw formats only)\n\nCommands\n--------\n\nAll commands operate on an EPUB file and support the ``--format`` and ``--pretty-print`` options unless otherwise noted.\n\ncontainer\n~~~~~~~~~\n\nDisplay the container.xml file contents.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE container [--format FORMAT] [--pretty-print]\n\n**Description**:\nThe container command shows the contents of META-INF/container.xml, which defines the \nlocation of the main package file within the EPUB.\n\n**Supported formats**: ``xml`` (default), ``raw``\n\n**Examples**:\n\n.. code-block:: bash\n\n   # Show container with syntax highlighting\n   epub-utils book.epub container\n\n   # Show raw container XML\n   epub-utils book.epub container --format raw\n   \n   # Show container with pretty formatting\n   epub-utils book.epub container --pretty-print\n   \n   # Combine both options\n   epub-utils book.epub container --format raw --pretty-print\n\n**Sample output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?>\n   <container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n     <rootfiles>\n       <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n     </rootfiles>\n   </container>\n\npackage\n~~~~~~~\n\nDisplay the main package (OPF) file contents.\n\n**Syntax**:\n\n.. 
code-block:: bash\n\n   epub-utils EPUB_FILE package [--format FORMAT] [--pretty-print]\n\n**Description**:\nThe package command shows the complete OPF (Open Packaging Format) file, which contains \nmetadata, manifest, and spine information.\n\n**Supported formats**: ``xml`` (default), ``raw``\n\n**Examples**:\n\n.. code-block:: bash\n\n   # Show package with syntax highlighting\n   epub-utils book.epub package\n\n   # Show raw package XML for processing\n   epub-utils book.epub package --format raw | xmllint --format -\n   \n   # Show package with pretty formatting\n   epub-utils book.epub package --pretty-print\n\ntoc\n~~~\n\nDisplay the table of contents file.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE toc [--format FORMAT] [--pretty-print] [--ncx | --nav]\n\n**Description**:\nShows the table of contents, which can be either an NCX file (EPUB 2.x) or a \nNavigation Document (EPUB 3.x). By default, automatically detects and uses the \nappropriate format for the EPUB version.\n\n**Options**:\n\n``--ncx``\n   Force retrieval of NCX file (EPUB 2 navigation control file). For EPUB 2, \n   this is the same as the default behavior. For EPUB 3, this specifically \n   accesses the NCX file if present for backward compatibility.\n\n``--nav``\n   Force retrieval of Navigation Document (EPUB 3 navigation file). Only works \n   with EPUB 3 documents that have a Navigation Document.\n\n**Note**: The ``--ncx`` and ``--nav`` flags are mutually exclusive.\n\n**Supported formats**: ``xml`` (default), ``raw``\n\n**Examples**:\n\n.. 
code-block:: bash\n\n   # Show TOC with highlighting (auto-detect format)\n   epub-utils book.epub toc\n\n   # Extract navigation structure\n   epub-utils book.epub toc --format raw\n   \n   # Show TOC with pretty formatting\n   epub-utils book.epub toc --pretty-print\n\n   # Force NCX format (EPUB 2 style)\n   epub-utils book.epub toc --ncx\n\n   # Force Navigation Document (EPUB 3 style)\n   epub-utils book.epub toc --nav\n\nmetadata\n~~~~~~~~\n\nDisplay metadata information from the package file.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE metadata [--format FORMAT] [--pretty-print]\n\n**Description**:\nExtracts and displays Dublin Core and EPUB-specific metadata from the package file.\n\n**Supported formats**: ``xml`` (default), ``raw``, ``kv``\n\n**Examples**:\n\n.. code-block:: bash\n\n   # Show formatted metadata\n   epub-utils book.epub metadata\n\n   # Get key-value pairs for scripting\n   epub-utils book.epub metadata --format kv\n\n   # Raw metadata XML\n   epub-utils book.epub metadata --format raw\n   \n   # Show metadata with pretty formatting\n   epub-utils book.epub metadata --pretty-print\n\n**Key-value output format**:\n\n.. code-block:: text\n\n   title: The Great Gatsby\n   creator: F. Scott Fitzgerald\n   language: en\n   identifier: urn:uuid:12345678-1234-1234-1234-123456789abc\n   publisher: Scribner\n   date: 2021-01-01\n   subject: Fiction, Classic Literature\n\nmanifest\n~~~~~~~~\n\nDisplay the manifest section from the package file.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE manifest [--format FORMAT] [--pretty-print]\n\n**Description**:\nShows the manifest, which lists all files included in the EPUB package with their \nIDs, file paths, and media types.\n\n**Supported formats**: ``xml`` (default), ``raw``\n\n**Examples**:\n\n.. 
code-block:: bash\n\n   # Show manifest with highlighting\n   epub-utils book.epub manifest\n\n   # Find all CSS files\n   epub-utils book.epub manifest --format raw | grep 'media-type=\"text/css\"'\n   \n   # Show manifest with pretty formatting\n   epub-utils book.epub manifest --pretty-print\n\n   # Count content files\n   epub-utils book.epub manifest --format raw | grep -c 'application/xhtml+xml'\n\nspine\n~~~~~\n\nDisplay the spine section from the package file.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE spine [--format FORMAT] [--pretty-print]\n\n**Description**:\nShows the spine, which defines the default reading order of the book's content.\n\n**Supported formats**: ``xml`` (default), ``raw``\n\n**Examples**:\n\n.. code-block:: bash\n\n   # Show spine with highlighting\n   epub-utils book.epub spine\n\n   # Extract reading order\n   epub-utils book.epub spine --format raw\n   \n   # Show spine with pretty formatting\n   epub-utils book.epub spine --pretty-print\n\ncontent\n~~~~~~~\n\nDisplay the content of a document by its manifest item ID.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE content ITEM_ID [--format FORMAT] [--pretty-print]\n\n**Description**:\nExtracts and displays the content of a specific document within the EPUB, identified \nby its manifest item ID.\n\n**Supported formats**: ``xml`` (default), ``raw``, ``plain``\n\n**Arguments**:\n\n- ``ITEM_ID``: The ID of the item as defined in the manifest\n\n**Examples**:\n\n.. 
code-block:: bash\n\n   # Show content with syntax highlighting\n   epub-utils book.epub content chapter1\n\n   # Get raw HTML/XHTML\n   epub-utils book.epub content intro --format raw\n\n   # Extract plain text (no HTML tags)\n   epub-utils book.epub content chapter2 --format plain\n   \n   # Show content with pretty formatting\n   epub-utils book.epub content chapter1 --pretty-print\n\n**Finding item IDs**:\n\n.. code-block:: bash\n\n   # First check the manifest for available IDs\n   epub-utils book.epub manifest | grep 'id='\n\n   # Then extract specific content\n   epub-utils book.epub content found_id --format plain\n\nfiles\n~~~~~\n\nList all files in the EPUB archive with metadata, or display content of a specific file.\n\n**Syntax**:\n\n.. code-block:: bash\n\n   epub-utils EPUB_FILE files [FILE_PATH] [--format FORMAT] [--pretty-print]\n\n**Description**:\nWhen used without a file path, provides detailed information about all files contained \nwithin the EPUB archive, including sizes, compression ratios, and modification dates.\n\nWhen used with a file path, displays the content of the specified file within the EPUB archive.\n\n**Supported formats**: \n\n- For file listing: ``table`` (default), ``raw``\n- For file content: ``raw``, ``xml`` (default), ``plain``, ``kv``\n\n**Arguments**:\n\n- ``FILE_PATH`` (optional): Path to a specific file within the EPUB archive\n\n**Examples**:\n\n.. 
code-block:: bash\n\n   # List all files in table format (default)\n   epub-utils book.epub files\n\n   # Get simple file list\n   epub-utils book.epub files --format raw\n\n   # Count total files\n   epub-utils book.epub files --format raw | wc -l\n\n   # Display content of a specific XHTML file\n   epub-utils book.epub files OEBPS/chapter1.xhtml\n\n   # Display XHTML file in different formats\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format raw\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format xml --pretty-print\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format plain\n\n   # Display non-XHTML files (CSS, etc.)\n   epub-utils book.epub files OEBPS/styles/main.css\n\n**Key differences from content command**:\n\n- ``files`` uses file paths within the EPUB archive\n- ``content`` uses manifest item IDs\n- ``files`` can access any file, including CSS, XML, and image files\n- ``content`` only accesses files listed in the manifest\n\n**Sample table output**:\n\n.. 
code-block:: text\n\n   File Information for book.epub\n   ┌────────────────────────────────────────┬──────────┬──────────────┬─────────────────────┐\n   │ Path                                   │ Size     │ Compressed   │ Modified            │\n   ├────────────────────────────────────────┼──────────┼──────────────┼─────────────────────┤\n   │ META-INF/container.xml                 │ 230 B    │ 140 B        │ 2021-01-01 10:00:00 │\n   │ OEBPS/content.opf                      │ 2.1 KB   │ 856 B        │ 2021-01-01 10:00:00 │\n   │ OEBPS/Text/chapter01.xhtml             │ 12.4 KB  │ 3.2 KB       │ 2021-01-01 10:00:00 │\n   └────────────────────────────────────────┴──────────┴──────────────┴─────────────────────┘\n\nFormat Options\n--------------\n\nMost commands support the ``--format`` and ``--pretty-print`` options to control output formatting:\n\n``xml`` (default for most commands)\n   Syntax-highlighted, formatted XML output\n\n``raw``\n   Unformatted content exactly as stored in the EPUB\n\n``kv`` (metadata and files commands)\n   Key-value pairs suitable for shell scripting\n\n``plain`` (content and files commands)\n   Plain text with HTML tags stripped\n\n``table`` (files command only)\n   Formatted table with aligned columns\n\nPretty Print Option\n~~~~~~~~~~~~~~~~~~~\n\nThe ``--pretty-print`` (or ``-pp``) option formats XML output with proper indentation and structure:\n\n.. 
code-block:: bash\n\n   # Default output (with syntax highlighting but compact)\n   epub-utils book.epub metadata\n   \n   # Pretty-printed output (with proper indentation)\n   epub-utils book.epub metadata --pretty-print\n   \n   # Combine with raw format for clean, formatted XML\n   epub-utils book.epub package --format raw --pretty-print\n\n**Note**: The pretty-print option applies to both ``xml`` and ``raw`` formats, but has no effect on ``kv``, ``plain``, or ``table`` formats.\n\nExit Codes\n----------\n\nepub-utils uses standard exit codes:\n\n- ``0``: Success\n- ``1``: General error (file not found, invalid EPUB, etc.)\n- ``2``: Command line usage error\n\nExamples can check exit codes for error handling:\n\n.. code-block:: bash\n\n   if epub-utils book.epub metadata >/dev/null 2>&1; then\n       echo \"EPUB is valid\"\n   else\n       echo \"EPUB has issues\"\n   fi\n\nEnvironment Variables\n---------------------\n\nepub-utils respects these environment variables:\n\n``NO_COLOR``\n   Disable color output when set to any value\n\n``FORCE_COLOR``\n   Force color output even when not outputting to a terminal\n\n**Examples**:\n\n.. code-block:: bash\n\n   # Disable colors\n   NO_COLOR=1 epub-utils book.epub metadata\n\n   # Force colors in pipes\n   FORCE_COLOR=1 epub-utils book.epub metadata | less -R\n\nCommon Usage Patterns\n---------------------\n\nValidation Workflow\n~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   #!/bin/zsh\n   # validate-epub.sh - Basic EPUB validation\n\n   epub_file=\"$1\"\n\n   echo \"Validating: $epub_file\"\n\n   # Check container\n   if ! epub-utils \"$epub_file\" container >/dev/null 2>&1; then\n       echo \"❌ Invalid container\"\n       exit 1\n   fi\n\n   # Check package\n   if ! epub-utils \"$epub_file\" package >/dev/null 2>&1; then\n       echo \"❌ Invalid package\"\n       exit 1\n   fi\n\n   # Check required metadata\n   metadata=$(epub-utils \"$epub_file\" metadata --format kv 2>/dev/null)\n   if ! 
echo \"$metadata\" | grep -q \"^title:\"; then\n       echo \"⚠️  Missing title\"\n   fi\n\n   if ! echo \"$metadata\" | grep -q \"^creator:\"; then\n       echo \"⚠️  Missing author\"\n   fi\n\n   echo \"✅ EPUB structure is valid\"\n\nMetadata Extraction\n~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   #!/bin/zsh\n   # extract-metadata.sh - Extract metadata to CSV\n\n   echo \"filename,title,author,language,publisher\" > metadata.csv\n\n   for epub in *.epub; do\n       if [[ -f \"$epub\" ]]; then\n           metadata=$(epub-utils \"$epub\" metadata --format kv 2>/dev/null)\n           \n           title=$(echo \"$metadata\" | grep \"^title:\" | cut -d' ' -f2- | tr ',' ';')\n           author=$(echo \"$metadata\" | grep \"^creator:\" | cut -d' ' -f2- | tr ',' ';')\n           language=$(echo \"$metadata\" | grep \"^language:\" | cut -d' ' -f2-)\n           publisher=$(echo \"$metadata\" | grep \"^publisher:\" | cut -d' ' -f2- | tr ',' ';')\n           \n           echo \"$epub,$title,$author,$language,$publisher\" >> metadata.csv\n       fi\n   done\n\nContent Analysis\n~~~~~~~~~~~~~~~~\n\n.. 
code-block:: bash\n\n   #!/bin/zsh\n   # analyze-content.sh - Analyze EPUB content structure\n\n   epub_file=\"$1\"\n\n   echo \"Content Analysis for: $epub_file\"\n   echo \"==================================\"\n\n   # Get content files from manifest\n   content_ids=$(epub-utils \"$epub_file\" manifest --format raw | \\\n                grep 'media-type=\"application/xhtml+xml\"' | \\\n                sed 's/.*id=\"\\([^\"]*\\)\".*/\\1/')\n\n   total_words=0\n\n   for content_id in $content_ids; do\n       if word_count=$(epub-utils \"$epub_file\" content \"$content_id\" --format plain 2>/dev/null | wc -w); then\n           echo \"Content ID '$content_id': $word_count words\"\n           total_words=$((total_words + word_count))\n       fi\n   done\n\n   echo \"==================================\"\n   echo \"Total words: $total_words\"\n\nError Handling\n--------------\n\nAlways handle errors when using epub-utils in scripts:\n\n.. code-block:: bash\n\n   # Check if file exists first\n   if [[ ! -f \"$epub_file\" ]]; then\n       echo \"Error: File '$epub_file' not found\" >&2\n       exit 1\n   fi\n\n   # Capture and handle command errors\n   if ! output=$(epub-utils \"$epub_file\" metadata --format kv 2>&1); then\n       echo \"Error processing EPUB: $output\" >&2\n       exit 1\n   fi\n\n   # Check for specific issues\n   if [[ -z \"$output\" ]]; then\n       echo \"Warning: No metadata found\" >&2\n   fi\n\nPerformance Tips\n----------------\n\n1. **Use raw format for large-scale processing** to avoid syntax highlighting overhead\n2. **Pipe efficiently** to avoid unnecessary intermediate files\n3. **Process files in parallel** when handling many EPUBs\n4. **Cache results** when running the same command multiple times\n\n.. code-block:: bash\n\n   # Efficient parallel processing\n   find . 
-name \"*.epub\" -print0 | xargs -0 -P 4 -I {} \\\n       zsh -c 'echo \"$1: $(epub-utils \"$1\" metadata --format kv | grep \"^title:\" | cut -d\" \" -f2-)\"' _ {}\n\nTroubleshooting\n---------------\n\nCommon Issues and Solutions\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**\"Invalid value for 'PATH': File does not exist\"**\n   Check the file path and ensure the EPUB file exists.\n\n**\"ParseError: Unable to parse container.xml\"**\n   The EPUB file may be corrupted. Verify it's a valid ZIP file.\n\n**\"Content with id 'X' not found\"**\n   Check available content IDs using the manifest command first.\n\n**No color output**\n   Ensure your terminal supports colors and check the ``NO_COLOR`` environment variable.\n\n**Large file performance**\n   Use ``--format raw`` for better performance with large files.\n"
  },
  {
    "path": "docs/cli-tutorial.rst",
    "content": "Use as a command-line tool\n==========================\n\nThis tutorial will guide you through using ``epub-utils`` from the command line. We'll cover all \navailable commands with practical examples and tips for everyday usage.\n\nGetting Started\n---------------\n\nThe basic syntax for epub-utils is:\n\n.. code-block:: bash\n\n   epub-utils [OPTIONS] EPUB_FILE COMMAND [COMMAND_OPTIONS]\n\nLet's start with a simple example:\n\n.. code-block:: bash\n\n   # Display help\n   epub-utils --help\n\n   # Check version\n   epub-utils --version\n\nBasic File Inspection\n---------------------\n\nContainer Information\n~~~~~~~~~~~~~~~~~~~~~\n\nThe container command shows the EPUB's container.xml file, which points to the main package file:\n\n.. code-block:: bash\n\n   # Show container with syntax highlighting (default)\n   epub-utils book.epub container\n\n   # Show raw XML without highlighting\n   epub-utils book.epub container --format raw\n   \n   # Show container with pretty formatting\n   epub-utils book.epub container --pretty-print\n\n**Example output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?>\n   <container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n     <rootfiles>\n       <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n     </rootfiles>\n   </container>\n\nPackage Information\n~~~~~~~~~~~~~~~~~~~\n\nThe package command displays the main OPF (Open Packaging Format) file:\n\n.. 
code-block:: bash\n\n   # Show package file with highlighting\n   epub-utils book.epub package\n\n   # Show raw package content\n   epub-utils book.epub package --format raw\n   \n   # Show package with pretty formatting\n   epub-utils book.epub package --pretty-print\n\nThis reveals the complete EPUB structure including metadata, manifest, and spine.\n\nWorking with Metadata\n----------------------\n\nExtracting Metadata\n~~~~~~~~~~~~~~~~~~~~\n\nThe metadata command is perfect for getting book information:\n\n.. code-block:: bash\n\n   # Pretty-printed metadata with highlighting\n   epub-utils book.epub metadata\n\n   # Key-value format for scripting\n   epub-utils book.epub metadata --format kv\n   \n   # Metadata with pretty formatting\n   epub-utils book.epub metadata --pretty-print\n\n**Example key-value output**:\n\n.. code-block:: text\n\n   title: The Great Gatsby\n   creator: F. Scott Fitzgerald\n   language: en\n   identifier: urn:uuid:12345678-1234-1234-1234-123456789abc\n   publisher: Scribner\n   date: 2021-01-01\n   subject: Fiction, Classic Literature\n\nScripting with Metadata\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nThe key-value format is perfect for shell scripting:\n\n.. code-block:: bash\n\n   # Extract just the title\n   epub-utils book.epub metadata --format kv | grep \"^title:\" | cut -d' ' -f2-\n\n   # Get author name\n   author=$(epub-utils book.epub metadata --format kv | grep \"^creator:\" | cut -d' ' -f2-)\n   echo \"Author: $author\"\n\n   # Batch process multiple files\n   for epub in *.epub; do\n       title=$(epub-utils \"$epub\" metadata --format kv | grep \"^title:\" | cut -d' ' -f2-)\n       echo \"$epub: $title\"\n   done\n\nUnderstanding EPUB Structure\n-----------------------------\n\nTable of Contents\n~~~~~~~~~~~~~~~~~\n\nView the navigation structure of your EPUB:\n\n.. 
code-block:: bash\n\n   # Show table of contents with highlighting (auto-detect format)\n   epub-utils book.epub toc\n\n   # Raw TOC for processing\n   epub-utils book.epub toc --format raw\n   \n   # TOC with pretty formatting\n   epub-utils book.epub toc --pretty-print\n\n**EPUB Version-Specific Access**:\n\nFor precise control over which navigation format to access:\n\n.. code-block:: bash\n\n   # Force NCX format (EPUB 2 navigation control file)\n   epub-utils book.epub toc --ncx\n\n   # Force Navigation Document (EPUB 3 navigation file)\n   epub-utils book.epub toc --nav\n\n**Use Cases**:\n\n- Use ``--ncx`` when you specifically need the EPUB 2 style navigation or want to access backward-compatible NCX in EPUB 3\n- Use ``--nav`` when you specifically need the EPUB 3 Navigation Document features\n- Use the default (no flags) for general TOC access that works with any EPUB version\n\nManifest Inspection\n~~~~~~~~~~~~~~~~~~~\n\nThe manifest lists all files contained in the EPUB:\n\n.. code-block:: bash\n\n   # View manifest with syntax highlighting\n   epub-utils book.epub manifest\n\n   # Raw manifest content\n   epub-utils book.epub manifest --format raw\n   \n   # Manifest with pretty formatting\n   epub-utils book.epub manifest --pretty-print\n\n**What you'll see**: Each item in the manifest includes:\n\n- ``id``: Unique identifier for the item\n- ``href``: File path within the EPUB\n- ``media-type``: MIME type of the file\n\nSpine Information\n~~~~~~~~~~~~~~~~~\n\nThe spine defines the reading order of the book:\n\n.. code-block:: bash\n\n   # View spine with highlighting\n   epub-utils book.epub spine\n\n   # Raw spine for processing\n   epub-utils book.epub spine --format raw\n\nContent Extraction\n------------------\n\nViewing Document Content\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nExtract content from specific documents using their manifest ID:\n\n.. 
code-block:: bash\n\n   # Show content with syntax highlighting\n   epub-utils book.epub content chapter1\n\n   # Raw HTML/XHTML content\n   epub-utils book.epub content chapter1 --format raw\n\n   # Plain text (HTML tags stripped)\n   epub-utils book.epub content chapter1 --format plain\n\n**Finding Content IDs**: Use the manifest command to see available content IDs:\n\n.. code-block:: bash\n\n   # First, check the manifest for available IDs\n   epub-utils book.epub manifest\n\n   # Then extract specific content\n   epub-utils book.epub content intro --format plain\n\nFile Listing and Content Access\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nGet detailed information about all files in the EPUB, or access specific file content:\n\n.. code-block:: bash\n\n   # Formatted table of files\n   epub-utils book.epub files\n\n   # Raw file list\n   epub-utils book.epub files --format raw\n\n   # Display content of a specific file by path\n   epub-utils book.epub files OEBPS/chapter1.xhtml\n\n   # Access different file types\n   epub-utils book.epub files META-INF/container.xml\n   epub-utils book.epub files OEBPS/styles/main.css\n   epub-utils book.epub files OEBPS/images/cover.jpg\n\n   # Different output formats for XHTML content\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format raw\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format xml --pretty-print\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format plain\n\n**Key advantages of the files command**:\n\n- Access any file in the EPUB archive by its path\n- No need to know manifest item IDs\n- Works with all file types (XHTML, CSS, XML, images, etc.)\n- Complements the ``content`` command which uses manifest IDs\n\nContent Analysis\n~~~~~~~~~~~~~~~~\n\nAnalyze EPUB content structure:\n\n.. 
code-block:: bash\n\n   #!/bin/bash\n   # analyze-content.sh - Analyze EPUB content structure\n\n   epub_file=\"$1\"\n\n   echo \"=== Content Analysis for $epub_file ===\"\n\n   # Get all content files from manifest\n   epub-utils \"$epub_file\" manifest --format raw | \\\n   grep 'media-type=\"application/xhtml+xml\"' | \\\n   sed 's/.*id=\"\\([^\"]*\\)\".*/\\1/' | \\\n   while read -r content_id; do\n       echo \"--- Content ID: $content_id ---\"\n       word_count=$(epub-utils \"$epub_file\" content \"$content_id\" --format plain | wc -w)\n       echo \"Word count: $word_count\"\n       echo \"\"\n   done\n\nOutput Format Options\n---------------------\n\nepub-utils supports multiple output formats for different use cases:\n\nXML Format (Default)\n~~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   epub-utils book.epub metadata\n   # Produces syntax-highlighted, formatted XML\n\nRaw Format\n~~~~~~~~~~\n\n.. code-block:: bash\n\n   epub-utils book.epub metadata --format raw\n   # Produces unformatted XML, perfect for piping to other tools\n\nKey-Value Format\n~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   epub-utils book.epub metadata --format kv\n   # Produces key: value pairs, ideal for scripting\n\nPlain Text Format\n~~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   epub-utils book.epub content chapter1 --format plain\n   # Strips HTML tags, produces readable text\n\nPretty-Print Option\n~~~~~~~~~~~~~~~~~~~\n\nUse the ``--pretty-print`` (or ``-pp``) option to format XML output with proper indentation:\n\n.. 
code-block:: bash\n\n   # Default output (compact XML)\n   epub-utils book.epub metadata --format raw\n   \n   # Pretty-formatted output (with indentation)\n   epub-utils book.epub metadata --format raw --pretty-print\n   \n   # Works with syntax highlighting too\n   epub-utils book.epub package --pretty-print\n\nNext Steps\n----------\n\nNow that you're familiar with the CLI basics, you might want to:\n\n- Explore the :doc:`api-tutorial` for programmatic access\n- Check out more :doc:`examples` for real-world use cases\n- Learn about :doc:`epub-standards` for deeper understanding\n- Contribute to the project via :doc:`contributing`\n"
  },
  {
    "path": "docs/conf.py",
    "content": "# Configuration file for the Sphinx documentation builder.\n#\n# For the full list of built-in configuration values, see the documentation:\n# https://www.sphinx-doc.org/en/master/usage/configuration.html\n\n# -- Project information -----------------------------------------------------\n# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information\n\nproject = 'epub-utils'\ncopyright = '2025, Ernesto González'\nauthor = 'Ernesto González'\nrelease = '0.1.0a1'\n\n# -- General configuration ---------------------------------------------------\n# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration\n\nextensions = [\n\t'sphinx.ext.autodoc',\n\t'sphinx.ext.autosummary',\n\t'sphinx.ext.napoleon',\n\t'sphinx.ext.viewcode',\n\t'sphinx_copybutton',\n\t'sphinx_issues',\n]\n\ntemplates_path = ['_templates']\nexclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']\n\n# -- Napoleon settings -------------------------------------------------------\nnapoleon_google_docstring = True\nnapoleon_numpy_docstring = True\nnapoleon_include_init_with_doc = False\nnapoleon_include_private_with_doc = False\n\n# -- Autodoc settings --------------------------------------------------------\nautodoc_member_order = 'bysource'\nautodoc_default_flags = ['members']\nautosummary_generate = True\n\n# -- Intersphinx mapping -----------------------------------------------------\nintersphinx_mapping = {\n\t'python': ('https://docs.python.org/3', None),\n\t'lxml': ('https://lxml.de/', None),\n}\n\n\n# -- Options for HTML output -------------------------------------------------\n# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output\n\nhtml_theme = 'furo'\nhtml_static_path = ['_static']\n\n# Add source link in footer\nhtml_show_sourcelink = True\nhtml_copy_source = True\nhtml_show_sphinx = True\n\n# -- Linking Github issues --------------------------------------------------\n# 
https://github.com/sloria/sphinx-issues\n\nissues_github_path = 'ernestofgonzalez/epub-utils'\n"
  },
  {
    "path": "docs/contributing.rst",
    "content": "============\nContributing\n============\n\nWe welcome contributions to ``epub-utils``! This guide will help you get started with contributing to the project.\n\nGetting Started\n===============\n\nSetting Up Development Environment\n----------------------------------\n\n1. **Fork the Repository**\n\n   Fork the ``epub-utils`` repository on GitHub to your own account.\n\n2. **Clone Your Fork**\n\n   .. code-block:: bash\n\n       git clone https://github.com/yourusername/epub-utils.git\n       cd epub-utils\n\n3. **Set Up Development Environment**\n\n   .. code-block:: bash\n\n       # Create virtual environment\n       python -m venv dev-env\n       source dev-env/bin/activate  # On Windows: dev-env\\Scripts\\activate\n       \n       # Install in development mode\n       pip install -e \".[dev]\"\n       \n       # Or install dependencies manually\n       pip install -e .\n       pip install pytest black flake8 mypy sphinx\n\n\nProject Structure\n-----------------\n\n.. code-block:: text\n\n    epub-utils/\n    ├── src/\n    │   └── epub_utils/\n    │       ├── __init__.py\n    │       ├── cli.py              # Command-line interface\n    │       ├── document.py         # Main Document class\n    │       ├── extractors.py       # Content extraction logic\n    │       └── formatters.py       # Output formatting\n    ├── tests/\n    │   ├── __init__.py\n    │   ├── test_document.py\n    │   ├── test_cli.py\n    │   └── fixtures/               # Test EPUB files\n    ├── docs/\n    │   ├── conf.py\n    │   ├── index.rst\n    │   └── ...                     
# Documentation files\n    ├── pyproject.toml\n    ├── README.md\n    └── CHANGELOG.md\n\nDevelopment Workflow\n====================\n\nBranch Strategy\n---------------\n\n- ``main`` branch: Stable, release-ready code\n- ``develop`` branch: Integration branch for features\n- Feature branches: ``feature/your-feature-name``\n- Bug fix branches: ``fix/issue-description``\n\nMaking Changes\n--------------\n\n1. **Create a Feature Branch**\n\n   .. code-block:: bash\n\n       git checkout -b feature/your-feature-name\n\n2. **Make Your Changes**\n\n   Follow the coding standards outlined below.\n\n3. **Write Tests**\n\n   All new features should include comprehensive tests.\n\n4. **Run Tests Locally**\n\n   .. code-block:: bash\n\n       # Run all tests\n       pytest\n       \n       # Run with coverage\n       pytest --cov=epub_utils\n       \n       # Run specific test file\n       pytest tests/test_document.py\n\n5. **Check Code Quality**\n\n   .. code-block:: bash\n\n       # Format code\n       black src/ tests/\n       \n       # Check linting\n       flake8 src/ tests/\n       \n       # Type checking\n       mypy src/\n\n6. **Update Documentation**\n\n   If your changes affect the API or add new features, update the documentation.\n\n7. **Commit Your Changes**\n\n   .. code-block:: bash\n\n       git add .\n       git commit -m \"Add: Brief description of your changes\"\n\n8. **Push and Create Pull Request**\n\n   .. code-block:: bash\n\n       git push origin feature/your-feature-name\n\n   Then create a pull request on GitHub.\n\nCoding Standards\n================\n\nPython Style Guide\n------------------\n\nWe follow PEP 8 with some modifications:\n\n- **Line length**: 88 characters (Black's default)\n- **String quotes**: Use double quotes for strings\n- **Import sorting**: Use isort or similar tool\n- **Docstrings**: Use Google-style docstrings\n\nCode Formatting\n---------------\n\nWe use **Black** for code formatting:\n\n.. 
code-block:: bash\n\n    # Format all Python files\n    black src/ tests/\n    \n    # Check formatting without making changes\n    black --check src/ tests/\n\nExample of properly formatted code:\n\n.. code-block:: python\n\n    def extract_metadata(epub_path: str, format_type: str = \"dict\") -> dict:\n        \"\"\"Extract metadata from an EPUB file.\n        \n        Args:\n            epub_path: Path to the EPUB file.\n            format_type: Output format ('dict', 'xml', 'json').\n            \n        Returns:\n            Dictionary containing extracted metadata.\n            \n        Raises:\n            FileNotFoundError: If the EPUB file doesn't exist.\n            ValueError: If format_type is not supported.\n        \"\"\"\n        if not os.path.exists(epub_path):\n            raise FileNotFoundError(f\"EPUB file not found: {epub_path}\")\n        \n        if format_type not in [\"dict\", \"xml\", \"json\"]:\n            raise ValueError(f\"Unsupported format: {format_type}\")\n        \n        # Implementation here...\n        return {}\n\nLinting\n-------\n\nWe use **ruff** for linting:\n\n.. code-block:: bash\n\n    # Check for linting errors\n    make lint\n\nType Hints\n----------\n\nUse type hints for all function signatures:\n\n.. code-block:: python\n\n    from typing import Any, Dict, List, Optional, Union\n    from pathlib import Path\n\n    def process_files(\n        file_paths: List[Union[str, Path]], \n        output_format: str = \"table\"\n    ) -> Optional[Dict[str, Any]]:\n        \"\"\"Process multiple EPUB files.\"\"\"\n        pass\n\nDocumentation Standards\n=======================\n\nDocstring Format\n----------------\n\nUse Google-style docstrings:\n\n.. code-block:: python\n\n    def complex_function(param1: str, param2: int, param3: bool = False) -> dict:\n        \"\"\"Brief description of the function.\n        \n        Longer description if needed. 
Explain the purpose, behavior,\n        and any important details about the function.\n        \n        Args:\n            param1: Description of the first parameter.\n            param2: Description of the second parameter.\n            param3: Description of optional parameter. Defaults to False.\n            \n        Returns:\n            Description of return value and its structure.\n            \n        Raises:\n            ValueError: When param2 is negative.\n            FileNotFoundError: When the specified file doesn't exist.\n            \n        Example:\n            Basic usage example:\n            \n            >>> result = complex_function(\"test\", 42)\n            >>> print(result[\"status\"])\n            \"success\"\n        \"\"\"\n        pass\n\nAPI Documentation\n-----------------\n\nWhen adding new classes or functions to the public API:\n\n1. **Add to __init__.py** exports if appropriate\n2. **Update API reference** documentation\n3. **Include usage examples** in docstrings\n4. **Add to tutorials** if it's a major feature\n\nRST Documentation\n-----------------\n\nWhen writing RST documentation:\n\n.. code-block:: rst\n\n    Section Title\n    =============\n    \n    Subsection\n    ----------\n    \n    Code examples:\n    \n    .. code-block:: python\n    \n        # Python code here\n        import epub_utils\n        \n    Shell commands:\n    \n    .. code-block:: bash\n    \n        epub-utils info book.epub\n\nTesting Guidelines\n==================\n\nTest Structure\n--------------\n\n- **Unit tests**: Test individual functions and methods\n- **Integration tests**: Test component interactions\n- **End-to-end tests**: Test complete workflows\n- **Performance tests**: Test with large files (optional)\n\nWriting Tests\n-------------\n\nUse pytest for all tests:\n\n.. 
code-block:: python\n\n    import pytest\n    from epub_utils import Document\n    from pathlib import Path\n\n\n    def test_document_with_invalid_file():\n        \"\"\"Test error handling with invalid file.\"\"\"\n        with pytest.raises(FileNotFoundError):\n            Document(\"nonexistent.epub\")\n            \n\n    @pytest.mark.parametrize(\"format_type\", [\"dict\", \"xml\", \"json\"])\n    def test_metadata_formats(doc_path, format_type):\n        \"\"\"Test different metadata formats.\"\"\"\n        doc = Document(str(doc_path))\n        metadata = doc.get_metadata(format_type=format_type)\n        assert metadata is not None\n\nTest Fixtures\n-------------\n\nCreate test EPUB files in ``tests/fixtures/``:\n\n.. code-block:: python\n\n    # tests/conftest.py\n    import pytest\n    from pathlib import Path\n\n\n    @pytest.fixture\n    def sample_epub():\n        \"\"\"Provide path to sample EPUB for testing.\"\"\"\n        return Path(__file__).parent / \"fixtures\" / \"sample.epub\"\n\n\n    @pytest.fixture\n    def invalid_epub():\n        \"\"\"Provide path to invalid EPUB for error testing.\"\"\"\n        return Path(__file__).parent / \"fixtures\" / \"invalid.epub\"\n\nRunning Tests\n-------------\n\n.. code-block:: bash\n\n    # Run all tests\n    make test\n    \n    # Run specific test file\n    pytest tests/test_document.py\n\nTypes of Contributions\n======================\n\nBug Reports\n-----------\n\nWhen reporting bugs:\n\n1. Check existing issues first\n2. Use the issue template if available\n3. Provide minimal reproduction case\n4. Include system information\n\n.. code-block:: text\n\n    **Bug Description**\n    Clear description of the bug.\n    \n    **Steps to Reproduce**\n    1. Step one\n    2. Step two\n    3. 
Step three\n    \n    **Expected Behavior**\n    What should happen.\n    \n    **Actual Behavior**\n    What actually happens.\n    \n    **Environment**\n    - epub-utils version: \n    - Python version:\n    - Operating system:\n    \n    **Sample File**\n    Attach or link to EPUB file if relevant.\n\nFeature Requests\n----------------\n\nFor new features:\n\n1. Describe the use case clearly\n2. Explain why it's valuable to users\n3. Suggest implementation approach if you have ideas\n4. Consider backward compatibility\n\nDocumentation Improvements\n--------------------------\n\nDocumentation contributions are highly valued:\n\n- Fix typos and grammar errors\n- Improve clarity of explanations\n- Add more examples to existing docs\n- Create new tutorials for common use cases\n- Update outdated information\n\nCode Contributions\n------------------\n\nAreas where contributions are welcome:\n\n1. Performance improvements\n2. New output formats\n3. Additional EPUB validation\n4. Better error handling\n5. CLI usability enhancements\n6. Support for EPUB 3 features\n\nRelease Process\n===============\n\nVersioning\n----------\n\nWe follow `Semantic Versioning <https://semver.org/>`_:\n\n- MAJOR: Incompatible API changes\n- MINOR: New functionality (backward compatible)\n- PATCH: Bug fixes (backward compatible)\n\nVersion format: ``MAJOR.MINOR.PATCH`` (e.g., ``1.2.3``)\n\nDevelopment versions may include additional identifiers:\n\n- ``1.2.3-dev`` (development)\n- ``1.2.3rc1`` (release candidate)"
  },
  {
    "path": "docs/epub-standards.rst",
    "content": "==============\nEPUB Standards\n==============\n\nUnderstanding EPUB Specifications\n=================================\n\nEPUB (Electronic Publication) is an open standard for digital books and publications. \nThis guide covers the EPUB specifications and how epub-utils ensures compliance.\n\nEPUB 3.3 Specification\n======================\n\nCurrent Standard\n----------------\n\nEPUB 3.3 is the current specification, published by the W3C. It defines:\n\n- **Package Document**: Contains metadata, manifest, and spine\n- **Container Format**: ZIP-based archive structure\n- **Content Documents**: XHTML5, SVG, and other media types\n- **Navigation Document**: Replaces NCX for table of contents\n\nKey Components\n--------------\n\nContainer Structure\n~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: text\n\n    book.epub\n    ├── META-INF/\n    │   ├── container.xml          # Points to package document\n    │   └── signatures.xml         # Digital signatures (optional)\n    ├── OEBPS/                     # Content folder (common name)\n    │   ├── package.opf            # Package document\n    │   ├── nav.xhtml              # Navigation document\n    │   ├── content/               # Text content\n    │   ├── images/                # Images\n    │   ├── styles/                # CSS files\n    │   └── fonts/                 # Font files (optional)\n    └── mimetype                   # Must be first file, uncompressed\n\nPackage Document (OPF)\n~~~~~~~~~~~~~~~~~~~~~~\n\nThe package document defines three main sections:\n\n**Metadata Section**:\n\n.. code-block:: xml\n\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n        <dc:title>Book Title</dc:title>\n        <dc:creator>Author Name</dc:creator>\n        <dc:identifier id=\"bookid\">urn:uuid:12345</dc:identifier>\n        <dc:language>en</dc:language>\n        <meta property=\"dcterms:modified\">2024-01-01T00:00:00Z</meta>\n    </metadata>\n\n**Manifest Section**:\n\n.. 
code-block:: xml\n\n    <manifest>\n        <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" \n              properties=\"nav\"/>\n        <item id=\"chapter1\" href=\"content/chapter1.xhtml\" \n              media-type=\"application/xhtml+xml\"/>\n        <item id=\"cover-image\" href=\"images/cover.jpg\" \n              media-type=\"image/jpeg\" properties=\"cover-image\"/>\n    </manifest>\n\n**Spine Section**:\n\n.. code-block:: xml\n\n    <spine>\n        <itemref idref=\"chapter1\"/>\n        <itemref idref=\"chapter2\"/>\n    </spine>\n\nNavigation Document\n~~~~~~~~~~~~~~~~~~~\n\nEPUB 3 uses XHTML navigation documents instead of NCX:\n\n.. code-block:: html\n\n    <!DOCTYPE html>\n    <html xmlns=\"http://www.w3.org/1999/xhtml\" \n          xmlns:epub=\"http://www.idpf.org/2007/ops\">\n    <head>\n        <title>Navigation</title>\n    </head>\n    <body>\n        <nav epub:type=\"toc\">\n            <h1>Table of Contents</h1>\n            <ol>\n                <li><a href=\"content/chapter1.xhtml\">Chapter 1</a></li>\n                <li><a href=\"content/chapter2.xhtml\">Chapter 2</a></li>\n            </ol>\n        </nav>\n    </body>\n    </html>\n\nEPUB Compliance with epub-utils\n===============================\n\nValidation Capabilities\n-----------------------\n\nepub-utils helps ensure EPUB compliance by:\n\n1. **Structure Validation**: Checks container format\n2. **Metadata Validation**: Verifies required elements\n3. **Manifest Validation**: Ensures all files are declared\n4. **Spine Validation**: Checks reading order\n5. **Content Validation**: Basic XHTML structure checks\n\nChecking Compliance\n-------------------\n\nUse epub-utils to validate EPUB structure:\n\n.. 
code-block:: bash\n\n    # Check basic structure\n    epub-utils book.epub info\n\n    # Detailed manifest information\n    epub-utils book.epub manifest --format table\n\n    # Extract and examine package document\n    epub-utils book.epub extract --output-dir temp/\n    cat temp/OEBPS/package.opf\n\nPython API for Validation\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: python\n\n    from epub_utils import Document\n\n    def validate_epub_structure(epub_path):\n        \"\"\"Validate basic EPUB structure.\"\"\"\n        try:\n            doc = Document(epub_path)\n            \n            # Check required components\n            checks = {\n                'has_container': hasattr(doc, 'container'),\n                'has_package': hasattr(doc, 'package'),\n                'has_metadata': len(doc.metadata) > 0,\n                'has_manifest': len(doc.manifest) > 0,\n                'has_spine': len(doc.spine) > 0,\n            }\n            \n            # Check required metadata\n            required_metadata = ['title', 'language', 'identifier']\n            metadata_present = {}\n            \n            for item in doc.metadata:\n                for req in required_metadata:\n                    if req in item.get('name', '').lower():\n                        metadata_present[req] = True\n            \n            print(\"Structure Validation:\")\n            for check, passed in checks.items():\n                status = \"✓\" if passed else \"✗\"\n                print(f\"  {status} {check}\")\n            \n            print(\"\\nRequired Metadata:\")\n            for req in required_metadata:\n                status = \"✓\" if metadata_present.get(req) else \"✗\"\n                print(f\"  {status} {req}\")\n                \n            return all(checks.values()) and len(metadata_present) >= 2\n            \n        except Exception as e:\n            print(f\"Validation failed: {e}\")\n            return False\n\nCommon Compliance 
Issues\n========================\n\nMissing Required Elements\n-------------------------\n\n**Problem**: EPUB missing required metadata\n\n.. code-block:: bash\n\n    # Check metadata completeness\n    epub-utils book.epub metadata --format table\n\n**Solution**: Ensure these elements are present:\n\n- ``dc:title``\n- ``dc:language`` \n- ``dc:identifier`` (with unique ID)\n- ``meta property=\"dcterms:modified\"`` (EPUB 3)\n\nInvalid File References\n-----------------------\n\n**Problem**: Manifest references files that don't exist\n\n.. code-block:: python\n\n    def check_file_references(epub_path):\n        \"\"\"Check if all manifest files exist in the archive.\"\"\"\n        doc = Document(epub_path)\n        \n        missing_files = []\n        for item in doc.manifest:\n            file_path = item.get('href')\n            if file_path:\n                # Check if file exists in the EPUB\n                try:\n                    # This would need zip file checking\n                    pass  \n                except:\n                    missing_files.append(file_path)\n        \n        if missing_files:\n            print(\"Missing files referenced in manifest:\")\n            for file in missing_files:\n                print(f\"  - {file}\")\n\nIncorrect MIME Types\n--------------------\n\n**Problem**: Wrong media-type attributes in manifest\n\nCommon correct MIME types:\n\n- XHTML: ``application/xhtml+xml``\n- CSS: ``text/css``\n- JPEG: ``image/jpeg``\n- PNG: ``image/png``\n- NCX: ``application/x-dtbncx+xml``\n\nEPUB 2 vs EPUB 3 Differences\n============================\n\nFormat Evolution\n-----------------\n\n+------------------+-------------------------+-------------------------+\n| Feature          | EPUB 2                  | EPUB 3                  |\n+==================+=========================+=========================+\n| Navigation       | NCX file required       | XHTML nav document      
|\n+------------------+-------------------------+-------------------------+\n| Content Types    | XHTML 1.1, limited      | XHTML5, SVG, MathML     |\n+------------------+-------------------------+-------------------------+\n| Metadata         | Dublin Core only        | Enhanced metadata       |\n+------------------+-------------------------+-------------------------+\n| Accessibility    | Limited                 | Rich accessibility      |\n+------------------+-------------------------+-------------------------+\n| Scripting        | Not allowed             | Limited JavaScript      |\n+------------------+-------------------------+-------------------------+\n\nMigration Considerations\n------------------------\n\nWhen working with older EPUB 2 files:\n\n.. code-block:: python\n\n    def detect_epub_version(epub_path):\n        \"\"\"Detect EPUB version from package document.\"\"\"\n        doc = Document(epub_path)\n        \n        # Check package document for version attribute\n        # This is a simplified example\n        for item in doc.manifest:\n            if 'nav' in item.get('properties', ''):\n                return \"EPUB 3\"\n        \n        # Check for NCX file (EPUB 2 indicator)\n        for item in doc.manifest:\n            if item.get('media-type') == 'application/x-dtbncx+xml':\n                return \"EPUB 2\"\n        \n        return \"Unknown\"\n\nBest Practices for Compliance\n=============================\n\nMetadata Best Practices\n-----------------------\n\n1. **Always include required elements**:\n\n   .. code-block:: xml\n\n       <dc:title>Complete Book Title</dc:title>\n       <dc:creator>Author Full Name</dc:creator>\n       <dc:identifier id=\"bookid\">urn:uuid:unique-identifier</dc:identifier>\n       <dc:language>en-US</dc:language>\n\n2. **Use proper Dublin Core refinements**:\n\n   .. 
code-block:: xml\n\n       <dc:creator id=\"author\">Jane Doe</dc:creator>\n       <meta refines=\"#author\" property=\"role\" scheme=\"marc:relators\">aut</meta>\n\n3. **Include modification date for EPUB 3**:\n\n   .. code-block:: xml\n\n       <meta property=\"dcterms:modified\">2024-05-25T10:30:00Z</meta>\n\nFile Organization\n-----------------\n\n1. **Use consistent folder structure**\n2. **Declare all files in manifest**\n3. **Use proper MIME types**\n4. **Include fallbacks for specialized content**\n\nContent Guidelines\n------------------\n\n1. **Valid XHTML**: Ensure all content files are well-formed\n2. **Proper encoding**: Use UTF-8 encoding\n3. **Relative links**: Use relative paths for internal references\n4. **Alt text**: Include alt attributes for images\n\nTesting and Validation Tools\n============================\n\nExternal Validators\n-------------------\n\n- **EPUBCheck**: Official EPUB validator\n- **Ace by DAISY**: Accessibility checker\n- **pagina EPUB-Checker**: Online validator\n\nIntegration with epub-utils\n---------------------------\n\n.. 
code-block:: bash\n\n    # Basic structure check\n    epub-utils book.epub info\n\n    # Export for external validation\n    epub-utils book.epub extract --output-dir validation/\n    # Run EPUBCheck on extracted content\n\n    # Check specific components\n    epub-utils book.epub manifest --format xml > manifest.xml\n    epub-utils book.epub metadata --format xml > metadata.xml\n\nFuture Standards\n================\n\nEPUB 3.3 and Beyond\n-------------------\n\nCurrent developments in EPUB standards:\n\n- **Enhanced accessibility features**\n- **Better multimedia support**\n- **Improved metadata vocabularies**\n- **Web standards alignment**\n\nStaying Current\n---------------\n\n- Monitor W3C EPUB Working Group\n- Test with latest validators\n- Follow accessibility guidelines (WCAG)\n- Use semantic markup\n\nResources\n=========\n\nOfficial Specifications\n-----------------------\n\n- `EPUB 3.3 Specification <https://www.w3.org/TR/epub-33/>`_\n- `EPUB Accessibility 1.1 <https://www.w3.org/TR/epub-a11y-11/>`_\n- `EPUB Open Container Format 3.0.1 <https://www.w3.org/TR/epub-ocf-301/>`_\n\nValidation Tools\n----------------\n\n- `EPUBCheck <https://github.com/w3c/epubcheck>`_\n- `Ace Accessibility Checker <https://github.com/daisy/ace>`_\n- `EPUB Validator <https://validator.idpf.org/>`_\n\nDeveloper Resources\n-------------------\n\n- `EPUB 3 Best Practices <https://www.w3.org/TR/epub-bp/>`_\n- `IDPF EPUB Resources <http://idpf.org/epub/31/spec/>`_\n- `Accessibility Guidelines <https://www.w3.org/WAI/WCAG21/quickref/>`_\n"
  },
  {
    "path": "docs/examples.rst",
    "content": "Examples and Use Cases\n======================\n\nThis page showcases real-world examples of using epub-utils for various tasks. Each example \nincludes both CLI and Python API approaches where applicable.\n\nDigital Library Management\n--------------------------\n\nCataloging Your EPUB Collection\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: You have a large collection of EPUB files and want to create a comprehensive catalog.\n\n**CLI Approach**:\n\n.. code-block:: bash\n\n   #!/bin/bash\n   # catalog-epubs.sh - Create a catalog of all EPUB files\n\n   echo \"Creating EPUB catalog...\"\n   echo \"File,Title,Author,Publisher,Language,Year,Files,Size\" > epub_catalog.csv\n\n   find . -name \"*.epub\" -type f | while read -r epub; do\n       echo \"Processing: $epub\"\n       \n       # Extract metadata using epub-utils\n       metadata=$(epub-utils \"$epub\" metadata --format kv 2>/dev/null)\n       \n       if [ $? -eq 0 ]; then\n           title=$(echo \"$metadata\" | grep \"^title:\" | cut -d' ' -f2- | sed 's/,/;/g')\n           author=$(echo \"$metadata\" | grep \"^creator:\" | cut -d' ' -f2- | sed 's/,/;/g')\n           publisher=$(echo \"$metadata\" | grep \"^publisher:\" | cut -d' ' -f2- | sed 's/,/;/g')\n           language=$(echo \"$metadata\" | grep \"^language:\" | cut -d' ' -f2-)\n           year=$(echo \"$metadata\" | grep \"^date:\" | cut -d' ' -f2- | cut -d'-' -f1)\n           \n           # Count files and get size\n           file_count=$(epub-utils \"$epub\" files --format raw 2>/dev/null | wc -l)\n           size=$(stat -f%z \"$epub\" 2>/dev/null || stat -c%s \"$epub\" 2>/dev/null)\n           \n           echo \"$epub,$title,$author,$publisher,$language,$year,$file_count,$size\" >> epub_catalog.csv\n       else\n           echo \"$epub,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR\" >> epub_catalog.csv\n       fi\n   done\n\n   echo \"Catalog complete! See epub_catalog.csv\"\n\n**Python Approach**:\n\n.. 
code-block:: python\n\n   import csv\n   import os\n   from pathlib import Path\n   from epub_utils import Document\n\n   def create_epub_catalog(directory, output_file=\"epub_catalog.csv\"):\n       \"\"\"Create a comprehensive catalog of EPUB files.\"\"\"\n       \n       fieldnames = [\n           'filepath', 'filename', 'title', 'author', 'publisher', \n           'language', 'year', 'isbn', 'file_count', 'size_bytes', 'size_mb'\n       ]\n       \n       epub_files = list(Path(directory).rglob(\"*.epub\"))\n       print(f\"Found {len(epub_files)} EPUB files\")\n       \n       with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:\n           writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n           writer.writeheader()\n           \n           for i, epub_path in enumerate(epub_files, 1):\n               print(f\"Processing {i}/{len(epub_files)}: {epub_path.name}\")\n               \n               try:\n                   doc = Document(str(epub_path))\n                   metadata = doc.package.metadata\n                   \n                   # Extract date year\n                   date_str = getattr(metadata, 'date', '')\n                   year = date_str.split('-')[0] if date_str else ''\n                   \n                   # Get file size\n                   size_bytes = epub_path.stat().st_size\n                   size_mb = round(size_bytes / (1024 * 1024), 2)\n                   \n                   row = {\n                       'filepath': str(epub_path),\n                       'filename': epub_path.name,\n                       'title': getattr(metadata, 'title', ''),\n                       'author': getattr(metadata, 'creator', ''),\n                       'publisher': getattr(metadata, 'publisher', ''),\n                       'language': getattr(metadata, 'language', ''),\n                       'year': year,\n                       'isbn': getattr(metadata, 'identifier', ''),\n                       'file_count': 
len(doc.get_files_info()),\n                       'size_bytes': size_bytes,\n                       'size_mb': size_mb\n                   }\n                   \n                   writer.writerow(row)\n                   \n               except Exception as e:\n                   print(f\"  Error: {e}\")\n                   # Write error row\n                   writer.writerow({\n                       'filepath': str(epub_path),\n                       'filename': epub_path.name,\n                       'title': f'ERROR: {str(e)}',\n                       'author': '',\n                       'publisher': '',\n                       'language': '',\n                       'year': '',\n                       'isbn': '',\n                       'file_count': 0,\n                       'size_bytes': epub_path.stat().st_size,\n                       'size_mb': 0\n                   })\n\n   # Usage\n   create_epub_catalog(\"/path/to/your/epub/collection\")\n\nQuality Assurance and Validation\n---------------------------------\n\nEPUB Health Check\n~~~~~~~~~~~~~~~~~\n\n**Scenario**: Validate EPUB files and identify potential issues.\n\n.. 
code-block:: python\n\n   from epub_utils import Document, ParseError\n   import zipfile\n   from pathlib import Path\n\n   class EPUBHealthChecker:\n       def __init__(self):\n           self.issues = []\n           \n       def check_epub(self, epub_path):\n           \"\"\"Comprehensive EPUB health check.\"\"\"\n           self.issues = []\n           epub_path = Path(epub_path)\n           \n           print(f\"Checking EPUB: {epub_path.name}\")\n           \n           # Basic file checks\n           if not epub_path.exists():\n               self.issues.append(\"File does not exist\")\n               return self.get_report()\n           \n           if epub_path.stat().st_size == 0:\n               self.issues.append(\"File is empty\")\n               return self.get_report()\n           \n           # ZIP integrity check\n           try:\n               with zipfile.ZipFile(epub_path, 'r') as zf:\n                   corrupt_files = zf.testzip()\n                   if corrupt_files:\n                       self.issues.append(f\"Corrupt ZIP file: {corrupt_files}\")\n           except zipfile.BadZipFile:\n               self.issues.append(\"Invalid ZIP file\")\n               return self.get_report()\n           \n           # EPUB structure checks\n           try:\n               doc = Document(str(epub_path))\n               self._check_container(doc)\n               self._check_package(doc)\n               self._check_metadata(doc)\n               self._check_manifest(doc)\n               self._check_files(doc)\n               \n           except ParseError as e:\n               self.issues.append(f\"Parse error: {e}\")\n           except Exception as e:\n               self.issues.append(f\"Unexpected error: {e}\")\n           \n           return self.get_report()\n       \n       def _check_container(self, doc):\n           \"\"\"Check container structure.\"\"\"\n           try:\n               container = doc.container\n               if not 
container.rootfile_path:\n                   self.issues.append(\"No rootfile specified in container\")\n           except Exception as e:\n               self.issues.append(f\"Container error: {e}\")\n       \n       def _check_package(self, doc):\n           \"\"\"Check package/OPF file.\"\"\"\n           try:\n               package = doc.package\n               if not hasattr(package, 'metadata'):\n                   self.issues.append(\"Package missing metadata\")\n               if not hasattr(package, 'manifest'):\n                   self.issues.append(\"Package missing manifest\")\n               if not hasattr(package, 'spine'):\n                   self.issues.append(\"Package missing spine\")\n           except Exception as e:\n               self.issues.append(f\"Package error: {e}\")\n       \n       def _check_metadata(self, doc):\n           \"\"\"Check metadata quality.\"\"\"\n           try:\n               metadata = doc.package.metadata\n               \n               # Check required fields\n               if not getattr(metadata, 'title', '').strip():\n                   self.issues.append(\"Missing or empty title\")\n               if not getattr(metadata, 'language', '').strip():\n                   self.issues.append(\"Missing or empty language\")\n               if not getattr(metadata, 'identifier', '').strip():\n                   self.issues.append(\"Missing or empty identifier\")\n                   \n           except Exception as e:\n               self.issues.append(f\"Metadata error: {e}\")\n       \n       def _check_manifest(self, doc):\n           \"\"\"Check manifest integrity.\"\"\"\n           try:\n               manifest = doc.package.manifest\n               if not manifest.items:\n                   self.issues.append(\"Empty manifest\")\n               \n               # Check for common content types\n               has_html = any(\n                   item.get('media-type') == 'application/xhtml+xml'\n                   
for item in manifest.items.values()\n               )\n               if not has_html:\n                   self.issues.append(\"No XHTML content files found\")\n                   \n           except Exception as e:\n               self.issues.append(f\"Manifest error: {e}\")\n       \n       def _check_files(self, doc):\n           \"\"\"Check file structure.\"\"\"\n           try:\n               files_info = doc.get_files_info()\n               if len(files_info) < 3:  # At least container, package, and one content file\n                   self.issues.append(\"Very few files in EPUB (possibly incomplete)\")\n               \n               # Check for suspiciously large files\n               for file_info in files_info:\n                   if file_info['size'] > 10 * 1024 * 1024:  # 10MB\n                       self.issues.append(f\"Large file found: {file_info['path']} ({file_info['size']} bytes)\")\n                       \n           except Exception as e:\n               self.issues.append(f\"File check error: {e}\")\n       \n       def get_report(self):\n           \"\"\"Generate health check report.\"\"\"\n           if not self.issues:\n               return {\"status\": \"healthy\", \"issues\": []}\n           else:\n               return {\"status\": \"issues_found\", \"issues\": self.issues}\n\n   # Usage\n   checker = EPUBHealthChecker()\n   report = checker.check_epub(\"book.epub\")\n\n   if report[\"status\"] == \"healthy\":\n       print(\"✓ EPUB is healthy!\")\n   else:\n       print(\"⚠ Issues found:\")\n       for issue in report[\"issues\"]:\n           print(f\"  - {issue}\")\n\nMetadata Management\n-------------------\n\nStandardizing Metadata\n~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Clean and standardize metadata across your EPUB collection.\n\n.. 
code-block:: python\n\n   import re\n   from epub_utils import Document\n\n   class MetadataStandardizer:\n       def __init__(self):\n           self.language_codes = {\n               'english': 'en',\n               'spanish': 'es', \n               'french': 'fr',\n               'german': 'de',\n               'italian': 'it'\n               # Add more as needed\n           }\n       \n       def analyze_metadata(self, epub_path):\n           \"\"\"Analyze and suggest metadata improvements.\"\"\"\n           doc = Document(epub_path)\n           metadata = doc.package.metadata\n           suggestions = []\n           \n           # Check title\n           title = getattr(metadata, 'title', '')\n           if not title:\n               suggestions.append(\"Missing title\")\n           elif len(title) > 200:\n               suggestions.append(\"Title is very long (>200 chars)\")\n           elif title.isupper():\n               suggestions.append(\"Title is all uppercase - consider title case\")\n           \n           # Check author\n           creator = getattr(metadata, 'creator', '')\n           if not creator:\n               suggestions.append(\"Missing author/creator\")\n           elif ',' not in creator and len(creator.split()) > 2:\n               suggestions.append(\"Author name might need reformatting (Last, First)\")\n           \n           # Check language\n           language = getattr(metadata, 'language', '')\n           if not language:\n               suggestions.append(\"Missing language code\")\n           elif len(language) > 3:\n               # Might be full language name instead of code\n               lang_lower = language.lower()\n               if lang_lower in self.language_codes:\n                   suggestions.append(f\"Use language code '{self.language_codes[lang_lower]}' instead of '{language}'\")\n           \n           # Check identifier\n           identifier = getattr(metadata, 'identifier', '')\n           if not 
identifier:\n               suggestions.append(\"Missing identifier\")\n           elif not self._is_valid_identifier(identifier):\n               suggestions.append(\"Identifier format might be invalid\")\n           \n           # Check date format\n           date = getattr(metadata, 'date', '')\n           if date and not re.match(r'\\d{4}(-\\d{2}-\\d{2})?', date):\n               suggestions.append(\"Date should be in YYYY or YYYY-MM-DD format\")\n           \n           return {\n               'file': epub_path,\n               'current_metadata': {\n                   'title': title,\n                   'creator': creator,\n                   'language': language,\n                   'identifier': identifier,\n                   'date': date\n               },\n               'suggestions': suggestions\n           }\n       \n       def _is_valid_identifier(self, identifier):\n           \"\"\"Check if identifier looks valid.\"\"\"\n           # Check for ISBN, DOI, UUID patterns\n           patterns = [\n               r'urn:isbn:\\d{10,13}',  # ISBN URN\n               r'isbn:\\d{10,13}',      # Simple ISBN\n               r'urn:uuid:[a-f0-9-]{36}',  # UUID URN\n               r'doi:10\\.\\d+/.+',      # DOI\n               r'urn:doi:10\\.\\d+/.+'   # DOI URN\n           ]\n           \n           return any(re.match(pattern, identifier, re.I) for pattern in patterns)\n\n   # Usage\n   standardizer = MetadataStandardizer()\n   analysis = standardizer.analyze_metadata(\"book.epub\")\n\n   print(f\"Analyzing: {analysis['file']}\")\n   if analysis['suggestions']:\n       print(\"Suggestions for improvement:\")\n       for suggestion in analysis['suggestions']:\n           print(f\"  - {suggestion}\")\n   else:\n       print(\"Metadata looks good!\")\n\nContent Analysis and Statistics\n-------------------------------\n\nReading Level Analysis\n~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Analyze EPUB content to determine reading complexity.\n\n.. 
code-block:: python\n\n   import re\n   import math\n   from epub_utils import Document\n\n   class ReadingLevelAnalyzer:\n       def analyze_epub(self, epub_path):\n           \"\"\"Analyze reading level of an EPUB.\"\"\"\n           doc = Document(epub_path)\n           \n           # Get all text content\n           all_text = self._extract_all_text(doc)\n           \n           if not all_text.strip():\n               return {\"error\": \"No readable text found\"}\n           \n           # Calculate statistics\n           stats = self._calculate_text_stats(all_text)\n           \n           # Calculate reading level scores\n           flesch_score = self._flesch_reading_ease(stats)\n           flesch_grade = self._flesch_kincaid_grade(stats)\n           \n           return {\n               'title': getattr(doc.package.metadata, 'title', 'Unknown'),\n               'word_count': stats['words'],\n               'sentence_count': stats['sentences'],\n               'syllable_count': stats['syllables'],\n               'avg_words_per_sentence': round(stats['words'] / stats['sentences'], 2),\n               'avg_syllables_per_word': round(stats['syllables'] / stats['words'], 2),\n               'flesch_reading_ease': round(flesch_score, 2),\n               'flesch_kincaid_grade': round(flesch_grade, 2),\n               'reading_level': self._interpret_flesch_score(flesch_score)\n           }\n       \n       def _extract_all_text(self, doc):\n           \"\"\"Extract all readable text from EPUB.\"\"\"\n           # This is a simplified version - real implementation would\n           # need to parse XHTML content files\n           try:\n               manifest = doc.package.manifest\n               # In a real implementation, you'd extract and parse each content file\n               # For now, return placeholder\n               return \"Sample text for analysis. 
This would contain the actual book content.\"\n           except Exception:\n               return \"\"\n       \n       def _calculate_text_stats(self, text):\n           \"\"\"Calculate basic text statistics.\"\"\"\n           # Clean text\n           text = re.sub(r'[^\\w\\s\\.\\!\\?]', '', text)\n           \n           # Count words\n           words = len(text.split())\n           \n           # Count sentences\n           sentences = len(re.findall(r'[.!?]+', text))\n           if sentences == 0:\n               sentences = 1  # Avoid division by zero\n           \n           # Count syllables (simplified)\n           syllables = self._count_syllables(text)\n           \n           return {\n               'words': words,\n               'sentences': sentences,\n               'syllables': syllables\n           }\n       \n       def _count_syllables(self, text):\n           \"\"\"Simplified syllable counting.\"\"\"\n           words = text.lower().split()\n           syllable_count = 0\n           \n           for word in words:\n               word = re.sub(r'[^a-z]', '', word)\n               if word:\n                   # Simple syllable counting heuristic\n                   vowels = 'aeiouy'\n                   syllables = sum(1 for i, char in enumerate(word) \n                                 if char in vowels and (i == 0 or word[i-1] not in vowels))\n                   if word.endswith('e') and syllables > 1:\n                       syllables -= 1\n                   syllable_count += max(1, syllables)\n           \n           return syllable_count\n       \n       def _flesch_reading_ease(self, stats):\n           \"\"\"Calculate Flesch Reading Ease score.\"\"\"\n           return (206.835 - \n                   (1.015 * (stats['words'] / stats['sentences'])) - \n                   (84.6 * (stats['syllables'] / stats['words'])))\n       \n       def _flesch_kincaid_grade(self, stats):\n           \"\"\"Calculate Flesch-Kincaid Grade Level.\"\"\"\n   
        return ((0.39 * (stats['words'] / stats['sentences'])) + \n                   (11.8 * (stats['syllables'] / stats['words'])) - 15.59)\n       \n       def _interpret_flesch_score(self, score):\n           \"\"\"Interpret Flesch Reading Ease score.\"\"\"\n           if score >= 90:\n               return \"Very Easy (5th grade)\"\n           elif score >= 80:\n               return \"Easy (6th grade)\"\n           elif score >= 70:\n               return \"Fairly Easy (7th grade)\"\n           elif score >= 60:\n               return \"Standard (8th-9th grade)\"\n           elif score >= 50:\n               return \"Fairly Difficult (10th-12th grade)\"\n           elif score >= 30:\n               return \"Difficult (College level)\"\n           else:\n               return \"Very Difficult (Graduate level)\"\n\n   # Usage\n   analyzer = ReadingLevelAnalyzer()\n   analysis = analyzer.analyze_epub(\"book.epub\")\n\n   print(f\"Reading Level Analysis for: {analysis['title']}\")\n   print(f\"Word Count: {analysis['word_count']:,}\")\n   print(f\"Reading Level: {analysis['reading_level']}\")\n   print(f\"Flesch-Kincaid Grade: {analysis['flesch_kincaid_grade']}\")\n\nDirect File Access and Extraction\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Extract specific files from EPUB archives for processing or analysis.\n\n**CLI Approach**:\n\n.. 
code-block:: bash\n\n   #!/bin/bash\n   # extract-epub-assets.sh - Extract and process EPUB content files\n\n   epub_file=\"$1\"\n   output_dir=\"extracted_content\"\n   \n   mkdir -p \"$output_dir\"\n   \n   echo \"Extracting content from: $epub_file\"\n   \n   # Get list of all XHTML content files\n   epub-utils \"$epub_file\" files --format raw | grep '\\.xhtml$' | while read -r file_path; do\n       echo \"Processing: $file_path\"\n       \n       # Extract plain text content\n       safe_name=$(echo \"$file_path\" | tr '/' '_')\n       epub-utils \"$epub_file\" files \"$file_path\" --format plain > \"$output_dir/${safe_name}.txt\"\n       \n       # Extract styled HTML content\n       epub-utils \"$epub_file\" files \"$file_path\" --format raw > \"$output_dir/${safe_name}.html\"\n   done\n   \n   # Extract CSS files for styling reference\n   epub-utils \"$epub_file\" files --format raw | grep '\\.css$' | while read -r css_path; do\n       echo \"Extracting CSS: $css_path\"\n       safe_name=$(echo \"$css_path\" | tr '/' '_')\n       epub-utils \"$epub_file\" files \"$css_path\" > \"$output_dir/${safe_name}\"\n   done\n   \n   echo \"Extraction complete! Files saved to $output_dir/\"\n\n**Comparing files vs content commands**:\n\n.. 
code-block:: bash\n\n   # Using files command (direct path access)\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format plain\n   epub-utils book.epub files OEBPS/styles/main.css\n   epub-utils book.epub files META-INF/container.xml\n   \n   # Using content command (requires manifest item ID)\n   epub-utils book.epub manifest | grep chapter1  # Find the ID first\n   epub-utils book.epub content chapter1-id --format plain\n\n**Key advantages of the files command**:\n\n- **Direct access**: Use actual file paths without needing manifest IDs\n- **Universal file access**: Access any file type (XHTML, CSS, XML, images, etc.)\n- **Simpler automation**: No need to parse manifest to find item IDs\n- **Better for file-system-based workflows**: Mirrors actual EPUB structure\n\n**Python equivalent using API**:\n\n.. code-block:: python\n\n   from epub_utils import Document\n\n   def extract_file_content(epub_path, file_path):\n       \"\"\"Extract content from a specific file in EPUB.\"\"\"\n       doc = Document(epub_path)\n       \n       try:\n           content = doc.get_file_by_path(file_path)\n           \n           # Handle different content types\n           if hasattr(content, 'to_plain'):\n               # XHTML content - can extract plain text\n               return {\n                   'raw_html': content.to_str(),\n                   'plain_text': content.to_plain(),\n                   'formatted_xml': content.to_xml(pretty_print=True)\n               }\n           else:\n               # Other file types (CSS, XML, etc.)\n               return {'raw_content': content}\n               \n       except ValueError as e:\n           return {'error': str(e)}\n\n   # Usage\n   doc = Document(\"book.epub\")\n   \n   # Extract chapter content\n   chapter_content = extract_file_content(\"book.epub\", \"OEBPS/chapter1.xhtml\")\n   if 'plain_text' in chapter_content:\n       print(f\"Chapter text: {chapter_content['plain_text'][:200]}...\")\n   \n   # Extract CSS 
for styling analysis\n   css_content = extract_file_content(\"book.epub\", \"OEBPS/styles/main.css\")\n   if 'raw_content' in css_content:\n       print(f\"CSS rules: {len(css_content['raw_content'].split('{'))} rules found\")\n\nAutomation and Workflows\n-------------------------\n\nAutomated EPUB Processing Pipeline\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Set up an automated pipeline for processing new EPUB files.\n\n.. code-block:: python\n\n   import os\n   import re\n   import shutil\n   import json\n   from pathlib import Path\n   from datetime import datetime\n   from epub_utils import Document\n\n   class EPUBProcessor:\n       def __init__(self, input_dir, output_dir, processed_dir):\n           self.input_dir = Path(input_dir)\n           self.output_dir = Path(output_dir)\n           self.processed_dir = Path(processed_dir)\n           \n           # Create directories if they don't exist\n           self.output_dir.mkdir(exist_ok=True)\n           self.processed_dir.mkdir(exist_ok=True)\n       \n       def process_new_files(self):\n           \"\"\"Process all new EPUB files in input directory.\"\"\"\n           epub_files = list(self.input_dir.glob(\"*.epub\"))\n           \n           if not epub_files:\n               print(\"No EPUB files found to process\")\n               return\n           \n           print(f\"Found {len(epub_files)} EPUB files to process\")\n           \n           results = []\n           for epub_path in epub_files:\n               result = self.process_single_file(epub_path)\n               results.append(result)\n           \n           # Generate processing report\n           self.generate_report(results)\n           \n           return results\n       \n       def process_single_file(self, epub_path):\n           \"\"\"Process a single EPUB file.\"\"\"\n           print(f\"Processing: {epub_path.name}\")\n           \n           try:\n               doc = Document(str(epub_path))\n               \n               # Extract 
metadata\n               metadata = self.extract_metadata(doc)\n               \n               # Validate file\n               validation_result = self.validate_epub(doc)\n               \n               # Generate file info\n               file_info = self.generate_file_info(epub_path, doc)\n               \n               # Create organized filename\n               new_filename = self.create_organized_filename(metadata)\n               \n               # Move file to organized location\n               organized_path = self.organize_file(epub_path, new_filename, metadata)\n               \n               result = {\n                   'original_path': str(epub_path),\n                   'new_path': str(organized_path),\n                   'status': 'success',\n                   'metadata': metadata,\n                   'validation': validation_result,\n                   'file_info': file_info,\n                   'processed_at': datetime.now().isoformat()\n               }\n               \n               # Move original to processed directory\n               processed_path = self.processed_dir / epub_path.name\n               shutil.move(str(epub_path), str(processed_path))\n               \n               return result\n               \n           except Exception as e:\n               result = {\n                   'original_path': str(epub_path),\n                   'status': 'error',\n                   'error': str(e),\n                   'processed_at': datetime.now().isoformat()\n               }\n               \n               # Move problematic file to processed directory\n               processed_path = self.processed_dir / f\"ERROR_{epub_path.name}\"\n               shutil.move(str(epub_path), str(processed_path))\n               \n               return result\n       \n       def extract_metadata(self, doc):\n           \"\"\"Extract standardized metadata.\"\"\"\n           metadata = doc.package.metadata\n           \n           return {\n        
       'title': getattr(metadata, 'title', '').strip(),\n               'author': getattr(metadata, 'creator', '').strip(),\n               'publisher': getattr(metadata, 'publisher', '').strip(),\n               'language': getattr(metadata, 'language', '').strip(),\n               'year': self.extract_year(getattr(metadata, 'date', '')),\n               'identifier': getattr(metadata, 'identifier', '').strip(),\n               'subject': getattr(metadata, 'subject', '').strip()\n           }\n       \n       def extract_year(self, date_str):\n           \"\"\"Extract year from date string.\"\"\"\n           if not date_str:\n               return ''\n           return date_str.split('-')[0] if '-' in date_str else date_str[:4]\n       \n       def validate_epub(self, doc):\n           \"\"\"Basic EPUB validation.\"\"\"\n           issues = []\n           \n           try:\n               metadata = doc.package.metadata\n               \n               if not getattr(metadata, 'title', '').strip():\n                   issues.append('Missing title')\n               if not getattr(metadata, 'creator', '').strip():\n                   issues.append('Missing author')\n               if not getattr(metadata, 'language', '').strip():\n                   issues.append('Missing language')\n               \n               # Check for content\n               manifest = doc.package.manifest\n               has_content = any(\n                   item.get('media-type') == 'application/xhtml+xml'\n                   for item in manifest.items.values()\n               )\n               \n               if not has_content:\n                   issues.append('No content files found')\n               \n           except Exception as e:\n               issues.append(f'Validation error: {e}')\n           \n           return {\n               'is_valid': len(issues) == 0,\n               'issues': issues\n           }\n       \n       def generate_file_info(self, epub_path, doc):\n     
      \"\"\"Generate file information.\"\"\"\n           stat = epub_path.stat()\n           \n           return {\n               'filename': epub_path.name,\n               'size_bytes': stat.st_size,\n               'size_mb': round(stat.st_size / (1024 * 1024), 2),\n               'file_count': len(doc.get_files_info()),\n               'modified': datetime.fromtimestamp(stat.st_mtime).isoformat()\n           }\n       \n       def create_organized_filename(self, metadata):\n           \"\"\"Create an organized filename from metadata.\"\"\"\n           # Clean strings for filename\n           def clean_for_filename(s):\n               return re.sub(r'[^\\w\\s-]', '', s).strip()[:50]\n           \n           author = clean_for_filename(metadata['author'] or 'Unknown_Author')\n           title = clean_for_filename(metadata['title'] or 'Unknown_Title')\n           year = metadata['year'] or 'Unknown_Year'\n           \n           return f\"{author} - {title} ({year}).epub\"\n       \n       def organize_file(self, epub_path, new_filename, metadata):\n           \"\"\"Organize file into structured directory.\"\"\"\n           # Create author directory\n           author = metadata['author'] or 'Unknown_Author'\n           author_dir = self.output_dir / author[:50]  # Limit length\n           author_dir.mkdir(exist_ok=True)\n           \n           # Create final path\n           final_path = author_dir / new_filename\n           \n           # Copy file to organized location\n           shutil.copy2(str(epub_path), str(final_path))\n           \n           return final_path\n       \n       def generate_report(self, results):\n           \"\"\"Generate processing report.\"\"\"\n           report_path = self.output_dir / f\"processing_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json\"\n           \n           summary = {\n               'total_files': len(results),\n               'successful': len([r for r in results if r['status'] == 'success']),\n           
    'errors': len([r for r in results if r['status'] == 'error']),\n               'generated_at': datetime.now().isoformat(),\n               'results': results\n           }\n           \n           with open(report_path, 'w', encoding='utf-8') as f:\n               json.dump(summary, f, indent=2, ensure_ascii=False)\n           \n           print(f\"Processing complete!\")\n           print(f\"Successfully processed: {summary['successful']}\")\n           print(f\"Errors: {summary['errors']}\")\n           print(f\"Report saved to: {report_path}\")\n\n   # Usage\n   processor = EPUBProcessor(\n       input_dir=\"/path/to/new/epubs\",\n       output_dir=\"/path/to/organized/library\", \n       processed_dir=\"/path/to/processed/files\"\n   )\n\n   results = processor.process_new_files()\n\nCommand-Line Power User Examples\n--------------------------------\n\nAdvanced Shell Scripts\n~~~~~~~~~~~~~~~~~~~~~~\n\n**Complex metadata extraction with error handling**:\n\n.. code-block:: bash\n\n   #!/bin/bash\n   # advanced-epub-analysis.sh\n\n   set -euo pipefail\n\n   EPUB_DIR=\"${1:-./}\"\n   OUTPUT_FILE=\"detailed_analysis.json\"\n\n   echo \"Starting advanced EPUB analysis...\"\n   echo \"Directory: $EPUB_DIR\"\n   echo \"Output: $OUTPUT_FILE\"\n\n   # Initialize JSON output\n   echo '{\"analysis_date\": \"'$(date -Iseconds)'\", \"epubs\": [' > \"$OUTPUT_FILE\"\n\n   first=true\n   find \"$EPUB_DIR\" -name \"*.epub\" -type f | while read -r epub; do\n       echo \"Analyzing: $(basename \"$epub\")\"\n       \n       if [ \"$first\" = true ]; then\n           first=false\n       else\n           echo \",\" >> \"$OUTPUT_FILE\"\n       fi\n       \n       # Start JSON object for this EPUB\n       echo '  {' >> \"$OUTPUT_FILE\"\n       echo \"    \\\"file\\\": \\\"$epub\\\",\" >> \"$OUTPUT_FILE\"\n       \n       # Extract metadata with error handling\n       if metadata=$(epub-utils \"$epub\" metadata --format kv 2>/dev/null); then\n           echo \"    
\\\"metadata\\\": {\" >> \"$OUTPUT_FILE\"\n           \n           # Parse metadata into JSON\n           echo \"$metadata\" | while IFS=': ' read -r key value; do\n               if [ -n \"$key\" ] && [ -n \"$value\" ]; then\n                   echo \"      \\\"$key\\\": \\\"$value\\\",\" >> \"$OUTPUT_FILE\"\n               fi\n           done | sed '$s/,$//' # Remove last comma\n           \n           echo \"    },\" >> \"$OUTPUT_FILE\"\n       else\n           echo \"    \\\"metadata\\\": null,\" >> \"$OUTPUT_FILE\"\n           echo \"    \\\"metadata_error\\\": true,\" >> \"$OUTPUT_FILE\"\n       fi\n       \n       # File analysis\n       if file_info=$(epub-utils \"$epub\" files --format raw 2>/dev/null); then\n           file_count=$(echo \"$file_info\" | wc -l)\n           echo \"    \\\"file_count\\\": $file_count,\" >> \"$OUTPUT_FILE\"\n       else\n           echo \"    \\\"file_count\\\": null,\" >> \"$OUTPUT_FILE\"\n       fi\n       \n       # File size\n       size=$(stat -f%z \"$epub\" 2>/dev/null || stat -c%s \"$epub\" 2>/dev/null || echo \"0\")\n       echo \"    \\\"size_bytes\\\": $size,\" >> \"$OUTPUT_FILE\"\n       \n       # Validation check\n       if epub-utils \"$epub\" container >/dev/null 2>&1 && \\\n          epub-utils \"$epub\" package >/dev/null 2>&1; then\n           echo \"    \\\"is_valid\\\": true\" >> \"$OUTPUT_FILE\"\n       else\n           echo \"    \\\"is_valid\\\": false\" >> \"$OUTPUT_FILE\"\n       fi\n       \n       echo \"  }\" >> \"$OUTPUT_FILE\"\n   done\n\n   # Close JSON\n   echo \"]}\" >> \"$OUTPUT_FILE\"\n\n   echo \"Analysis complete! Results in $OUTPUT_FILE\"\n\n**Batch processing with parallel execution**:\n\n.. 
code-block:: bash\n\n   #!/bin/bash\n   # parallel-epub-check.sh\n\n   EPUB_DIR=\"${1:-./}\"\n   MAX_JOBS=4\n\n   check_single_epub() {\n       epub=\"$1\"\n       base=$(basename \"$epub\")\n       \n       echo \"[$base] Starting check...\"\n       \n       # Quick validation\n       if ! epub-utils \"$epub\" container >/dev/null 2>&1; then\n           echo \"[$base] ❌ Invalid container\"\n           return 1\n       fi\n       \n       if ! epub-utils \"$epub\" package >/dev/null 2>&1; then\n           echo \"[$base] ❌ Invalid package\"\n           return 1\n       fi\n       \n       # Check for required metadata\n       metadata=$(epub-utils \"$epub\" metadata --format kv 2>/dev/null)\n       \n       if ! echo \"$metadata\" | grep -q \"^title:\"; then\n           echo \"[$base] ⚠️  Missing title\"\n       fi\n       \n       if ! echo \"$metadata\" | grep -q \"^creator:\"; then\n           echo \"[$base] ⚠️  Missing author\"\n       fi\n       \n       echo \"[$base] ✅ Check complete\"\n   }\n\n   # Export after the definition so the bash processes spawned by xargs inherit it\n   export -f check_single_epub\n\n   # Run parallel checks\n   find \"$EPUB_DIR\" -name \"*.epub\" -type f | \\\n   xargs -n 1 -P $MAX_JOBS -I {} bash -c 'check_single_epub \"$@\"' _ {}\n\nNavigation and Table of Contents\n--------------------------------\n\nWorking with EPUB Navigation Documents\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Extract and analyze navigation structures from both EPUB 2 and EPUB 3 files.\n\n**CLI Approach - Version-Specific TOC Access**:\n\n.. 
code-block:: bash\n\n   #!/bin/bash\n   # extract-navigation.sh - Extract navigation from EPUB files\n   \n   EPUB_FILE=\"$1\"\n   \n   if [ -z \"$EPUB_FILE\" ]; then\n       echo \"Usage: $0 <epub-file>\"\n       exit 1\n   fi\n   \n   echo \"Analyzing navigation in: $(basename \"$EPUB_FILE\")\"\n   echo \"========================================\"\n   \n   # Try EPUB 3 nav document first\n   echo \"Attempting EPUB 3 nav document extraction...\"\n   if epub-utils \"$EPUB_FILE\" toc --nav > /tmp/nav.xml 2>/dev/null; then\n       echo \"✅ EPUB 3 nav document found\"\n       echo \"Navigation structure:\"\n       # Extract navigation items with their hierarchy\n       grep -o '<a[^>]*href=\"[^\"]*\"[^>]*>[^<]*</a>' /tmp/nav.xml | \\\n       sed 's/<a[^>]*href=\"\\([^\"]*\\)\"[^>]*>\\([^<]*\\)<\\/a>/  → \\2 (\\1)/' | \\\n       head -10\n       \n       # Count navigation items\n       nav_count=$(grep -c '<a[^>]*href=' /tmp/nav.xml)\n       echo \"Total navigation items: $nav_count\"\n   else\n       echo \"❌ No EPUB 3 nav document found\"\n   fi\n   \n   echo \"\"\n   echo \"Attempting EPUB 2 NCX extraction...\"\n   if epub-utils \"$EPUB_FILE\" toc --ncx > /tmp/ncx.xml 2>/dev/null; then\n       echo \"✅ EPUB 2 NCX document found\"\n       echo \"Table of contents structure:\"\n       # Extract NCX navigation points\n       grep -o '<navLabel><text>[^<]*</text></navLabel>' /tmp/ncx.xml | \\\n       sed 's/<navLabel><text>\\([^<]*\\)<\\/text><\\/navLabel>/  → \\1/' | \\\n       head -10\n       \n       # Count NCX nav points\n       ncx_count=$(grep -c '<navPoint' /tmp/ncx.xml)\n       echo \"Total NCX navigation points: $ncx_count\"\n   else\n       echo \"❌ No EPUB 2 NCX document found\"\n   fi\n   \n   # Compare standard TOC with version-specific extracts\n   echo \"\"\n   echo \"Standard TOC extraction:\"\n   standard_toc=$(epub-utils \"$EPUB_FILE\" toc --format raw 2>/dev/null | wc -l)\n   echo \"Standard TOC items: $standard_toc\"\n\n**Python Approach - 
Advanced Navigation Analysis**:\n\n.. code-block:: python\n\n   from epub_utils import Document\n   import xml.etree.ElementTree as ET\n   from pathlib import Path\n   \n   class NavigationAnalyzer:\n       def __init__(self, epub_path):\n           self.doc = Document(epub_path)\n           self.epub_path = Path(epub_path)\n           \n       def analyze_navigation(self):\n           \"\"\"Comprehensive navigation analysis.\"\"\"\n           print(f\"Analyzing: {self.epub_path.name}\")\n           print(\"=\" * 50)\n           \n           # Check EPUB version\n           version = getattr(self.doc.package.metadata, 'version', 'unknown')\n           print(f\"EPUB Version: {version}\")\n           print()\n           \n           # Analyze EPUB 3 nav document\n           self._analyze_nav_document()\n           \n           # Analyze EPUB 2 NCX document  \n           self._analyze_ncx_document()\n           \n           # Compare with standard TOC\n           self._analyze_standard_toc()\n           \n       def _analyze_nav_document(self):\n           \"\"\"Analyze EPUB 3 navigation document.\"\"\"\n           print(\"EPUB 3 Navigation Document Analysis:\")\n           print(\"-\" * 40)\n           \n           try:\n               nav_content = self.doc.nav\n               if nav_content:\n                   print(\"✅ Nav document found\")\n                   \n                   # Parse navigation structure\n                   nav_items = self._parse_nav_structure(nav_content)\n                   print(f\"Navigation items found: {len(nav_items)}\")\n                   \n                   # Show hierarchy\n                   print(\"\\nNavigation hierarchy:\")\n                   for item in nav_items[:10]:  # Show first 10\n                       indent = \"  \" * item['level']\n                       print(f\"{indent}→ {item['title']} ({item['href']})\")\n                   \n                   if len(nav_items) > 10:\n                       print(f\"  ... 
and {len(nav_items) - 10} more items\")\n                       \n               else:\n                   print(\"❌ No nav document found\")\n                   \n           except Exception as e:\n               print(f\"❌ Error accessing nav document: {e}\")\n           print()\n           \n       def _analyze_ncx_document(self):\n           \"\"\"Analyze EPUB 2 NCX document.\"\"\"\n           print(\"EPUB 2 NCX Document Analysis:\")\n           print(\"-\" * 30)\n           \n           try:\n               ncx_content = self.doc.ncx\n               if ncx_content:\n                   print(\"✅ NCX document found\")\n                   \n                   # Parse NCX structure\n                   ncx_items = self._parse_ncx_structure(ncx_content)\n                   print(f\"NCX navigation points: {len(ncx_items)}\")\n                   \n                   # Show structure\n                   print(\"\\nNCX structure:\")\n                   for item in ncx_items[:10]:  # Show first 10\n                       indent = \"  \" * item['level']\n                       print(f\"{indent}→ {item['title']} ({item['src']})\")\n                   \n                   if len(ncx_items) > 10:\n                       print(f\"  ... 
and {len(ncx_items) - 10} more items\")\n                       \n               else:\n                   print(\"❌ No NCX document found\")\n                   \n           except Exception as e:\n               print(f\"❌ Error accessing NCX document: {e}\")\n           print()\n           \n       def _analyze_standard_toc(self):\n           \"\"\"Analyze standard TOC extraction.\"\"\"\n           print(\"Standard TOC Analysis:\")\n           print(\"-\" * 22)\n           \n           try:\n               toc = self.doc.get_toc()\n               toc_items = len(toc.get_nav_items())\n               print(f\"✅ Standard TOC items: {toc_items}\")\n               \n               # Show some items\n               print(\"\\nStandard TOC items:\")\n               for i, item in enumerate(toc.get_nav_items()[:5]):\n                   print(f\"  → {item.title} ({item.href})\")\n               \n           except Exception as e:\n               print(f\"❌ Error with standard TOC: {e}\")\n           print()\n           \n       def _parse_nav_structure(self, nav_content):\n           \"\"\"Parse EPUB 3 nav document structure.\"\"\"\n           items = []\n           try:\n               root = ET.fromstring(nav_content)\n               # Handle namespaces\n               namespaces = {'xhtml': 'http://www.w3.org/1999/xhtml'}\n               \n               def parse_nav_list(ol_element, level=0):\n                   for li in ol_element.findall('.//xhtml:li', namespaces):\n                       a_elem = li.find('.//xhtml:a', namespaces)\n                       if a_elem is not None:\n                           title = a_elem.text or \"\"\n                           href = a_elem.get('href', '')\n                           items.append({\n                               'title': title.strip(),\n                               'href': href,\n                               'level': level\n                           })\n                           \n                           
# Check for nested lists\n                           nested_ol = li.find('.//xhtml:ol', namespaces)\n                           if nested_ol is not None:\n                               parse_nav_list(nested_ol, level + 1)\n               \n               # Find main navigation\n               nav_elem = root.find('.//xhtml:nav[@*=\"toc\"]', namespaces)\n               if nav_elem is None:\n                   nav_elem = root.find('.//xhtml:nav', namespaces)\n               \n               if nav_elem is not None:\n                   ol_elem = nav_elem.find('.//xhtml:ol', namespaces)\n                   if ol_elem is not None:\n                       parse_nav_list(ol_elem)\n                       \n           except ET.ParseError as e:\n               print(f\"Warning: Could not parse nav XML: {e}\")\n           \n           return items\n           \n       def _parse_ncx_structure(self, ncx_content):\n           \"\"\"Parse EPUB 2 NCX document structure.\"\"\"\n           items = []\n           try:\n               root = ET.fromstring(ncx_content)\n               # NCX namespace\n               namespaces = {'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n               \n               def parse_nav_point(nav_point, level=0):\n                   # Get label\n                   nav_label = nav_point.find('ncx:navLabel/ncx:text', namespaces)\n                   title = nav_label.text if nav_label is not None else \"\"\n                   \n                   # Get content source\n                   content = nav_point.find('ncx:content', namespaces)\n                   src = content.get('src', '') if content is not None else \"\"\n                   \n                   items.append({\n                       'title': title.strip(),\n                       'src': src,\n                       'level': level\n                   })\n                   \n                   # Process child nav points\n                   for child_nav_point in 
nav_point.findall('ncx:navPoint', namespaces):\n                       parse_nav_point(child_nav_point, level + 1)\n               \n               # Find all top-level navigation points\n               nav_map = root.find('ncx:navMap', namespaces)\n               if nav_map is not None:\n                   for nav_point in nav_map.findall('ncx:navPoint', namespaces):\n                       parse_nav_point(nav_point)\n                       \n           except ET.ParseError as e:\n               print(f\"Warning: Could not parse NCX XML: {e}\")\n           \n           return items\n   \n   # Usage examples\n   def analyze_single_epub(epub_path):\n       \"\"\"Analyze a single EPUB file.\"\"\"\n       analyzer = NavigationAnalyzer(epub_path)\n       analyzer.analyze_navigation()\n   \n   def compare_navigation_across_epubs(epub_directory):\n       \"\"\"Compare navigation structures across multiple EPUB files.\"\"\"\n       epub_files = list(Path(epub_directory).glob(\"*.epub\"))\n       \n       print(f\"Comparing navigation across {len(epub_files)} EPUB files\")\n       print(\"=\" * 60)\n       \n       results = []\n       \n       for epub_path in epub_files:\n           try:\n               doc = Document(str(epub_path))\n               \n               # Check what navigation documents are available\n               has_nav = bool(doc.nav)\n               has_ncx = bool(doc.ncx)\n               standard_toc_count = len(doc.get_toc().get_nav_items())\n               \n               results.append({\n                   'file': epub_path.name,\n                   'has_nav': has_nav,\n                   'has_ncx': has_ncx,\n                   'toc_items': standard_toc_count,\n                   'version': getattr(doc.package.metadata, 'version', 'unknown')\n               })\n               \n           except Exception as e:\n               print(f\"Error processing {epub_path.name}: {e}\")\n       \n       # Print comparison table\n       
print(f\"{'File':<30} {'Version':<8} {'Nav':<5} {'NCX':<5} {'TOC Items':<10}\")\n       print(\"-\" * 65)\n       \n       for result in results:\n           nav_mark = \"✅\" if result['has_nav'] else \"❌\"\n           ncx_mark = \"✅\" if result['has_ncx'] else \"❌\"\n           \n           print(f\"{result['file']:<30} {result['version']:<8} \"\n                 f\"{nav_mark:<5} {ncx_mark:<5} {result['toc_items']:<10}\")\n   \n   # Example usage\n   if __name__ == \"__main__\":\n       # Analyze single file\n       analyze_single_epub(\"/path/to/your/book.epub\")\n       \n       # Compare multiple files\n       compare_navigation_across_epubs(\"/path/to/epub/collection\")\n\nBuilding Smart Reading Lists\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Scenario**: Create curated reading lists based on navigation complexity and structure.\n\n.. code-block:: python\n\n   from epub_utils import Document\n   import json\n   from pathlib import Path\n   from collections import defaultdict\n   \n   class ReadingListBuilder:\n       def __init__(self):\n           self.books = []\n           \n       def analyze_book_complexity(self, epub_path):\n           \"\"\"Analyze book's structural complexity.\"\"\"\n           try:\n               doc = Document(str(epub_path))\n               \n               # Get navigation info\n               toc_items = len(doc.get_toc().get_nav_items())\n               has_advanced_nav = bool(doc.nav) or bool(doc.ncx)\n               \n               # Get file structure info\n               files_info = doc.get_files_info()\n               html_files = [f for f in files_info if f['media_type'] == 'application/xhtml+xml']\n               \n               complexity_score = self._calculate_complexity_score(\n                   toc_items, len(html_files), has_advanced_nav\n               )\n               \n               return {\n                   'path': epub_path,\n                   'title': getattr(doc.package.metadata, 'title', ''),\n             
      'author': getattr(doc.package.metadata, 'creator', ''),\n                   'toc_items': toc_items,\n                   'html_files': len(html_files),\n                   'has_advanced_nav': has_advanced_nav,\n                   'complexity_score': complexity_score,\n                   'complexity_level': self._get_complexity_level(complexity_score)\n               }\n               \n           except Exception as e:\n               print(f\"Error analyzing {epub_path}: {e}\")\n               return None\n               \n       def _calculate_complexity_score(self, toc_items, html_files, has_advanced_nav):\n           \"\"\"Calculate structural complexity score.\"\"\"\n           score = 0\n           \n           # TOC complexity\n           if toc_items > 50:\n               score += 30\n           elif toc_items > 20:\n               score += 20\n           elif toc_items > 10:\n               score += 10\n           \n           # File structure complexity\n           if html_files > 100:\n               score += 25\n           elif html_files > 50:\n               score += 15\n           elif html_files > 20:\n               score += 10\n           \n           # Advanced navigation features\n           if has_advanced_nav:\n               score += 15\n           \n           return min(score, 100)  # Cap at 100\n           \n       def _get_complexity_level(self, score):\n           \"\"\"Convert score to complexity level.\"\"\"\n           if score >= 70:\n               return \"Advanced\"\n           elif score >= 40:\n               return \"Intermediate\"\n           else:\n               return \"Beginner\"\n               \n       def build_reading_lists(self, epub_directory, output_file=\"reading_lists.json\"):\n           \"\"\"Build categorized reading lists.\"\"\"\n           epub_files = list(Path(epub_directory).glob(\"*.epub\"))\n           \n           print(f\"Analyzing {len(epub_files)} EPUB files for reading lists...\")\n           
\n           # Analyze all books\n           for epub_path in epub_files:\n               book_info = self.analyze_book_complexity(epub_path)\n               if book_info:\n                   self.books.append(book_info)\n           \n           # Categorize books\n           categories = defaultdict(list)\n           \n           for book in self.books:\n               # By complexity\n               categories[f\"complexity_{book['complexity_level'].lower()}\"].append(book)\n               \n               # By navigation richness\n               if book['toc_items'] >= 20:\n                   categories['detailed_structure'].append(book)\n               \n               if book['has_advanced_nav']:\n                   categories['advanced_navigation'].append(book)\n           \n           # Create final reading lists\n           reading_lists = {\n               'beginner_friendly': {\n                   'description': 'Books with simple structure, perfect for casual reading',\n                   'books': sorted(categories['complexity_beginner'], \n                                 key=lambda x: x['toc_items'])[:10]\n               },\n               'intermediate_reads': {\n                   'description': 'Well-structured books with moderate complexity',\n                   'books': sorted(categories['complexity_intermediate'], \n                                 key=lambda x: x['complexity_score'])[:15]\n               },\n               'advanced_studies': {\n                   'description': 'Complex books with rich navigation, ideal for research',\n                   'books': sorted(categories['complexity_advanced'], \n                                 key=lambda x: x['complexity_score'], reverse=True)[:10]\n               },\n               'detailed_references': {\n                   'description': 'Books with detailed table of contents',\n                   'books': sorted(categories['detailed_structure'], \n                                 key=lambda x: 
x['toc_items'], reverse=True)[:12]\n               },\n               'enhanced_navigation': {\n                   'description': 'Books with advanced navigation features',\n                   'books': categories['advanced_navigation'][:10]\n               }\n           }\n           \n           # Save to file\n           with open(output_file, 'w', encoding='utf-8') as f:\n               json.dump(reading_lists, f, indent=2, ensure_ascii=False, default=str)\n           \n           # Print summary\n           print(f\"\\nReading Lists Generated:\")\n           print(\"=\" * 25)\n           for list_name, list_data in reading_lists.items():\n               print(f\"{list_name}: {len(list_data['books'])} books\")\n               print(f\"  → {list_data['description']}\")\n           \n           print(f\"\\nSaved to: {output_file}\")\n           \n   # Usage\n   builder = ReadingListBuilder()\n   builder.build_reading_lists(\"/path/to/epub/collection\")\n\nThese examples demonstrate the power and flexibility of ``epub-utils`` for various real-world scenarios. Whether you're managing a digital library, performing quality assurance, building automated workflows, or analyzing navigation structures, epub-utils provides the tools you need to work effectively with EPUB files.\n"
  },
  {
    "path": "docs/formats.rst",
    "content": "Output Formats Reference\n========================\n\n``epub-utils`` supports multiple output formats to suit different use cases. This guide explains each \nformat with examples and best practices for when to use each one.\n\nOverview\n--------\n\nAll commands in ``epub-utils`` support the ``--format`` option with these values:\n\n- ``xml`` - Syntax-highlighted XML (default for most commands)\n- ``raw`` - Unformatted, raw content\n- ``kv`` - Key-value pairs (where supported)\n- ``plain`` - Plain text with HTML tags stripped (content command only)\n- ``table`` - Formatted table (files command only)\n\nAdditionally, most commands support the ``--pretty-print`` option to format XML output with proper indentation and structure.\n\nXML Format (Default)\n--------------------\n\nThe XML format provides syntax-highlighted, pretty-printed XML output that's easy to read.\n\n**When to use**: Interactive inspection, debugging, learning EPUB structure\n\n**Example**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub metadata --format xml\n\n**Output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?>\n   <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" \n             xmlns:opf=\"http://www.idpf.org/2007/opf\">\n     <dc:title>The Great Gatsby</dc:title>\n     <dc:creator>F. 
Scott Fitzgerald</dc:creator>\n     <dc:language>en</dc:language>\n     <dc:identifier id=\"bookid\">urn:uuid:12345678-1234-1234-1234-123456789abc</dc:identifier>\n     <dc:publisher>Scribner</dc:publisher>\n     <dc:date>2021-01-01</dc:date>\n     <dc:subject>Fiction</dc:subject>\n     <dc:subject>Classic Literature</dc:subject>\n   </metadata>\n\n**Features**:\n\n- Color syntax highlighting\n- Proper indentation\n- Easy to read structure\n- Preserves all XML attributes and namespaces\n\nRaw Format\n----------\n\nThe raw format outputs unprocessed content exactly as stored in the EPUB file.\n\n**When to use**: Piping to other tools, automated processing, debugging XML issues\n\n**Example**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub metadata --format raw\n\n**Output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?><metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\"><dc:title>The Great Gatsby</dc:title><dc:creator>F. Scott Fitzgerald</dc:creator><dc:language>en</dc:language><dc:identifier id=\"bookid\">urn:uuid:12345678-1234-1234-1234-123456789abc</dc:identifier><dc:publisher>Scribner</dc:publisher><dc:date>2021-01-01</dc:date><dc:subject>Fiction</dc:subject><dc:subject>Classic Literature</dc:subject></metadata>\n\n**Use cases**:\n\n.. code-block:: bash\n\n   # Pipe to xmllint for custom formatting\n   $ epub-utils book.epub package --format raw | xmllint --format -\n\n   # Extract specific elements with grep\n   $ epub-utils book.epub manifest --format raw | grep 'media-type=\"text/css\"'\n\n   # Validate XML structure\n   $ epub-utils book.epub toc --format raw | xmllint --valid -\n\nKey-Value Format\n----------------\n\nThe key-value format presents metadata as simple ``key: value`` pairs, perfect for scripting.\n\n**When to use**: Shell scripting, automated data extraction, configuration files\n\n**Supported commands**: ``metadata``\n\n**Example**:\n\n.. 
code-block:: bash\n\n   $ epub-utils book.epub metadata --format kv\n\n**Output**:\n\n.. code-block:: text\n\n   title: The Great Gatsby\n   creator: F. Scott Fitzgerald\n   language: en\n   identifier: urn:uuid:12345678-1234-1234-1234-123456789abc\n   publisher: Scribner\n   date: 2021-01-01\n   subject: Fiction, Classic Literature\n\n**Scripting examples**:\n\n.. code-block:: bash\n\n   # Extract just the title\n   title=$(epub-utils book.epub metadata --format kv | grep \"^title:\" | cut -d' ' -f2-)\n\n   # Get all metadata into shell variables\n   eval \"$(epub-utils book.epub metadata --format kv | sed 's/^/meta_/')\"\n   echo \"Book title: $meta_title\"\n   echo \"Author: $meta_creator\"\n\n   # Create a simple database\n   echo \"filename,title,author\" > books.csv\n   for epub in *.epub; do\n       metadata=$(epub-utils \"$epub\" metadata --format kv)\n       title=$(echo \"$metadata\" | grep \"^title:\" | cut -d' ' -f2- | tr ',' ';')\n       author=$(echo \"$metadata\" | grep \"^creator:\" | cut -d' ' -f2- | tr ',' ';')\n       echo \"$epub,$title,$author\" >> books.csv\n   done\n\nPlain Text Format\n-----------------\n\nThe plain text format strips HTML tags and returns readable text content.\n\n**When to use**: Content analysis, word counting, text extraction\n\n**Supported commands**: ``content``, ``files`` (with file path)\n\n**Example**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub content chapter1 --format plain\n\n**Output**:\n\n.. code-block:: text\n\n   Chapter 1: The Beginning\n\n   In my younger and more vulnerable years my father gave me some advice \n   that I've carried with me ever since. \"Whenever you feel like criticizing \n   anyone,\" he told me, \"just remember that all the people in this world \n   haven't had the advantages that you've had.\"\n\n**Use cases**:\n\n.. 
code-block:: bash\n\n   # Count words in a chapter (using content command)\n   word_count=$(epub-utils book.epub content chapter1 --format plain | wc -w)\n   echo \"Chapter 1 has $word_count words\"\n\n   # Extract all text for analysis (using files command)\n   epub-utils book.epub files OEBPS/chapter1.xhtml --format plain > chapter1.txt\n\n   # Search for specific content in any file\n   if epub-utils book.epub files OEBPS/chapter2.xhtml --format plain | grep -q \"important phrase\"; then\n       echo \"Found the phrase in chapter 2\"\n   fi\n\n   # Access files by path without knowing manifest IDs\n   epub-utils book.epub files OEBPS/styles/main.css\n   epub-utils book.epub files META-INF/container.xml\n\nTable Format\n------------\n\nThe table format presents file information in a readable tabular layout.\n\n**When to use**: File analysis, human-readable file listings\n\n**Supported commands**: ``files``\n\n**Example**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub files --format table\n\n**Output**:\n\n.. 
code-block:: text\n\n   File Information for book.epub\n   ┌────────────────────────────────────────┬──────────┬──────────────┬─────────────────────┐\n   │ Path                                   │ Size     │ Compressed   │ Modified            │\n   ├────────────────────────────────────────┼──────────┼──────────────┼─────────────────────┤\n   │ META-INF/container.xml                 │ 230 B    │ 140 B        │ 2021-01-01 10:00:00│\n   │ OEBPS/content.opf                      │ 2.1 KB   │ 856 B        │ 2021-01-01 10:00:00│\n   │ OEBPS/toc.ncx                          │ 1.8 KB   │ 542 B        │ 2021-01-01 10:00:00│\n   │ OEBPS/Text/chapter01.xhtml             │ 12.4 KB  │ 3.2 KB       │ 2021-01-01 10:00:00│\n   │ OEBPS/Text/chapter02.xhtml             │ 15.6 KB  │ 4.1 KB       │ 2021-01-01 10:00:00│\n   │ OEBPS/Styles/stylesheet.css            │ 3.2 KB   │ 1.1 KB       │ 2021-01-01 10:00:00│\n   │ OEBPS/Images/cover.jpg                 │ 145.2 KB │ 144.8 KB     │ 2021-01-01 10:00:00│\n   └────────────────────────────────────────┴──────────┴──────────────┴─────────────────────┘\n\nCommand-Specific Format Support\n-------------------------------\n\nHere's a quick reference for which formats each command supports:\n\n.. list-table:: Format Support by Command\n   :header-rows: 1\n   :widths: 20 15 15 15 15 15\n\n   * - Command\n     - XML\n     - Raw\n     - KV\n     - Plain\n     - Table\n   * - ``container``\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n     - ✗\n   * - ``package``\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n     - ✗\n   * - ``toc``\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n     - ✗\n   * - ``metadata``\n     - ✓\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n   * - ``manifest``\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n     - ✗\n   * - ``spine``\n     - ✓\n     - ✓\n     - ✗\n     - ✗\n     - ✗\n   * - ``content``\n     - ✓\n     - ✓\n     - ✗\n     - ✓\n     - ✗\n   * - ``files``\n     - ✓*\n     - ✓\n     - ✗\n     - ✓*\n     - ✓*\n\n.. 
note::\n   \\* For the ``files`` command: ``xml`` and ``plain`` formats are only available when specifying a file path, and ``table`` is only available when listing files. When listing files (no path specified), only ``table`` and ``raw`` formats are supported.\n\nAdvanced Format Usage\n---------------------\n\nCombining Formats with Shell Tools\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n**Pretty-print with custom tools**:\n\n.. code-block:: bash\n\n   # Use xmllint for custom XML formatting\n   epub-utils book.epub package --format raw | xmllint --format --noblanks -\n\n   # Convert to JSON using xq (if available)\n   epub-utils book.epub metadata --format raw | xq '.'\n\n**Processing key-value output**:\n\n.. code-block:: bash\n\n   # Convert to environment variables\n   export $(epub-utils book.epub metadata --format kv | tr ' ' '_' | tr ':' '=')\n   echo \"Title: $title\"\n\n   # Create YAML-like output\n   epub-utils book.epub metadata --format kv | sed 's/^/  /' | sed '1i metadata:'\n\n**Text analysis workflows**:\n\n.. code-block:: bash\n\n   # Analyze reading time (assuming 200 words per minute)\n   words=$(epub-utils book.epub content chapter1 --format plain | wc -w)\n   minutes=$((words / 200))\n   echo \"Chapter 1 reading time: $minutes minutes\"\n\n   # Extract quotes (lines starting with quotation marks)\n   epub-utils book.epub content chapter1 --format plain | grep '^\".*\"$'\n\nFormat Selection Guidelines\n---------------------------\n\nChoose the right format based on your use case:\n\n**For Human Reading**:\n\n- Use ``xml`` for inspecting EPUB structure\n- Use ``table`` for file listings\n- Use ``plain`` for content reading\n\n**For Automation**:\n\n- Use ``raw`` for piping to other XML tools\n- Use ``kv`` for simple scripting and data extraction\n- Use ``raw`` with ``files`` for getting simple file lists\n\n**For Integration**:\n\n- Use ``raw`` when feeding into other programs\n- Use ``kv`` for configuration file generation\n- Use ``plain`` for text processing workflows\n\n**Performance 
Considerations**:\n\n- ``raw`` format is fastest (no syntax highlighting)\n- ``xml`` format has slight overhead for highlighting\n- ``table`` format requires additional formatting computation\n\nError Handling with Formats\n----------------------------\n\nDifferent formats handle errors differently:\n\n.. code-block:: bash\n\n   # XML format shows formatted error messages\n   $ epub-utils corrupted.epub metadata --format xml\n   Error: Unable to parse metadata\n\n   # Raw format may show parsing errors directly\n   $ epub-utils corrupted.epub metadata --format raw\n   ParseError: Invalid XML structure\n\n   # KV format gracefully handles missing fields\n   $ epub-utils incomplete.epub metadata --format kv\n   title: \n   creator: Unknown Author\n   language: en\n\nCustom Format Processing\n------------------------\n\nYou can create custom output formats by post-processing the raw output:\n\n.. code-block:: bash\n\n   #!/bin/zsh\n   # custom-json-format.sh - Convert metadata to JSON\n\n   epub_file=\"$1\"\n\n   echo \"{\"\n   epub-utils \"$epub_file\" metadata --format kv | while IFS=': ' read -r key value; do\n       if [[ -n \"$key\" && -n \"$value\" ]]; then\n           echo \"  \\\"$key\\\": \\\"$value\\\",\"\n       fi\n   done | sed '$s/,$//'\n   echo \"}\"\n\n.. 
code-block:: bash\n\n   #!/bin/zsh\n   # custom-markdown-format.sh - Convert metadata to Markdown\n\n   epub_file=\"$1\"\n   \n   echo \"# Book Information\"\n   echo \"\"\n   \n   epub-utils \"$epub_file\" metadata --format kv | while IFS=': ' read -r key value; do\n       if [[ -n \"$key\" && -n \"$value\" ]]; then\n           formatted_key=$(echo \"$key\" | sed 's/\\b\\w/\\U&/g')  # Title case\n           echo \"**$formatted_key**: $value\"\n       fi\n   done\n\nPretty-Print Option\n-------------------\n\nThe ``--pretty-print`` (or ``-pp``) option enhances XML output by adding proper indentation and structure, making it more readable for human inspection.\n\n**When to use**: Human review, debugging XML structure, cleaner output for documentation\n\n**Supported formats**: ``xml`` and ``raw``\n\n**Example without pretty-print**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub metadata --format raw\n\n**Output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?><metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\"><dc:title>The Great Gatsby</dc:title><dc:creator>F. Scott Fitzgerald</dc:creator><dc:language>en</dc:language></metadata>\n\n**Example with pretty-print**:\n\n.. code-block:: bash\n\n   $ epub-utils book.epub metadata --format raw --pretty-print\n\n**Output**:\n\n.. code-block:: xml\n\n   <?xml version=\"1.0\" encoding=\"UTF-8\"?>\n   <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" \n             xmlns:opf=\"http://www.idpf.org/2007/opf\">\n     <dc:title>The Great Gatsby</dc:title>\n     <dc:creator>F. Scott Fitzgerald</dc:creator>\n     <dc:language>en</dc:language>\n   </metadata>\n\n**Use cases**:\n\n.. 
code-block:: bash\n\n   # Better readability for manual inspection\n   epub-utils book.epub package --pretty-print\n   \n   # Clean output for documentation or examples\n   epub-utils book.epub container --format raw --pretty-print\n   \n   # Pipe to file with proper formatting\n   epub-utils book.epub toc --pretty-print > toc-formatted.xml\n\n**Note**: Pretty-print has no effect on ``kv``, ``plain``, or ``table`` formats as these are already optimized for readability.\n\nBest Practices\n--------------\n\n1. **Default to XML for interactive use** - it's the most readable\n2. **Use raw for scripting** - it's the most reliable for automation\n3. **Use kv for metadata extraction** - it's purpose-built for simple parsing\n4. **Use plain for content analysis** - it removes HTML complexity\n5. **Use pretty-print for human review** - it makes XML structure clearer\n6. **Always handle errors** - EPUB files can be malformed\n7. **Test with various EPUB files** - format output can vary with different EPUB structures\n\nThese format options make epub-utils flexible enough to handle everything from quick \ninteractive inspection to complex automated workflows.\n"
  },
  {
    "path": "docs/index.rst",
    "content": "epub-utils: EPUB Inspection and Manipulation\n=============================================\n\n.. image:: https://img.shields.io/pypi/v/epub-utils.svg\n   :target: https://pypi.org/project/epub-utils/\n   :alt: PyPI version\n\n.. image:: https://img.shields.io/pypi/pyversions/epub-utils.svg?logo=python&logoColor=white\n   :target: https://pypi.org/project/epub-utils/\n   :alt: Python versions\n\n.. image:: https://img.shields.io/badge/license-Apache%202.0-blue.svg\n   :target: https://github.com/ernestofgonzalez/epub-utils/blob/main/LICENSE\n   :alt: License\n\n**epub-utils** is a comprehensive Python library and command-line tool for working with EPUB files. \nIt provides both a programmatic API and an intuitive CLI interface for inspecting and parsing EPUB archives.\n\n.. note::\n   epub-utils supports **EPUB 2.0.1** and **EPUB 3.0+** specifications, ensuring compatibility \n   with the vast majority of EPUB files in circulation.\n\nKey Features\n------------\n\n**Rich CLI Interface**\n   - Syntax-highlighted XML output\n   - Multiple output formats (XML, raw, key-value, plain text)\n   - Comprehensive file inspection capabilities\n\n**Complete EPUB Support**\n   - Parse container.xml and package files\n   - Extract and display table of contents\n   - Access manifest and spine information\n   - Retrieve document content by ID\n\n**Metadata Extraction**\n   - Dublin Core metadata support\n   - EPUB-specific metadata fields\n   - Key-value output for easy parsing\n\n**Python API**\n   - Clean, object-oriented interface\n   - Lazy loading for performance\n   - Comprehensive error handling\n\nQuick Start\n-----------\n\nInstallation\n~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   $ pip install epub-utils\n\nBasic CLI Usage\n~~~~~~~~~~~~~~~\n\nInspect an EPUB file with a simple command:\n\n.. 
code-block:: bash\n\n   # Display metadata with beautiful syntax highlighting\n   $ epub-utils my-book.epub metadata\n\n   # Show table of contents structure\n   $ epub-utils my-book.epub toc\n\n   # Get key-value metadata for scripting\n   $ epub-utils my-book.epub metadata --format kv\n\nBasic Python Usage\n~~~~~~~~~~~~~~~~~~\n\n.. code-block:: python\n\n   from epub_utils import Document\n\n   # Load an EPUB document\n   doc = Document(\"path/to/book.epub\")\n\n   # Access metadata easily\n   print(f\"Title: {doc.package.metadata.title}\")\n   print(f\"Author: {doc.package.metadata.creator}\")\n   print(f\"Language: {doc.package.metadata.language}\")\n\n   # Get table of contents\n   toc_xml = doc.toc.to_xml()\n   print(toc_xml)\n\nWhy epub-utils?\n---------------\n\nepub-utils fills a crucial gap in the Python ecosystem for EPUB file manipulation. While there are \nlibraries for creating EPUBs, few focus on inspection and analysis. This tool is perfect for:\n\n**Publishers and Authors**\n   Validate EPUB structure and metadata before distribution\n\n**Digital Librarians**\n   Batch process and analyze EPUB collections\n\n**Automation Scripts**\n   Extract metadata for catalogs and databases\n\n**Debugging**\n   Inspect malformed or problematic EPUB files\n\n**Learning**\n   Understand EPUB structure and standards compliance\n\nDocumentation Contents\n----------------------\n\n.. toctree::\n   :maxdepth: 2\n   :caption: User Guide\n\n   installation\n   cli-tutorial\n   api-tutorial\n   examples\n   formats\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Reference\n\n   cli-reference\n   api-reference\n   epub-standards\n\n.. 
toctree::\n   :maxdepth: 1\n   :caption: Development\n\n   contributing\n   changelog\n\nCommunity & Support\n-------------------\n\n- **Source Code**: `GitHub Repository <https://github.com/ernestofgonzalez/epub-utils>`_\n- **Issues**: `Bug Reports & Feature Requests <https://github.com/ernestofgonzalez/epub-utils/issues>`_\n- **PyPI**: `Package Index <https://pypi.org/project/epub-utils/>`_\n\nLicense\n-------\n\n``epub-utils`` is distributed under the `Apache License 2.0 <https://github.com/ernestofgonzalez/epub-utils/blob/main/LICENSE>`_.\n"
  },
  {
    "path": "docs/installation.rst",
    "content": "Installation Guide\n==================\n\nSystem Requirements\n-------------------\n\n``epub-utils`` requires Python 3.10 or higher and works on:\n\n- **Linux** (Ubuntu 18.04+, Debian 10+, CentOS 7+, Fedora 30+)\n- **macOS** (10.14+)\n- **Windows** (Windows 10+)\n\nInstalling from PyPI\n---------------------\n\nThe easiest way to install ``epub-utils`` is using pip:\n\n.. code-block:: bash\n\n   $ pip install epub-utils\n\nThis will install the latest stable version with all required dependencies.\n\nDevelopment Installation\n------------------------\n\nIf you want to contribute to ``epub-utils`` or use the latest development version:\n\n.. code-block:: bash\n\n   # Clone the repository\n   $ git clone https://github.com/ernestofgonzalez/epub-utils.git\n   $ cd epub-utils\n\n   # Create a virtual environment\n   $ python -m venv env\n   $ source env/bin/activate  # On Windows: env\\Scripts\\activate\n\n   # Install in development mode\n   $ pip install -e .\n\n   # Install development dependencies\n   $ pip install -r requirements/requirements-testing.txt\n   $ pip install -r requirements/requirements-linting.txt\n\nVirtual Environment Installation\n--------------------------------\n\nFor isolated installations, we recommend using virtual environments:\n\nUsing venv (Python 3.3+)\n~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n   # Create virtual environment\n   $ python -m venv epub-utils-env\n\n   # Activate virtual environment\n   $ source epub-utils-env/bin/activate  # Linux/macOS\n   $ epub-utils-env\\Scripts\\activate     # Windows\n\n   # Install epub-utils\n   $ pip install epub-utils\n\nUsing conda\n~~~~~~~~~~~\n\n.. 
code-block:: bash\n\n   # Create conda environment\n   $ conda create -n epub-utils python=3.10\n\n   # Activate environment\n   $ conda activate epub-utils\n\n   # Install epub-utils\n   $ pip install epub-utils\n\nVerifying Installation\n----------------------\n\nAfter installation, verify that ``epub-utils`` is working correctly:\n\n.. code-block:: bash\n\n   # Check version\n   $ epub-utils --version\n\n   # Test with a sample EPUB (if you have one)\n   $ epub-utils sample.epub metadata\n\nIf you see the version number and can run commands without errors, the installation was successful!\n\nInstalling from Source\n----------------------\n\nTo install from source code:\n\n.. code-block:: bash\n\n   # Download and extract the source\n   $ wget https://github.com/ernestofgonzalez/epub-utils/archive/main.zip\n   $ unzip main.zip\n   $ cd epub-utils-main\n\n   # Install\n   $ pip install .\n\nUpgrading\n---------\n\nTo upgrade to the latest version:\n\n.. code-block:: bash\n\n   $ pip install --upgrade epub-utils\n\nUninstalling\n------------\n\nTo remove epub-utils:\n\n.. code-block:: bash\n\n   $ pip uninstall epub-utils\n\nPerformance Considerations\n--------------------------\n\nInstalling lxml\n~~~~~~~~~~~~~~~\n\nWhile not required, installing ``lxml`` can significantly improve XML parsing performance:\n\n.. code-block:: bash\n\n   $ pip install lxml\n\n``epub-utils`` will automatically use lxml if available, falling back to the standard library's \n``xml.etree.ElementTree`` if not.\n\n"
  },
  {
    "path": "epub_utils/__init__.py",
    "content": "from epub_utils.container import Container\nfrom epub_utils.doc import Document\n\n__all__ = ['Document', 'Container']\n"
  },
  {
    "path": "epub_utils/__main__.py",
    "content": "from epub_utils.cli import main\n\nif __name__ == '__main__':\n\tmain(prog_name='epub-utils')\n"
  },
  {
    "path": "epub_utils/cli.py",
    "content": "import click\n\nfrom epub_utils.doc import Document\nfrom epub_utils.exceptions import (\n\tEPUBError,\n\tFileNotFoundError,\n)\n\nVERSION = '0.1.0a1'\n\n\ndef format_error_message(e: Exception) -> str:\n\t\"\"\"Format exception messages for CLI output.\"\"\"\n\tif isinstance(e, EPUBError):\n\t\t# Use the custom formatting from our EPUBError class\n\t\treturn str(e)\n\telse:\n\t\t# For other exceptions, just return the message\n\t\treturn str(e)\n\n\ndef print_version(ctx, param, value):\n\tif not value or ctx.resilient_parsing:\n\t\treturn\n\tclick.echo(VERSION)\n\tctx.exit()\n\n\n@click.group(\n\tcontext_settings=dict(help_option_names=['-h', '--help']),\n)\n@click.option(\n\t'-v',\n\t'--version',\n\tis_flag=True,\n\tcallback=print_version,\n\texpose_value=False,\n\tis_eager=True,\n\thelp='Print epub-utils version.',\n)\n@click.argument(\n\t'path',\n\ttype=click.Path(exists=True, file_okay=True),\n\trequired=True,\n)\n@click.pass_context\ndef main(ctx, path):\n\tctx.ensure_object(dict)\n\tctx.obj['path'] = path\n\n\ndef format_option(default='xml'):\n\t\"\"\"Reusable decorator for the format option.\"\"\"\n\treturn click.option(\n\t\t'-fmt',\n\t\t'--format',\n\t\ttype=click.Choice(['raw', 'xml', 'plain', 'kv'], case_sensitive=False),\n\t\tdefault=default,\n\t\thelp=f'Output format, defaults to {default}.',\n\t)\n\n\ndef pretty_print_option():\n\t\"\"\"Reusable decorator for the pretty-print option.\"\"\"\n\treturn click.option(\n\t\t'-pp',\n\t\t'--pretty-print',\n\t\tis_flag=True,\n\t\tdefault=False,\n\t\thelp='Pretty-print XML output (only applies to str and xml format).',\n\t)\n\n\ndef output_document_part(doc, part_name, format, pretty_print=False):\n\t\"\"\"Helper function to output document parts in the specified format.\"\"\"\n\tpart = getattr(doc, part_name)\n\tif format == 'raw':\n\t\tclick.echo(part.to_str(pretty_print=pretty_print))\n\telif format == 'xml':\n\t\tclick.echo(part.to_xml(pretty_print=pretty_print))\n\telif format == 
'kv':\n\t\tif hasattr(part, 'to_kv') and callable(getattr(part, 'to_kv')):\n\t\t\tclick.echo(part.to_kv())\n\t\telse:\n\t\t\tclick.secho(\n\t\t\t\t'Key-value format not supported for this document part. Falling back to raw:\\n',\n\t\t\t\tfg='yellow',\n\t\t\t)\n\t\t\tclick.echo(part.to_str())\n\n\ndef format_file_size(size_bytes: int) -> str:\n\t\"\"\"Format file size in human-readable format.\"\"\"\n\tif size_bytes == 0:\n\t\treturn '0 B'\n\n\tsize_names = ['B', 'KB', 'MB', 'GB']\n\ti = 0\n\tsize = float(size_bytes)\n\n\twhile size >= 1024.0 and i < len(size_names) - 1:\n\t\tsize /= 1024.0\n\t\ti += 1\n\n\tif i == 0:\n\t\treturn f'{int(size)} {size_names[i]}'\n\telse:\n\t\treturn f'{size:.1f} {size_names[i]}'\n\n\ndef format_files_table(files_info: list) -> str:\n\t\"\"\"Format file information as a table.\"\"\"\n\tif not files_info:\n\t\treturn 'No files found in EPUB archive.'\n\n\t# Calculate column widths\n\tmax_path_width = max(len(file_info['path']) for file_info in files_info)\n\tmax_size_width = max(len(format_file_size(file_info['size'])) for file_info in files_info)\n\tmax_compressed_width = max(\n\t\tlen(format_file_size(file_info['compressed_size'])) for file_info in files_info\n\t)\n\n\t# Ensure minimum widths for headers\n\tpath_width = max(max_path_width, len('Path'))\n\tsize_width = max(max_size_width, len('Size'))\n\tcompressed_width = max(max_compressed_width, len('Compressed'))\n\tmodified_width = len('Modified')  # Fixed width for date/time\n\n\t# Create header\n\theader = f'{\"Path\":<{path_width}} | {\"Size\":>{size_width}} | {\"Compressed\":>{compressed_width}} | {\"Modified\":<{modified_width}}'\n\tseparator = '-' * len(header)\n\n\t# Create rows\n\trows = []\n\tfor file_info in files_info:\n\t\tpath = file_info['path'][:path_width]  # Truncate if too long\n\t\tsize = format_file_size(file_info['size'])\n\t\tcompressed = format_file_size(file_info['compressed_size'])\n\t\tmodified = file_info['modified']\n\n\t\trow = f'{path:<{path_width}} | 
{size:>{size_width}} | {compressed:>{compressed_width}} | {modified:<{modified_width}}'\n\t\trows.append(row)\n\n\t# Combine all parts\n\tresult = [header, separator] + rows\n\treturn '\\n'.join(result)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef container(ctx, format, pretty_print):\n\t\"\"\"Outputs the container information of the EPUB file.\"\"\"\n\ttry:\n\t\tdoc = Document(ctx.obj['path'])\n\t\toutput_document_part(doc, 'container', format, pretty_print)\n\texcept EPUBError as e:\n\t\tclick.secho('EPUB Error:', fg='red', bold=True, err=True)\n\t\tclick.secho(format_error_message(e), fg='red', err=True)\n\t\tctx.exit(1)\n\texcept Exception as e:\n\t\tclick.secho('Unexpected Error:', fg='red', bold=True, err=True)\n\t\tclick.secho(str(e), fg='red', err=True)\n\t\tctx.exit(1)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef package(ctx, format, pretty_print):\n\t\"\"\"Outputs the package information of the EPUB file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\toutput_document_part(doc, 'package', format, pretty_print)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.option(\n\t'--ncx',\n\tis_flag=True,\n\tdefault=False,\n\thelp='Force retrieval of NCX file (EPUB 2 navigation control file).',\n)\n@click.option(\n\t'--nav',\n\tis_flag=True,\n\tdefault=False,\n\thelp='Force retrieval of Navigation Document (EPUB 3 navigation file).',\n)\n@click.pass_context\ndef toc(ctx, format, pretty_print, ncx, nav):\n\t\"\"\"Outputs the Table of Contents (TOC) of the EPUB file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\n\tif ncx and nav:\n\t\tclick.secho('Error: --ncx and --nav flags cannot be used together.', fg='red', err=True)\n\t\tctx.exit(1)\n\n\tif ncx:\n\t\tpart = 'ncx'\n\t\tif doc.ncx is None:\n\t\t\tclick.secho(\n\t\t\t\t'Error: This document does not include a Navigation Control eXtended 
(NCX).',\n\t\t\t\tfg='red',\n\t\t\t\terr=True,\n\t\t\t)\n\t\t\tctx.exit(1)\n\telif nav:\n\t\tpart = 'nav'\n\t\tif doc.nav is None:\n\t\t\tclick.secho(\n\t\t\t\t'Error: This document does not include an EPUB Navigation Document.',\n\t\t\t\tfg='red',\n\t\t\t\terr=True,\n\t\t\t)\n\t\t\tctx.exit(1)\n\telse:\n\t\tpart = 'toc'\n\n\toutput_document_part(doc, part, format, pretty_print)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef metadata(ctx, format, pretty_print):\n\t\"\"\"Outputs the metadata information from the package file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\tpackage = doc.package\n\toutput_document_part(package, 'metadata', format, pretty_print)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef manifest(ctx, format, pretty_print):\n\t\"\"\"Outputs the manifest information from the package file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\tpackage = doc.package\n\toutput_document_part(package, 'manifest', format, pretty_print)\n\n\n@main.command()\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef spine(ctx, format, pretty_print):\n\t\"\"\"Outputs the spine information from the package file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\tpackage = doc.package\n\toutput_document_part(package, 'spine', format, pretty_print)\n\n\n@main.command()\n@click.argument('item_id', required=True)\n@format_option()\n@pretty_print_option()\n@click.pass_context\ndef content(ctx, item_id, format, pretty_print):\n\t\"\"\"Outputs the content of a document by its manifest item ID.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\n\tcontent = doc.find_content_by_id(item_id)\n\tif format == 'raw':\n\t\tclick.echo(content.to_str())\n\telif format == 'xml':\n\t\tif hasattr(content, 'to_xml'):\n\t\t\tclick.echo(content.to_xml(pretty_print=pretty_print))\n\t\telse:\n\t\t\tclick.echo(content.to_str())\n\telif format == 'plain':\n\t\tclick.echo(content.to_plain())\n\telif format == 
'kv':\n\t\tclick.secho(\n\t\t\t'Key-value format not supported for content documents. Falling back to raw:\\n',\n\t\t\tfg='yellow',\n\t\t)\n\t\tclick.echo(content.to_str())\n\n\n@main.command()\n@click.argument('file_path', required=False)\n@click.option(\n\t'-fmt',\n\t'--format',\n\ttype=click.Choice(['table', 'raw', 'xml', 'plain', 'kv'], case_sensitive=False),\n\tdefault=None,\n\thelp='Output format. For file listing: table, raw. For file content: raw, xml, plain, kv. Defaults to table for listing, xml for file content.',\n)\n@pretty_print_option()\n@click.pass_context\ndef files(ctx, file_path, format, pretty_print):\n\t\"\"\"List all files in the EPUB archive with their metadata, or output content of a specific file.\"\"\"\n\tdoc = Document(ctx.obj['path'])\n\n\t# Set dynamic default based on whether file_path is provided\n\tif format is None:\n\t\tformat = 'xml' if file_path else 'table'\n\n\tif file_path:\n\t\t# Display content of specific file\n\t\ttry:\n\t\t\tcontent = doc.get_file_by_path(file_path)\n\t\texcept FileNotFoundError as e:\n\t\t\tclick.secho('FileNotFoundError:', fg='red', bold=True, err=True)\n\t\t\tclick.secho(format_error_message(e), fg='red', err=True)\n\t\t\tctx.exit(1)\n\t\t\treturn\n\n\t\t# Handle XHTMLContent objects\n\t\tif hasattr(content, 'to_str'):\n\t\t\tif format == 'raw':\n\t\t\t\tclick.echo(content.to_str())\n\t\t\telif format == 'xml':\n\t\t\t\tif hasattr(content, 'to_xml'):\n\t\t\t\t\tclick.echo(content.to_xml(pretty_print=pretty_print))\n\t\t\t\telse:\n\t\t\t\t\tclick.echo(content.to_str())\n\t\t\telif format == 'plain':\n\t\t\t\tif hasattr(content, 'to_plain'):\n\t\t\t\t\tclick.echo(content.to_plain())\n\t\t\t\telse:\n\t\t\t\t\tclick.echo(content.to_str())\n\t\t\telif format == 'kv':\n\t\t\t\tclick.secho(\n\t\t\t\t\t'Key-value format not supported for file content. 
Falling back to raw:\\n',\n\t\t\t\t\tfg='yellow',\n\t\t\t\t)\n\t\t\t\tclick.echo(content.to_str())\n\t\t\telif format == 'table':\n\t\t\t\t# For file content, table format doesn't make sense, fall back to raw\n\t\t\t\tclick.secho(\n\t\t\t\t\t'Table format not supported for file content. Falling back to raw:\\n',\n\t\t\t\t\tfg='yellow',\n\t\t\t\t)\n\t\t\t\tclick.echo(content.to_str())\n\t\telse:\n\t\t\t# Handle raw string content (non-XHTML files)\n\t\t\tclick.echo(content)\n\telse:\n\t\t# List all files (existing behavior)\n\t\tfiles_info = doc.get_files_info()\n\n\t\tif format == 'table':\n\t\t\tclick.echo(format_files_table(files_info))\n\t\telif format == 'raw':\n\t\t\tfor file_info in files_info:\n\t\t\t\tclick.echo(f'{file_info[\"path\"]}')\n\t\telse:\n\t\t\t# For file listing, only table and raw make sense\n\t\t\tif format in ['xml', 'plain', 'kv']:\n\t\t\t\tclick.secho(\n\t\t\t\t\tf'{format.title()} format not supported for file listing. Using table format:\\n',\n\t\t\t\t\tfg='yellow',\n\t\t\t\t)\n\t\t\tclick.echo(format_files_table(files_info))\n"
  },
  {
    "path": "epub_utils/container.py",
    "content": "\"\"\"\nOpen Container Format: https://www.w3.org/TR/epub/#sec-ocf\n\nThis file includes the `Container` class, which is responsible for parsing the `container.xml` file\nof an EPUB archive. The `container.xml` file is a required component of the EPUB Open Container\nFormat (OCF) and is located in the `META-INF` directory of the EPUB archive.\n\nThe `container.xml` file serves as the entry point for identifying the package document(s)\nwithin the EPUB container. It must conform to the following structure as defined in the EPUB\nspecification:\n\n- The root element is `<container>` and must include the `version` attribute with the value \"1.0\".\n- The `<container>` element must contain exactly one `<rootfiles>` child element.\n- The `<rootfiles>` element must contain one or more `<rootfile>` child elements.\n- Each `<rootfile>` element must include a `full-path` attribute that specifies the location of\n  the package document relative to the root of the EPUB container.\n\nNamespace:\n- All elements in the `container.xml` file are in the namespace\n  `urn:oasis:names:tc:opendocument:xmlns:container`.\n\nFor more details on the structure and requirements of the `container.xml` file, refer to the\nEPUB specification: https://www.w3.org/TR/epub/#sec-ocf\n\"\"\"\n\ntry:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nfrom epub_utils.exceptions import InvalidEPUBError, ParseError\nfrom epub_utils.printers import XMLPrinter\n\n\nclass Container:\n\t\"\"\"\n\tRepresents the parsed container.xml file of an EPUB.\n\n\tAttributes:\n\t    xml_content (str): The raw XML content of the container.xml file.\n\t    rootfile_path (str): The path to the rootfile specified in the container.\n\t\"\"\"\n\n\tNAMESPACE = 'urn:oasis:names:tc:opendocument:xmlns:container'\n\tROOTFILE_XPATH = f'.//{{{NAMESPACE}}}rootfile'\n\n\tdef __init__(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tInitialize the Container by parsing the 
container.xml data.\n\n\t\tArgs:\n\t\t    xml_content (str): The raw XML content of the container.xml file.\n\t\t\"\"\"\n\t\tself.xml_content = xml_content\n\t\tself.rootfile_path: str = None\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef _find_rootfile_element(self, root: etree.Element) -> etree.Element:\n\t\t\"\"\"\n\t\tFinds the rootfile element in the container.xml data.\n\n\t\tArgs:\n\t\t    root (etree.Element): The root element of the parsed XML.\n\n\t\tReturns:\n\t\t    etree.Element: The rootfile element.\n\n\t\tRaises:\n\t\t    InvalidEPUBError: If the rootfile element or its 'full-path' attribute is missing.\n\t\t\"\"\"\n\t\trootfile_element = root.find(self.ROOTFILE_XPATH)\n\t\tif rootfile_element is None:\n\t\t\traise InvalidEPUBError(\n\t\t\t\t'Invalid container.xml: Missing rootfile element',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Ensure the container.xml contains a rootfile element',\n\t\t\t\t\t'Check that the container structure follows EPUB specifications',\n\t\t\t\t\t'Verify the EPUB was created with compliant tools',\n\t\t\t\t],\n\t\t\t)\n\n\t\tif 'full-path' not in rootfile_element.attrib:\n\t\t\traise InvalidEPUBError(\n\t\t\t\t\"Invalid container.xml: Missing 'full-path' attribute in rootfile element\",\n\t\t\t\tsuggestions=[\n\t\t\t\t\t\"Ensure the rootfile element has a 'full-path' attribute\",\n\t\t\t\t\t'Check that the container.xml follows EPUB specifications',\n\t\t\t\t\t'Verify the EPUB package structure is complete',\n\t\t\t\t],\n\t\t\t)\n\n\t\treturn rootfile_element\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tParses the container.xml data to extract the rootfile path.\n\n\t\tArgs:\n\t\t    xml_content (str): The raw XML content of 
the container.xml file.\n\n\t\tRaises:\n\t\t    ParseError: If the XML is invalid or cannot be parsed.\n\t\t    InvalidEPUBError: If the container.xml structure is invalid.\n\t\t\"\"\"\n\t\ttry:\n\t\t\tif isinstance(xml_content, str):\n\t\t\t\txml_content = xml_content.encode('utf-8')\n\t\t\troot = etree.fromstring(xml_content)\n\t\t\trootfile_element = self._find_rootfile_element(root)\n\t\t\tself.rootfile_path = rootfile_element.attrib['full-path']\n\n\t\t\tif not self.rootfile_path.strip():\n\t\t\t\traise InvalidEPUBError(\n\t\t\t\t\t\"Invalid container.xml: 'full-path' attribute is empty\",\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t\"Ensure the rootfile element has a non-empty 'full-path' attribute\",\n\t\t\t\t\t\t'Check that the path points to a valid OPF file',\n\t\t\t\t\t\t'Verify the EPUB package structure is complete',\n\t\t\t\t\t],\n\t\t\t\t)\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in container.xml: {str(e)}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the container.xml file contains valid XML',\n\t\t\t\t\t'Verify the file is not corrupted',\n\t\t\t\t\t'Ensure all XML tags are properly closed',\n\t\t\t\t\t'Check for invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n"
  },
  {
    "path": "epub_utils/content/__init__.py",
    "content": "from epub_utils.content.base import Content\nfrom epub_utils.content.xhtml import XHTMLContent\n\n__all__ = ['Content', 'XHTMLContent']\n"
  },
  {
    "path": "epub_utils/content/base.py",
    "content": "class Content:\n\t\"\"\"\n\tBase class for EPUB content documents.\n\n\tAttributes:\n\t    media_type (str): The MIME type of the content.\n\t    href (str): The path to the content file within the EPUB.\n\t\"\"\"\n\n\tdef __init__(self, media_type: str, href: str) -> None:\n\t\tself.media_type = media_type\n\t\tself.href = href\n"
  },
  {
    "path": "epub_utils/content/xhtml.py",
    "content": "import re\n\nfrom lxml import etree\n\nfrom epub_utils.content.base import Content\nfrom epub_utils.exceptions import ParseError, UnsupportedFormatError\nfrom epub_utils.printers import XMLPrinter\n\n\nclass XHTMLContent(Content):\n\t\"\"\"\n\tRepresents an XHTML content document within an EPUB file.\n\t\"\"\"\n\n\tMEDIA_TYPES = ['application/xhtml+xml', 'text/html']\n\n\tdef __init__(self, xml_content: str, media_type: str, href: str) -> None:\n\t\tself.xml_content = xml_content\n\n\t\tself._tree = None\n\n\t\tif media_type not in self.MEDIA_TYPES:\n\t\t\traise UnsupportedFormatError(\n\t\t\t\tf\"Media type '{media_type}' is not supported for XHTML content\",\n\t\t\t\tsuggestions=[\n\t\t\t\t\tf'Use one of the supported media types: {\", \".join(self.MEDIA_TYPES)}',\n\t\t\t\t\t'Check that this is an XHTML content file',\n\t\t\t\t\t'Verify the manifest declares the correct media type',\n\t\t\t\t],\n\t\t\t)\n\t\tsuper().__init__(media_type, href)\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef to_plain(self) -> str:\n\t\treturn self.inner_text\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\ttry:\n\t\t\tself._tree = etree.fromstring(xml_content.encode('utf-8'))\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in XHTML content file: {str(e)}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the content file contains valid XHTML',\n\t\t\t\t\t'Verify the file is not corrupted',\n\t\t\t\t\t'Ensure all XML tags are properly closed',\n\t\t\t\t\t'Check for invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n\n\t@property\n\tdef tree(self):\n\t\t\"\"\"Lazily parse and cache the XHTML tree.\"\"\"\n\t\tif self._tree is 
None:\n\t\t\tself._parse(self.xml_content)\n\t\treturn self._tree\n\n\t@property\n\tdef inner_text(self) -> str:\n\t\ttree = self.tree\n\n\t\tbody_elements = tree.xpath('//*[local-name()=\"body\"]')\n\n\t\tif body_elements:\n\t\t\tinner_text = ''.join(body_elements[0].itertext())\n\t\telse:\n\t\t\tinner_text = ''.join(tree.itertext())\n\n\t\t# Normalize whitespace\n\t\tinner_text = re.sub(r'\\s+', ' ', inner_text).strip()\n\n\t\treturn inner_text\n"
  },
  {
    "path": "epub_utils/doc.py",
    "content": "import os\nimport zipfile\nfrom datetime import datetime\nfrom functools import cached_property\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, Union\n\nfrom epub_utils.container import Container\nfrom epub_utils.content import XHTMLContent\nfrom epub_utils.exceptions import FileNotFoundError as EPUBFileNotFoundError\nfrom epub_utils.exceptions import InvalidEPUBError\nfrom epub_utils.navigation import EPUBNavDocNavigation, Navigation, NCXNavigation\nfrom epub_utils.package import Package\n\n\nclass Document:\n\t\"\"\"\n\tRepresents an EPUB document.\n\n\tAttributes:\n\t    path (Path): The path to the EPUB file.\n\t    _container (Container): The parsed container document.\n\t    _package (Package): The parsed package document.\n\t    _toc (TableOfContents): The parsed table of contents document.\n\t\"\"\"\n\n\tCONTAINER_FILE_PATH = 'META-INF/container.xml'\n\n\tdef __init__(self, path: Union[str, Path]) -> None:\n\t\t\"\"\"\n\t\tInitialize the Document from a given path.\n\n\t\tArgs:\n\t\t    path (str | Path): The path to the EPUB file.\n\n\t\tRaises:\n\t\t    InvalidEPUBError: If the file is not a valid EPUB archive.\n\t\t\"\"\"\n\t\tself.path: Path = Path(path)\n\n\t\tif not self.path.exists():\n\t\t\traise InvalidEPUBError(\n\t\t\t\tf'EPUB file does not exist: {self.path}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the file path is correct',\n\t\t\t\t\t'Verify the file has not been moved or deleted',\n\t\t\t\t],\n\t\t\t\tfile_path=str(self.path),\n\t\t\t)\n\n\t\tif not zipfile.is_zipfile(self.path):\n\t\t\traise InvalidEPUBError(\n\t\t\t\tf'File is not a valid ZIP archive: {self.path}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Ensure the file is a valid EPUB (which is a ZIP archive)',\n\t\t\t\t\t'Check that the file is not corrupted',\n\t\t\t\t\t'Verify the file extension is .epub',\n\t\t\t\t],\n\t\t\t\tfile_path=str(self.path),\n\t\t\t)\n\n\t\tself._container: Container = None\n\t\tself._package: Package = 
None\n\n\t\tself._toc: Navigation = None\n\t\tself._ncx: NCXNavigation = None\n\t\tself._nav: EPUBNavDocNavigation = None\n\n\tdef _read_file_from_epub(self, file_path: str) -> str:\n\t\t\"\"\"\n\t\tRead and decode a file from the EPUB archive.\n\n\t\tArgs:\n\t\t    file_path (str): Path to the file within the EPUB archive.\n\n\t\tReturns:\n\t\t    str: Decoded contents of the file.\n\n\t\tRaises:\n\t\t    EPUBFileNotFoundError: If the file is missing from the EPUB archive.\n\t\t\"\"\"\n\t\twith zipfile.ZipFile(self.path, 'r') as epub_zip:\n\t\t\tnorm_namelist = {os.path.normpath(name): name for name in epub_zip.namelist()}\n\t\t\tnorm_path = os.path.normpath(file_path)\n\n\t\t\tif norm_path not in norm_namelist:\n\t\t\t\tavailable_files = sorted(norm_namelist.keys())[:10]  # Show first 10 files\n\t\t\t\tsuggestions = [\n\t\t\t\t\t'Check that the file path is correct',\n\t\t\t\t\t'Verify the EPUB file structure is complete',\n\t\t\t\t]\n\t\t\t\tif available_files:\n\t\t\t\t\tfile_list = ', '.join(available_files)\n\t\t\t\t\tif len(norm_namelist) > 10:\n\t\t\t\t\t\tfile_list += f' (and {len(norm_namelist) - 10} more)'\n\t\t\t\t\tsuggestions.append(f'Available files include: {file_list}')\n\n\t\t\t\traise EPUBFileNotFoundError(\n\t\t\t\t\tfile_path, epub_path=str(self.path), suggestions=suggestions\n\t\t\t\t)\n\n\t\t\ttry:\n\t\t\t\treturn epub_zip.read(norm_namelist[norm_path]).decode('utf-8')\n\t\t\texcept UnicodeDecodeError as e:\n\t\t\t\traise InvalidEPUBError(\n\t\t\t\t\tf\"Cannot decode file '{file_path}' as UTF-8\",\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t'Check that the file contains valid UTF-8 text',\n\t\t\t\t\t\t'Verify the EPUB file is not corrupted',\n\t\t\t\t\t\t'Ensure the file is a text-based format (XML, HTML, etc.)',\n\t\t\t\t\t],\n\t\t\t\t\tfile_path=str(self.path),\n\t\t\t\t) from e\n\n\t@property\n\tdef container(self) -> Container:\n\t\tif self._container is None:\n\t\t\tcontainer_xml_content = 
self._read_file_from_epub(self.CONTAINER_FILE_PATH)\n\t\t\tself._container = Container(container_xml_content)\n\t\treturn self._container\n\n\t@property\n\tdef package(self) -> Package:\n\t\tif self._package is None:\n\t\t\tpackage_xml_content = self._read_file_from_epub(self.container.rootfile_path)\n\t\t\tself._package = Package(package_xml_content)\n\t\treturn self._package\n\n\t@cached_property\n\tdef package_href(self):\n\t\treturn os.path.dirname(self.container.rootfile_path)\n\n\t@property\n\tdef toc(self) -> Optional[Navigation]:\n\t\tif self._toc is None:\n\t\t\tif self.nav is not None:\n\t\t\t\t# Default to newer EPUB3 Navigation Document when available\n\t\t\t\tself._toc = self.nav\n\t\t\telif self.ncx is not None:\n\t\t\t\tself._toc = self.ncx\n\n\t\treturn self._toc\n\n\t@property\n\tdef ncx(self) -> Optional[NCXNavigation]:\n\t\t\"\"\"Access the Navigation Control eXtended (EPUB 2)\"\"\"\n\t\tif self._ncx is None:\n\t\t\tpackage = self.package\n\n\t\t\tif not package.toc_href:\n\t\t\t\treturn None\n\n\t\t\ttoc_href = package.toc_href\n\t\t\ttoc_path = os.path.join(self.package_href, toc_href)\n\t\t\ttoc_xml_content = self._read_file_from_epub(toc_path)\n\n\t\t\tself._ncx = NCXNavigation(toc_xml_content)\n\n\t\treturn self._ncx\n\n\t@property\n\tdef nav(self) -> Optional[EPUBNavDocNavigation]:\n\t\t\"\"\"Access the Navigation Document (EPUB 3).\"\"\"\n\t\tif self._nav is None:\n\t\t\tpackage = self.package\n\n\t\t\tif not package.nav_href:\n\t\t\t\treturn None\n\n\t\t\tnav_href = package.nav_href\n\t\t\tnav_path = os.path.join(self.package_href, nav_href)\n\t\t\tnav_xml_content = self._read_file_from_epub(nav_path)\n\n\t\t\tself._nav = EPUBNavDocNavigation(nav_xml_content)\n\n\t\treturn self._nav\n\n\tdef find_content_by_id(self, item_id: str) -> str:\n\t\t\"\"\"\n\t\tFind and return content by its manifest item ID.\n\n\t\tArgs:\n\t\t    item_id: The ID of the item in the manifest.\n\n\t\tReturns:\n\t\t    XHTMLContent: The content object for the 
specified item.\n\n\t\tRaises:\n\t\t    EPUBFileNotFoundError: If the item ID is not found in spine or manifest.\n\t\t\"\"\"\n\t\tspine_item = self.package.spine.find_by_idref(item_id)\n\t\tif not spine_item:\n\t\t\tspine_ids = [\n\t\t\t\titem.get('idref') for item in self.package.spine.itemrefs if item.get('idref')\n\t\t\t]\n\t\t\tsuggestions = [\n\t\t\t\t'Check that the item ID is correct',\n\t\t\t\t'Verify the item is included in the spine',\n\t\t\t]\n\t\t\tif spine_ids:\n\t\t\t\tavailable_ids = ', '.join(spine_ids[:5])\n\t\t\t\tif len(spine_ids) > 5:\n\t\t\t\t\tavailable_ids += f' (and {len(spine_ids) - 5} more)'\n\t\t\t\tsuggestions.append(f'Available spine IDs: {available_ids}')\n\n\t\t\traise EPUBFileNotFoundError(\n\t\t\t\tf\"spine item '{item_id}'\", epub_path=str(self.path), suggestions=suggestions\n\t\t\t)\n\n\t\tmanifest_item = self.package.manifest.find_by_id(item_id)\n\t\tif not manifest_item:\n\t\t\tmanifest_ids = [\n\t\t\t\titem.get('id') for item in self.package.manifest.items if item.get('id')\n\t\t\t]\n\t\t\tsuggestions = [\n\t\t\t\t'Check that the item ID is correct',\n\t\t\t\t'Verify the item is declared in the manifest',\n\t\t\t]\n\t\t\tif manifest_ids:\n\t\t\t\tavailable_ids = ', '.join(manifest_ids[:5])\n\t\t\t\tif len(manifest_ids) > 5:\n\t\t\t\t\tavailable_ids += f' (and {len(manifest_ids) - 5} more)'\n\t\t\t\tsuggestions.append(f'Available manifest IDs: {available_ids}')\n\n\t\t\traise EPUBFileNotFoundError(\n\t\t\t\tf\"manifest item '{item_id}'\", epub_path=str(self.path), suggestions=suggestions\n\t\t\t)\n\n\t\tcontent_path = os.path.join(self.package_href, manifest_item['href'])\n\t\txml_content = self._read_file_from_epub(content_path)\n\n\t\tcontent = XHTMLContent(xml_content, manifest_item['media_type'], manifest_item['href'])\n\n\t\treturn content\n\n\tdef find_pub_resource_by_id(self, item_id: str) -> str:\n\t\t\"\"\"\n\t\tFind and return a publication resource by its manifest item ID.\n\n\t\tArgs:\n\t\t    item_id: The ID of the 
item in the manifest.\n\n\t\tReturns:\n\t\t    str: The raw content of the resource.\n\n\t\tRaises:\n\t\t    EPUBFileNotFoundError: If the item ID is not found in manifest.\n\t\t\"\"\"\n\t\tmanifest_item = self.package.manifest.find_by_id(item_id)\n\t\tif not manifest_item:\n\t\t\tmanifest_ids = [\n\t\t\t\titem.get('id') for item in self.package.manifest.items if item.get('id')\n\t\t\t]\n\t\t\tsuggestions = [\n\t\t\t\t'Check that the item ID is correct',\n\t\t\t\t'Verify the item is declared in the manifest',\n\t\t\t]\n\t\t\tif manifest_ids:\n\t\t\t\tavailable_ids = ', '.join(manifest_ids[:5])\n\t\t\t\tif len(manifest_ids) > 5:\n\t\t\t\t\tavailable_ids += f' (and {len(manifest_ids) - 5} more)'\n\t\t\t\tsuggestions.append(f'Available manifest IDs: {available_ids}')\n\n\t\t\traise EPUBFileNotFoundError(\n\t\t\t\tf\"manifest item '{item_id}'\", epub_path=str(self.path), suggestions=suggestions\n\t\t\t)\n\n\t\tcontent_path = os.path.join(self.package_href, manifest_item['href'])\n\t\txml_content = self._read_file_from_epub(content_path)\n\n\t\tcontent = XHTMLContent(xml_content, manifest_item['media_type'], manifest_item['href'])\n\n\t\treturn content\n\n\tdef list_files(self) -> List[Dict[str, str]]:\n\t\t\"\"\"\n\t\tList all files in the EPUB archive.\n\n\t\tReturns:\n\t\t    List[Dict[str, str]]: A list of dictionaries containing file information.\n\t\t\"\"\"\n\t\twith zipfile.ZipFile(self.path, 'r') as epub_zip:\n\t\t\tfile_list = []\n\t\t\tfor zip_info in epub_zip.infolist():\n\t\t\t\tfile_info = {\n\t\t\t\t\t'filename': zip_info.filename,\n\t\t\t\t\t'file_size': zip_info.file_size,\n\t\t\t\t\t'compress_size': zip_info.compress_size,\n\t\t\t\t\t'file_mode': zip_info.external_attr >> 16,\n\t\t\t\t\t'last_modified': datetime(*zip_info.date_time),\n\t\t\t\t}\n\t\t\t\tfile_list.append(file_info)\n\t\t\treturn file_list\n\n\tdef get_files_info(self) -> List[Dict[str, Union[str, int]]]:\n\t\t\"\"\"\n\t\tGet information about all files in the EPUB 
archive.\n\n\t\tReturns:\n\t\t    List[Dict]: A list of dictionaries containing file information.\n\t\t        Each dictionary contains: 'path', 'size', 'compressed_size', 'modified'.\n\t\t\"\"\"\n\t\tfiles_info = []\n\n\t\twith zipfile.ZipFile(self.path, 'r') as epub_zip:\n\t\t\tfor zip_info in epub_zip.infolist():\n\t\t\t\tif zip_info.filename.endswith('/'):\n\t\t\t\t\tcontinue\n\n\t\t\t\tmodified_time = datetime(*zip_info.date_time).strftime('%Y-%m-%d %H:%M:%S')\n\n\t\t\t\tfile_info = {\n\t\t\t\t\t'path': zip_info.filename,\n\t\t\t\t\t'size': zip_info.file_size,\n\t\t\t\t\t'compressed_size': zip_info.compress_size,\n\t\t\t\t\t'modified': modified_time,\n\t\t\t\t}\n\t\t\t\tfiles_info.append(file_info)\n\n\t\tfiles_info.sort(key=lambda x: x['path'])\n\t\treturn files_info\n\n\tdef get_file_by_path(self, file_path: str):\n\t\t\"\"\"\n\t\tRetrieve a file from the EPUB archive by its path.\n\n\t\tArgs:\n\t\t    file_path (str): Path to the file within the EPUB archive.\n\n\t\tReturns:\n\t\t    XHTMLContent or str: For XHTML files, returns XHTMLContent object.\n\t\t                        For other files, returns raw content as string.\n\n\t\tRaises:\n\t\t    ValueError: If the file is missing from the EPUB archive.\n\t\t\"\"\"\n\t\tfile_content = self._read_file_from_epub(file_path)\n\n\t\tif file_path.lower().endswith(('.xhtml', '.html', '.htm')):\n\t\t\tmedia_type = 'application/xhtml+xml'\n\n\t\t\ttry:\n\t\t\t\tfor item in self.package.manifest.items:\n\t\t\t\t\tmanifest_path = os.path.join(self._Documentpackage_href, item['href'])\n\t\t\t\t\tif os.path.normpath(manifest_path) == os.path.normpath(file_path):\n\t\t\t\t\t\tmedia_type = item.get('media_type', 'application/xhtml+xml')\n\t\t\t\t\t\tbreak\n\t\t\texcept:\n\t\t\t\tpass\n\n\t\t\treturn XHTMLContent(file_content, media_type, file_path)\n\t\telse:\n\t\t\treturn file_content\n"
  },
  {
    "path": "epub_utils/exceptions.py",
    "content": "\"\"\"\nGlobal epub-utils exception classes.\n\nThis module defines custom exceptions for the epub-utils library that provide\nmore descriptive error messages to help users understand what went wrong and\nhow to fix it.\n\"\"\"\n\n\nclass EPUBError(Exception):\n\t\"\"\"Base exception for all epub-utils errors.\"\"\"\n\n\tdef __init__(self, message: str, suggestions: list = None, file_path: str = None):\n\t\t\"\"\"\n\t\tInitialize the EPUBError.\n\n\t\tArgs:\n\t\t\tmessage: The error message describing what went wrong\n\t\t\tsuggestions: Optional list of suggestions for fixing the error\n\t\t\tfile_path: Optional path to the file where the error occurred\n\t\t\"\"\"\n\t\tsuper().__init__(message)\n\t\tself.suggestions = suggestions or []\n\t\tself.file_path = file_path\n\n\tdef __str__(self):\n\t\terror_parts = [super().__str__()]\n\n\t\tif self.file_path:\n\t\t\terror_parts.append(f'File: {self.file_path}')\n\n\t\tif self.suggestions:\n\t\t\terror_parts.append('Suggestions:')\n\t\t\tfor suggestion in self.suggestions:\n\t\t\t\terror_parts.append(f'  • {suggestion}')\n\n\t\treturn '\\n'.join(error_parts)\n\n\nclass ParseError(EPUBError, ValueError):\n\t\"\"\"An error when parsing EPUB content due to invalid formatting.\"\"\"\n\n\tdef __init__(\n\t\tself,\n\t\tmessage: str,\n\t\telement_name: str = None,\n\t\tline_number: int = None,\n\t\tsuggestions: list = None,\n\t\tfile_path: str = None,\n\t):\n\t\t\"\"\"\n\t\tInitialize the ParseError.\n\n\t\tArgs:\n\t\t\tmessage: The error message\n\t\t\telement_name: The XML element that caused the parsing error\n\t\t\tline_number: The line number where the error occurred\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\tfile_path: Path to the file with the parsing error\n\t\t\"\"\"\n\t\tif element_name:\n\t\t\tmessage = f'Error parsing {element_name}: {message}'\n\t\tif line_number:\n\t\t\tmessage = f'{message} (line {line_number})'\n\n\t\tif not suggestions:\n\t\t\tsuggestions = 
[\n\t\t\t\t'Verify the EPUB file is not corrupted',\n\t\t\t\t'Check that the XML is well-formed',\n\t\t\t\t'Ensure all required elements are present',\n\t\t\t]\n\n\t\tsuper().__init__(message, suggestions, file_path)\n\n\nclass InvalidEPUBError(EPUBError, ValueError):\n\t\"\"\"An error when the EPUB file structure or content is invalid.\"\"\"\n\n\tdef __init__(\n\t\tself,\n\t\tmessage: str,\n\t\tmissing_files: list = None,\n\t\tsuggestions: list = None,\n\t\tfile_path: str = None,\n\t):\n\t\t\"\"\"\n\t\tInitialize the InvalidEPUBError.\n\n\t\tArgs:\n\t\t\tmessage: The error message\n\t\t\tmissing_files: List of missing required files\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\tfile_path: Path to the invalid EPUB file\n\t\t\"\"\"\n\t\tif missing_files:\n\t\t\tfile_list = ', '.join(missing_files)\n\t\t\tmessage = f'{message}. Missing required files: {file_list}'\n\n\t\tif not suggestions:\n\t\t\tsuggestions = [\n\t\t\t\t'Verify the file is a valid EPUB archive',\n\t\t\t\t'Check that all required EPUB files are present',\n\t\t\t\t'Ensure the EPUB was created with a compliant tool',\n\t\t\t]\n\n\t\tsuper().__init__(message, suggestions, file_path)\n\n\nclass UnsupportedFormatError(EPUBError, ValueError):\n\t\"\"\"An error when attempting operations not supported for the EPUB version/format.\"\"\"\n\n\tdef __init__(\n\t\tself,\n\t\tmessage: str,\n\t\tepub_version: str = None,\n\t\trequired_version: str = None,\n\t\tsuggestions: list = None,\n\t\tfile_path: str = None,\n\t):\n\t\t\"\"\"\n\t\tInitialize the UnsupportedFormatError.\n\n\t\tArgs:\n\t\t\tmessage: The error message\n\t\t\tepub_version: The version of the EPUB file\n\t\t\trequired_version: The minimum required version for the operation\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\tfile_path: Path to the EPUB file\n\t\t\"\"\"\n\t\tif epub_version and required_version:\n\t\t\tmessage = f'{message} (EPUB {epub_version} detected, requires EPUB 
{required_version})'\n\t\telif epub_version:\n\t\t\tmessage = f'{message} (EPUB {epub_version} format)'\n\n\t\tif not suggestions:\n\t\t\tsuggestions = [\n\t\t\t\t'Try using an EPUB file with a compatible version',\n\t\t\t\t'Check the EPUB specification for version requirements',\n\t\t\t]\n\t\t\tif required_version:\n\t\t\t\tsuggestions.insert(0, f'Convert the EPUB to version {required_version} or higher')\n\n\t\tsuper().__init__(message, suggestions, file_path)\n\n\nclass NotImplementedError(EPUBError):\n\t\"\"\"An error when attempting to use functionality not yet implemented.\"\"\"\n\n\tdef __init__(\n\t\tself,\n\t\tmessage: str,\n\t\tfeature_name: str = None,\n\t\tsuggestions: list = None,\n\t\tfile_path: str = None,\n\t):\n\t\t\"\"\"\n\t\tInitialize the NotImplementedError.\n\n\t\tArgs:\n\t\t\tmessage: The error message\n\t\t\tfeature_name: Name of the unimplemented feature\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\tfile_path: Path to the file (if applicable)\n\t\t\"\"\"\n\t\tif feature_name:\n\t\t\tmessage = f\"Feature '{feature_name}' is not yet implemented: {message}\"\n\n\t\tif not suggestions:\n\t\t\tsuggestions = [\n\t\t\t\t'Check the documentation for supported features',\n\t\t\t\t'Consider contributing this feature to the project',\n\t\t\t\t'Use an alternative approach if available',\n\t\t\t]\n\n\t\tsuper().__init__(message, suggestions, file_path)\n\n\nclass FileNotFoundError(EPUBError, ValueError):\n\t\"\"\"An error when a required file is not found in the EPUB archive.\"\"\"\n\n\tdef __init__(self, file_path: str, epub_path: str = None, suggestions: list = None):\n\t\t\"\"\"\n\t\tInitialize the FileNotFoundError.\n\n\t\tArgs:\n\t\t\tfile_path: Path to the missing file within the EPUB\n\t\t\tepub_path: Path to the EPUB file\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\"\"\"\n\t\tmessage = f\"Missing '{file_path}' in EPUB archive\"\n\n\t\tif not suggestions:\n\t\t\tsuggestions = [\n\t\t\t\t'Verify the 
file path is correct',\n\t\t\t\t'Check that the EPUB file is complete and not corrupted',\n\t\t\t\t'Ensure the file was included when the EPUB was created',\n\t\t\t]\n\n\t\tsuper().__init__(message, suggestions, epub_path)\n\n\nclass ValidationError(EPUBError, ValueError):\n\t\"\"\"An error when EPUB content fails validation.\"\"\"\n\n\tdef __init__(\n\t\tself,\n\t\tmessage: str,\n\t\tvalidation_errors: list = None,\n\t\tsuggestions: list = None,\n\t\tfile_path: str = None,\n\t):\n\t\t\"\"\"\n\t\tInitialize the ValidationError.\n\n\t\tArgs:\n\t\t\tmessage: The error message\n\t\t\tvalidation_errors: List of specific validation errors\n\t\t\tsuggestions: List of suggestions for fixing the error\n\t\t\tfile_path: Path to the file with validation errors\n\t\t\"\"\"\n\t\tif validation_errors:\n\t\t\terror_list = '\\n'.join(f'  • {error}' for error in validation_errors)\n\t\t\tmessage = f'{message}\\nValidation errors:\\n{error_list}'\n\n\t\tif not suggestions:\n\t\t\tsuggestions = [\n\t\t\t\t'Fix the validation errors listed above',\n\t\t\t\t'Use an EPUB validator to check for additional issues',\n\t\t\t\t'Consult the EPUB specification for requirements',\n\t\t\t]\n\n\t\tsuper().__init__(message, suggestions, file_path)\n"
  },
  {
    "path": "epub_utils/navigation/__init__.py",
    "content": "\"\"\"EPUB Navigation module.\"\"\"\n\nfrom .base import Navigation, NavigationItem\nfrom .nav import EPUBNavDocNavigation\nfrom .ncx import NCXNavigation\n\n__all__ = [\n\t'Navigation',\n\t'NavigationItem',\n\t'NCXNavigation',\n\t'EPUBNavDocNavigation',\n]\n"
  },
  {
    "path": "epub_utils/navigation/base.py",
    "content": "from abc import ABC, abstractmethod\nfrom dataclasses import dataclass, field\nfrom typing import Any, Dict, List, Optional\n\n\n@dataclass\nclass NavigationItem:\n\t\"\"\"Universal navigation item representation.\"\"\"\n\n\tid: str\n\tlabel: str\n\ttarget: str  # href/src\n\torder: Optional[int] = None\n\tlevel: int = 0\n\titem_type: Optional[str] = None  # semantic type\n\tchildren: List['NavigationItem'] = field(default_factory=list)\n\n\tdef to_dict(self) -> Dict[str, Any]:\n\t\t\"\"\"Convert NavigationItem to dictionary format with all children recursively converted.\n\n\t\tReturns:\n\t\t\tDictionary representation with children as nested dictionaries.\n\t\t\"\"\"\n\t\tresult = {\n\t\t\t'id': self.id,\n\t\t\t'label': self.label,\n\t\t\t'target': self.target,\n\t\t\t'order': self.order,\n\t\t\t'level': self.level,\n\t\t\t'type': self.item_type,\n\t\t\t'children': [child.to_dict() for child in self.children],\n\t\t}\n\n\t\treturn result\n\n\nclass Navigation(ABC):\n\t\"\"\"\n\tBase class for Navigation Documents.\n\n\tAttributes:\n\t    media_type (str): The MIME type of the content.\n\t    href (str): The path to the content file within the EPUB.\n\t\"\"\"\n\n\tdef __init__(self, media_type: str, href: str) -> None:\n\t\tself.media_type = media_type\n\t\tself.href = href\n\n\t# === Core Abstract Methods ===\n\t@abstractmethod\n\tdef get_toc_items(self) -> List[NavigationItem]:\n\t\t\"\"\"Get table of contents as normalized items.\"\"\"\n\t\tpass\n\n\t@abstractmethod\n\tdef get_page_list(self) -> List[NavigationItem]:\n\t\t\"\"\"Get page list/breaks as normalized items.\"\"\"\n\t\tpass\n\n\t@abstractmethod\n\tdef get_landmarks(self) -> List[NavigationItem]:\n\t\t\"\"\"Get landmarks/guide references as normalized items.\"\"\"\n\t\tpass\n\n\t# === Editing Interface ===\n\t@abstractmethod\n\tdef add_toc_item(self, item: NavigationItem, after_id: Optional[str] = None) -> None:\n\t\t\"\"\"Add item to table of 
contents.\"\"\"\n\t\tpass\n\n\t@abstractmethod\n\tdef remove_toc_item(self, item_id: str) -> bool:\n\t\t\"\"\"Remove item from table of contents by ID.\"\"\"\n\t\tpass\n\n\t@abstractmethod\n\tdef update_toc_item(self, item_id: str, **kwargs) -> bool:\n\t\t\"\"\"Update existing TOC item properties.\"\"\"\n\t\tpass\n\n\t@abstractmethod\n\tdef reorder_toc_items(self, new_order: List[str]) -> None:\n\t\t\"\"\"Reorder TOC items by list of IDs.\"\"\"\n\t\tpass\n\n\t# === Query Interface ===\n\tdef find_item_by_id(self, item_id: str) -> Optional[NavigationItem]:\n\t\t\"\"\"Find navigation item by ID across all collections.\"\"\"\n\t\tfor item in self.get_all_items():\n\t\t\tif item.id == item_id:\n\t\t\t\treturn item\n\t\treturn None\n\n\tdef find_items_by_target(self, target: str) -> List[NavigationItem]:\n\t\t\"\"\"Find navigation items by target/href.\"\"\"\n\t\treturn [item for item in self.get_all_items() if item.target == target]\n\n\tdef get_all_items(self) -> List[NavigationItem]:\n\t\t\"\"\"Get all navigation items from all collections.\"\"\"\n\t\titems = []\n\t\titems.extend(self.get_toc_items())\n\t\titems.extend(self.get_page_list())\n\t\titems.extend(self.get_landmarks())\n\t\treturn items\n\n\tdef get_toc_items_as_dicts(self) -> List[Dict[str, Any]]:\n\t\t\"\"\"Get TOC items as list of dictionaries with recursive children conversion.\n\n\t\tReturns:\n\t\t\tList of dictionaries representing the TOC structure, where each item\n\t\t\tcontains all its children recursively converted to dictionaries.\n\t\t\"\"\"\n\t\treturn [item.to_dict() for item in self.get_toc_items()]\n\n\tdef get_page_list_as_dicts(self) -> List[Dict[str, Any]]:\n\t\t\"\"\"Get page list items as list of dictionaries.\n\n\t\tReturns:\n\t\t\tList of dictionaries representing the page list structure.\n\t\t\"\"\"\n\t\treturn [item.to_dict() for item in self.get_page_list()]\n\n\tdef get_landmarks_as_dicts(self) -> List[Dict[str, Any]]:\n\t\t\"\"\"Get landmarks as list of 
dictionaries.\n\n\t\tReturns:\n\t\t\tList of dictionaries representing the landmarks structure.\n\t\t\"\"\"\n\t\treturn [item.to_dict() for item in self.get_landmarks()]\n\n\t# === Format-specific Access ===\n\t@property\n\t@abstractmethod\n\tdef tree(self):\n\t\t\"\"\"Get underlying XML/DOM tree for format-specific operations.\"\"\"\n\t\tpass\n\n\t# === Output Methods ===\n\t@abstractmethod\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\tpass\n\n\t@abstractmethod\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\tpass\n\n\t@abstractmethod\n\tdef to_plain(self) -> str:\n\t\tpass\n"
  },
  {
    "path": "epub_utils/navigation/nav/__init__.py",
    "content": "import re\nfrom typing import List, Optional\n\nfrom lxml import etree\n\nfrom epub_utils.exceptions import ParseError, UnsupportedFormatError\nfrom epub_utils.navigation.base import Navigation, NavigationItem\nfrom epub_utils.printers import XMLPrinter\n\nfrom .dom import NavDocument, NavListItem\n\n\nclass EPUBNavDocNavigation(Navigation):\n\t\"\"\"EPUB 3 Navigation Document implementation.\"\"\"\n\n\tMEDIA_TYPES = ['application/xhtml+xml']\n\n\tdef __init__(\n\t\tself, xml_content: str, media_type: str = 'application/xhtml+xml', href: str = None\n\t) -> None:\n\t\tself.xml_content = xml_content\n\n\t\tself._tree = None\n\n\t\tself.xmlns = None\n\t\tself.lang = None\n\n\t\tif media_type not in self.MEDIA_TYPES:\n\t\t\traise UnsupportedFormatError(\n\t\t\t\tf\"Media type '{media_type}' is not supported for EPUB Navigation Document\",\n\t\t\t\tsuggestions=[\n\t\t\t\t\tf'Use one of the supported media types: {\", \".join(self.MEDIA_TYPES)}',\n\t\t\t\t\t'Check that this is an EPUB 3 Navigation Document',\n\t\t\t\t\t'Verify the manifest declares the correct media type',\n\t\t\t\t],\n\t\t\t)\n\t\tsuper().__init__(media_type, href)\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef to_plain(self) -> str:\n\t\treturn self.inner_text\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\ttry:\n\t\t\tself._tree = etree.fromstring(xml_content.encode('utf-8'))\n\n\t\t\troot = self._tree\n\n\t\t\tself.xmlns = root.nsmap.get(None, '') if root.nsmap else ''\n\t\t\tself.lang = root.get('{http://www.w3.org/XML/1998/namespace}lang', '')\n\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in EPUB Navigation Document: 
{str(e)}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the navigation document contains valid XHTML',\n\t\t\t\t\t'Verify the file is not corrupted',\n\t\t\t\t\t'Ensure all XML tags are properly closed',\n\t\t\t\t\t'Check for invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n\n\t@property\n\tdef tree(self):\n\t\t\"\"\"Lazily parse and cache the XHTML tree.\"\"\"\n\t\tif self._tree is None:\n\t\t\tself._parse(self.xml_content)\n\t\treturn self._tree\n\n\t@property\n\tdef inner_text(self) -> str:\n\t\ttree = self.tree\n\n\t\tbody_elements = tree.xpath(\n\t\t\t'//*[local-name()=\"body\"]', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\n\t\tif body_elements:\n\t\t\tinner_text = ''.join(body_elements[0].itertext())\n\t\telse:\n\t\t\tinner_text = ''.join(tree.itertext())\n\n\t\t# Normalize whitespace\n\t\tinner_text = re.sub(r'\\s+', ' ', inner_text).strip()\n\n\t\treturn inner_text\n\n\t# === Navigation Interface Implementation ===\n\n\tdef get_toc_items(self) -> List[NavigationItem]:\n\t\t\"\"\"Get table of contents as normalized items.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\ttoc_nav = nav_doc.toc_nav\n\t\tif not toc_nav:\n\t\t\treturn []\n\n\t\tordered_list = toc_nav.ordered_list\n\t\tif not ordered_list:\n\t\t\treturn []\n\n\t\treturn self._convert_list_items_recursive(ordered_list.list_items, level=0)\n\n\tdef get_page_list(self) -> List[NavigationItem]:\n\t\t\"\"\"Get page list/breaks as normalized items.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\tpage_list_nav = nav_doc.page_list_nav\n\t\tif not page_list_nav:\n\t\t\treturn []\n\n\t\tordered_list = page_list_nav.ordered_list\n\t\tif not ordered_list:\n\t\t\treturn []\n\n\t\treturn self._convert_list_items_to_pages(ordered_list.list_items)\n\n\tdef get_landmarks(self) -> List[NavigationItem]:\n\t\t\"\"\"Get landmarks/guide references as normalized items.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\tlandmarks_nav = nav_doc.landmarks_nav\n\t\tif not 
landmarks_nav:\n\t\t\treturn []\n\n\t\tordered_list = landmarks_nav.ordered_list\n\t\tif not ordered_list:\n\t\t\treturn []\n\n\t\treturn self._convert_list_items_to_landmarks(ordered_list.list_items)\n\n\t# === Editing Interface ===\n\n\tdef add_toc_item(self, item: NavigationItem, after_id: Optional[str] = None) -> None:\n\t\t\"\"\"Add item to table of contents.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\ttoc_nav = nav_doc.toc_nav\n\n\t\tif not toc_nav:\n\t\t\t# Create TOC nav if it doesn't exist\n\t\t\ttoc_nav = nav_doc.add_nav_section('toc')\n\t\t\ttoc_nav.add_heading(1, 'Table of Contents')\n\t\t\tordered_list = toc_nav.add_ordered_list()\n\t\telse:\n\t\t\tordered_list = toc_nav.ordered_list\n\t\t\tif not ordered_list:\n\t\t\t\tordered_list = toc_nav.add_ordered_list()\n\n\t\t# Create new list item\n\t\tnew_li = ordered_list.add_list_item()\n\t\tif item.id:\n\t\t\tnew_li.id = item.id\n\n\t\t# Add anchor or span based on whether target is provided\n\t\tif item.target:\n\t\t\tanchor = new_li.add_anchor(item.target, item.label)\n\t\t\tif item.item_type:\n\t\t\t\tanchor.epub_type = item.item_type\n\t\telse:\n\t\t\tspan = new_li.add_span(item.label)\n\t\t\tif item.id:\n\t\t\t\tspan.id = item.id\n\n\t\t# TODO: Handle after_id positioning and children\n\n\tdef remove_toc_item(self, item_id: str) -> bool:\n\t\t\"\"\"Remove item from table of contents by ID.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\ttoc_nav = nav_doc.toc_nav\n\t\tif not toc_nav:\n\t\t\treturn False\n\n\t\t# Find and remove the list item with the given ID\n\t\titems_to_remove = self.tree.xpath(\n\t\t\tf'.//xhtml:li[@id=\"{item_id}\"]',\n\t\t\tnamespaces={'xhtml': 'http://www.w3.org/1999/xhtml'},\n\t\t)\n\n\t\t# Also check for anchors with the ID\n\t\tif not items_to_remove:\n\t\t\titems_to_remove = self.tree.xpath(\n\t\t\t\tf'.//xhtml:a[@id=\"{item_id}\"]',\n\t\t\t\tnamespaces={'xhtml': 'http://www.w3.org/1999/xhtml'},\n\t\t\t)\n\t\t\t# Remove the parent li element if 
found\n\t\t\titems_to_remove = [\n\t\t\t\titem.getparent() for item in items_to_remove if item.getparent() is not None\n\t\t\t]\n\n\t\tif items_to_remove:\n\t\t\tfor item in items_to_remove:\n\t\t\t\tif item.getparent() is not None:\n\t\t\t\t\titem.getparent().remove(item)\n\t\t\treturn True\n\n\t\treturn False\n\n\tdef update_toc_item(self, item_id: str, **kwargs) -> bool:\n\t\t\"\"\"Update existing TOC item properties.\"\"\"\n\t\tnav_doc = NavDocument(self.tree)\n\t\ttoc_nav = nav_doc.toc_nav\n\t\tif not toc_nav:\n\t\t\treturn False\n\n\t\t# Find the item by ID (could be on li or a element)\n\t\ttarget_items = self.tree.xpath(\n\t\t\tf'.//xhtml:li[@id=\"{item_id}\"] | .//xhtml:a[@id=\"{item_id}\"]',\n\t\t\tnamespaces={'xhtml': 'http://www.w3.org/1999/xhtml'},\n\t\t)\n\n\t\tif not target_items:\n\t\t\treturn False\n\n\t\ttarget_element = target_items[0]\n\n\t\t# If we found an anchor, work with it; if we found a li, find its anchor\n\t\tif target_element.tag.endswith('}a'):\n\t\t\tanchor_element = target_element\n\t\t\tli_element = target_element.getparent()\n\t\telse:\n\t\t\tli_element = target_element\n\t\t\tanchors = li_element.xpath(\n\t\t\t\t'./xhtml:a', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t\t)\n\t\t\tanchor_element = anchors[0] if anchors else None\n\n\t\t# Update properties\n\t\tif 'label' in kwargs and anchor_element is not None:\n\t\t\tanchor_element.text = kwargs['label']\n\t\telif 'label' in kwargs:\n\t\t\t# Handle span elements or create anchor\n\t\t\tspans = li_element.xpath(\n\t\t\t\t'./xhtml:span', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t\t)\n\t\t\tif spans:\n\t\t\t\tspans[0].text = kwargs['label']\n\n\t\tif 'target' in kwargs and anchor_element is not None:\n\t\t\tanchor_element.set('href', kwargs['target'])\n\n\t\tif 'item_type' in kwargs and anchor_element is not None:\n\t\t\tanchor_element.set('{http://www.idpf.org/2007/ops}type', kwargs['item_type'])\n\n\t\treturn True\n\n\tdef reorder_toc_items(self, 
new_order: List[str]) -> None:\n\t\t\"\"\"Reorder TOC items by list of IDs.\"\"\"\n\t\t# This is a complex operation that would require rebuilding the list structure\n\t\t# For now, we'll implement a basic version that moves items around\n\t\tnav_doc = NavDocument(self.tree)\n\t\ttoc_nav = nav_doc.toc_nav\n\t\tif not toc_nav:\n\t\t\treturn\n\n\t\tordered_list = toc_nav.ordered_list\n\t\tif not ordered_list:\n\t\t\treturn\n\n\t\t# Collect all items with their IDs\n\t\titems_map = {}\n\t\tfor li_item in ordered_list.list_items:\n\t\t\tif li_item.id:\n\t\t\t\titems_map[li_item.id] = li_item.element\n\t\t\telif li_item.anchor and li_item.anchor.id:\n\t\t\t\titems_map[li_item.anchor.id] = li_item.element\n\n\t\t# Reorder by removing and re-adding in new order\n\t\tfor item_id in new_order:\n\t\t\tif item_id in items_map:\n\t\t\t\telement = items_map[item_id]\n\t\t\t\tparent = element.getparent()\n\t\t\t\tif parent is not None:\n\t\t\t\t\tparent.remove(element)\n\t\t\t\t\tparent.append(element)\n\n\t# === Helper Methods ===\n\n\tdef _convert_list_items_recursive(\n\t\tself, list_items: List[NavListItem], level: int = 0\n\t) -> List[NavigationItem]:\n\t\t\"\"\"Convert navigation list items to NavigationItems recursively.\"\"\"\n\t\titems = []\n\n\t\tfor i, list_item in enumerate(list_items):\n\t\t\tanchor = list_item.anchor\n\t\t\tspan = list_item.span\n\n\t\t\tif anchor:\n\t\t\t\titem = NavigationItem(\n\t\t\t\t\tid=anchor.id or list_item.id or '',\n\t\t\t\t\tlabel=anchor.text,\n\t\t\t\t\ttarget=anchor.href or '',\n\t\t\t\t\torder=i + 1,\n\t\t\t\t\tlevel=level,\n\t\t\t\t\titem_type=anchor.epub_type,\n\t\t\t\t)\n\t\t\telif span:\n\t\t\t\titem = NavigationItem(\n\t\t\t\t\tid=span.id or list_item.id or '',\n\t\t\t\t\tlabel=span.element.text or '',\n\t\t\t\t\ttarget='',\n\t\t\t\t\torder=i + 1,\n\t\t\t\t\tlevel=level,\n\t\t\t\t\titem_type=None,\n\t\t\t\t)\n\t\t\telse:\n\t\t\t\t# Fallback for items without anchor or span\n\t\t\t\tcontinue\n\n\t\t\t# Convert nested 
items\n\t\t\tnested_list = list_item.nested_list\n\t\t\tif nested_list:\n\t\t\t\titem.children = self._convert_list_items_recursive(\n\t\t\t\t\tnested_list.list_items, level + 1\n\t\t\t\t)\n\n\t\t\titems.append(item)\n\n\t\treturn items\n\n\tdef _convert_list_items_to_pages(self, list_items: List[NavListItem]) -> List[NavigationItem]:\n\t\t\"\"\"Convert navigation list items to page NavigationItems.\"\"\"\n\t\titems = []\n\n\t\tfor i, list_item in enumerate(list_items):\n\t\t\tanchor = list_item.anchor\n\t\t\tif not anchor:\n\t\t\t\tcontinue\n\n\t\t\titem = NavigationItem(\n\t\t\t\tid=anchor.id or list_item.id or '',\n\t\t\t\tlabel=anchor.text,\n\t\t\t\ttarget=anchor.href or '',\n\t\t\t\torder=i + 1,\n\t\t\t\tlevel=0,\n\t\t\t\titem_type=anchor.epub_type or 'page',\n\t\t\t)\n\t\t\titems.append(item)\n\n\t\treturn items\n\n\tdef _convert_list_items_to_landmarks(\n\t\tself, list_items: List[NavListItem]\n\t) -> List[NavigationItem]:\n\t\t\"\"\"Convert navigation list items to landmark NavigationItems.\"\"\"\n\t\titems = []\n\n\t\tfor i, list_item in enumerate(list_items):\n\t\t\tanchor = list_item.anchor\n\t\t\tif not anchor:\n\t\t\t\tcontinue\n\n\t\t\titem = NavigationItem(\n\t\t\t\tid=anchor.id or list_item.id or '',\n\t\t\t\tlabel=anchor.text,\n\t\t\t\ttarget=anchor.href or '',\n\t\t\t\torder=i + 1,\n\t\t\t\tlevel=0,\n\t\t\t\titem_type=anchor.epub_type or 'landmark',\n\t\t\t)\n\t\t\titems.append(item)\n\n\t\treturn items\n"
  },
  {
    "path": "epub_utils/navigation/nav/dom.py",
    "content": "\"\"\"DOM classes for structured access to EPUB 3 Navigation Documents.\"\"\"\n\nfrom typing import List, Optional\n\nfrom lxml import etree\n\n\nclass NavElement:\n\t\"\"\"Base class for navigation document elements.\"\"\"\n\n\tdef __init__(self, element: etree.Element) -> None:\n\t\tself.element = element\n\n\t@property\n\tdef id(self) -> Optional[str]:\n\t\t\"\"\"Get the id attribute.\"\"\"\n\t\treturn self.element.get('id')\n\n\t@id.setter\n\tdef id(self, value: str) -> None:\n\t\t\"\"\"Set the id attribute.\"\"\"\n\t\tself.element.set('id', value)\n\n\nclass NavAnchor(NavElement):\n\t\"\"\"Represents an anchor element (a) in navigation.\"\"\"\n\n\t@property\n\tdef href(self) -> Optional[str]:\n\t\t\"\"\"Get the href attribute.\"\"\"\n\t\treturn self.element.get('href')\n\n\t@href.setter\n\tdef href(self, value: str) -> None:\n\t\t\"\"\"Set the href attribute.\"\"\"\n\t\tself.element.set('href', value)\n\n\t@property\n\tdef text(self) -> str:\n\t\t\"\"\"Get the text content of the anchor.\"\"\"\n\t\treturn self.element.text or ''\n\n\t@text.setter\n\tdef text(self, value: str) -> None:\n\t\t\"\"\"Set the text content of the anchor.\"\"\"\n\t\tself.element.text = value\n\n\t@property\n\tdef epub_type(self) -> Optional[str]:\n\t\t\"\"\"Get the epub:type attribute.\"\"\"\n\t\treturn self.element.get('{http://www.idpf.org/2007/ops}type')\n\n\t@epub_type.setter\n\tdef epub_type(self, value: str) -> None:\n\t\t\"\"\"Set the epub:type attribute.\"\"\"\n\t\tself.element.set('{http://www.idpf.org/2007/ops}type', value)\n\n\nclass NavListItem(NavElement):\n\t\"\"\"Represents a list item (li) in navigation.\"\"\"\n\n\t@property\n\tdef anchor(self) -> Optional[NavAnchor]:\n\t\t\"\"\"Get the first anchor child element.\"\"\"\n\t\tanchors = self.element.xpath(\n\t\t\t'./xhtml:a', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\tif anchors:\n\t\t\treturn NavAnchor(anchors[0])\n\t\treturn None\n\n\t@property\n\tdef nested_list(self) -> 
Optional['NavList']:\n\t\t\"\"\"Get nested ordered list if present.\"\"\"\n\t\tlists = self.element.xpath(\n\t\t\t'./xhtml:ol', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\tif lists:\n\t\t\treturn NavList(lists[0])\n\t\treturn None\n\n\t@property\n\tdef span(self) -> Optional[NavElement]:\n\t\t\"\"\"Get span element if present (for non-linked text).\"\"\"\n\t\tspans = self.element.xpath(\n\t\t\t'./xhtml:span', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\tif spans:\n\t\t\treturn NavElement(spans[0])\n\t\treturn None\n\n\tdef add_anchor(self, href: str, text: str, epub_type: Optional[str] = None) -> NavAnchor:\n\t\t\"\"\"Add an anchor element to this list item.\"\"\"\n\t\tanchor_element = etree.SubElement(self.element, '{http://www.w3.org/1999/xhtml}a')\n\t\tanchor = NavAnchor(anchor_element)\n\t\tanchor.href = href\n\t\tanchor.text = text\n\t\tif epub_type:\n\t\t\tanchor.epub_type = epub_type\n\t\treturn anchor\n\n\tdef add_span(self, text: str) -> NavElement:\n\t\t\"\"\"Add a span element to this list item.\"\"\"\n\t\tspan_element = etree.SubElement(self.element, '{http://www.w3.org/1999/xhtml}span')\n\t\tspan = NavElement(span_element)\n\t\tspan.element.text = text\n\t\treturn span\n\n\tdef add_nested_list(self) -> 'NavList':\n\t\t\"\"\"Add a nested ordered list to this list item.\"\"\"\n\t\tol_element = etree.SubElement(self.element, '{http://www.w3.org/1999/xhtml}ol')\n\t\treturn NavList(ol_element)\n\n\nclass NavList(NavElement):\n\t\"\"\"Represents an ordered list (ol) in navigation.\"\"\"\n\n\t@property\n\tdef list_items(self) -> List[NavListItem]:\n\t\t\"\"\"Get all list item children.\"\"\"\n\t\titems = self.element.xpath(\n\t\t\t'./xhtml:li', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\treturn [NavListItem(item) for item in items]\n\n\tdef add_list_item(self) -> NavListItem:\n\t\t\"\"\"Add a new list item to this list.\"\"\"\n\t\tli_element = etree.SubElement(self.element, 
'{http://www.w3.org/1999/xhtml}li')\n\t\treturn NavListItem(li_element)\n\n\tdef get_all_items_recursive(self) -> List[NavListItem]:\n\t\t\"\"\"Get all list items recursively.\"\"\"\n\t\titems = []\n\n\t\tdef collect_items(nav_list: NavList):\n\t\t\tfor item in nav_list.list_items:\n\t\t\t\titems.append(item)\n\t\t\t\tnested_list = item.nested_list\n\t\t\t\tif nested_list:\n\t\t\t\t\tcollect_items(nested_list)\n\n\t\tcollect_items(self)\n\t\treturn items\n\n\nclass NavSection(NavElement):\n\t\"\"\"Represents a nav element with specific epub:type.\"\"\"\n\n\t@property\n\tdef epub_type(self) -> Optional[str]:\n\t\t\"\"\"Get the epub:type attribute.\"\"\"\n\t\treturn self.element.get('{http://www.idpf.org/2007/ops}type')\n\n\t@epub_type.setter\n\tdef epub_type(self, value: str) -> None:\n\t\t\"\"\"Set the epub:type attribute.\"\"\"\n\t\tself.element.set('{http://www.idpf.org/2007/ops}type', value)\n\n\t@property\n\tdef heading(self) -> Optional[str]:\n\t\t\"\"\"Get the text of the heading element (h1-h6).\"\"\"\n\t\tfor level in range(1, 7):\n\t\t\theadings = self.element.xpath(\n\t\t\t\tf'./xhtml:h{level}', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t\t)\n\t\t\tif headings:\n\t\t\t\treturn headings[0].text or ''\n\t\treturn None\n\n\t@property\n\tdef ordered_list(self) -> Optional[NavList]:\n\t\t\"\"\"Get the ordered list child element.\"\"\"\n\t\tlists = self.element.xpath(\n\t\t\t'./xhtml:ol', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\tif lists:\n\t\t\treturn NavList(lists[0])\n\t\treturn None\n\n\tdef add_heading(self, level: int, text: str) -> NavElement:\n\t\t\"\"\"Add a heading element.\"\"\"\n\t\tif not 1 <= level <= 6:\n\t\t\traise ValueError('Heading level must be between 1 and 6')\n\n\t\theading_element = etree.SubElement(\n\t\t\tself.element, f'{{http://www.w3.org/1999/xhtml}}h{level}'\n\t\t)\n\t\theading = NavElement(heading_element)\n\t\theading.element.text = text\n\t\treturn heading\n\n\tdef add_ordered_list(self) -> 
NavList:\n\t\t\"\"\"Add an ordered list to this nav section.\"\"\"\n\t\tol_element = etree.SubElement(self.element, '{http://www.w3.org/1999/xhtml}ol')\n\t\treturn NavList(ol_element)\n\n\nclass NavDocument(NavElement):\n\t\"\"\"Represents the root html element of a navigation document.\"\"\"\n\n\t@property\n\tdef toc_nav(self) -> Optional[NavSection]:\n\t\t\"\"\"Get the table of contents nav section.\"\"\"\n\t\tnavs = self.element.xpath(\n\t\t\t'.//xhtml:nav[@epub:type=\"toc\"]',\n\t\t\tnamespaces={\n\t\t\t\t'xhtml': 'http://www.w3.org/1999/xhtml',\n\t\t\t\t'epub': 'http://www.idpf.org/2007/ops',\n\t\t\t},\n\t\t)\n\t\tif navs:\n\t\t\treturn NavSection(navs[0])\n\t\treturn None\n\n\t@property\n\tdef page_list_nav(self) -> Optional[NavSection]:\n\t\t\"\"\"Get the page list nav section.\"\"\"\n\t\tnavs = self.element.xpath(\n\t\t\t'.//xhtml:nav[@epub:type=\"page-list\"]',\n\t\t\tnamespaces={\n\t\t\t\t'xhtml': 'http://www.w3.org/1999/xhtml',\n\t\t\t\t'epub': 'http://www.idpf.org/2007/ops',\n\t\t\t},\n\t\t)\n\t\tif navs:\n\t\t\treturn NavSection(navs[0])\n\t\treturn None\n\n\t@property\n\tdef landmarks_nav(self) -> Optional[NavSection]:\n\t\t\"\"\"Get the landmarks nav section.\"\"\"\n\t\tnavs = self.element.xpath(\n\t\t\t'.//xhtml:nav[@epub:type=\"landmarks\"]',\n\t\t\tnamespaces={\n\t\t\t\t'xhtml': 'http://www.w3.org/1999/xhtml',\n\t\t\t\t'epub': 'http://www.idpf.org/2007/ops',\n\t\t\t},\n\t\t)\n\t\tif navs:\n\t\t\treturn NavSection(navs[0])\n\t\treturn None\n\n\t@property\n\tdef all_nav_sections(self) -> List[NavSection]:\n\t\t\"\"\"Get all nav sections.\"\"\"\n\t\tnavs = self.element.xpath(\n\t\t\t'.//xhtml:nav', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\treturn [NavSection(nav) for nav in navs]\n\n\t@property\n\tdef title(self) -> str:\n\t\t\"\"\"Get the document title.\"\"\"\n\t\ttitle_elements = self.element.xpath(\n\t\t\t'.//xhtml:title', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\treturn title_elements[0].text if 
title_elements else ''\n\n\t@property\n\tdef body(self) -> Optional[NavElement]:\n\t\t\"\"\"Get the body element.\"\"\"\n\t\tbodies = self.element.xpath(\n\t\t\t'.//xhtml:body', namespaces={'xhtml': 'http://www.w3.org/1999/xhtml'}\n\t\t)\n\t\tif bodies:\n\t\t\treturn NavElement(bodies[0])\n\t\treturn None\n\n\tdef add_nav_section(self, epub_type: str) -> NavSection:\n\t\t\"\"\"Add a new nav section to the body.\"\"\"\n\t\tbody = self.body\n\t\tif not body:\n\t\t\t# Create body if it doesn't exist\n\t\t\tbody_element = etree.SubElement(self.element, '{http://www.w3.org/1999/xhtml}body')\n\t\t\tbody = NavElement(body_element)\n\n\t\tnav_element = etree.SubElement(body.element, '{http://www.w3.org/1999/xhtml}nav')\n\t\tnav_section = NavSection(nav_element)\n\t\tnav_section.epub_type = epub_type\n\t\treturn nav_section\n"
  },
  {
    "path": "epub_utils/navigation/ncx/__init__.py",
    "content": "import re\nfrom typing import List, Optional\n\nfrom lxml import etree\n\nfrom epub_utils.exceptions import FileNotFoundError as EPUBFileNotFoundError\nfrom epub_utils.exceptions import ParseError, UnsupportedFormatError\nfrom epub_utils.navigation.base import Navigation, NavigationItem\nfrom epub_utils.printers import XMLPrinter\n\nfrom .dom import NCXDocument, NCXNavPoint, NCXNavTarget, NCXPageTarget\n\n\nclass NCXNavigation(Navigation):\n\tMEDIA_TYPES = ['application/x-dtbncx+xml']\n\n\tdef __init__(\n\t\tself, xml_content: str, media_type: str = 'application/x-dtbncx+xml', href: str = None\n\t) -> None:\n\t\tself.xml_content = xml_content\n\n\t\tself._tree = None\n\n\t\tself.xmlns = None\n\t\tself.version = None\n\t\tself.lang = None\n\n\t\tif media_type not in self.MEDIA_TYPES:\n\t\t\traise UnsupportedFormatError(\n\t\t\t\tf\"Media type '{media_type}' is not supported for NCX navigation\",\n\t\t\t\tsuggestions=[\n\t\t\t\t\tf'Use one of the supported media types: {\", \".join(self.MEDIA_TYPES)}',\n\t\t\t\t\t'Check that this is an NCX navigation file',\n\t\t\t\t\t'Verify the manifest declares the correct media type',\n\t\t\t\t],\n\t\t\t)\n\t\tsuper().__init__(media_type, href)\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef to_plain(self) -> str:\n\t\treturn self.inner_text\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\ttry:\n\t\t\tself._tree = etree.fromstring(xml_content.encode('utf-8'))\n\n\t\t\troot = self._tree\n\n\t\t\tself.xmlns = root.nsmap.get(None, '') if root.nsmap else ''\n\t\t\tself.version = root.get('version', '')\n\t\t\tself.lang = root.get('{http://www.w3.org/XML/1998/namespace}lang', '')\n\n\t\texcept etree.ParseError as e:\n\t\t\traise 
ParseError(\n\t\t\t\tf'Invalid XML in NCX navigation file: {str(e)}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the NCX file contains valid XML',\n\t\t\t\t\t'Verify the file is not corrupted',\n\t\t\t\t\t'Ensure all XML tags are properly closed',\n\t\t\t\t\t'Check for invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n\n\t@property\n\tdef tree(self):\n\t\t\"\"\"Lazily parse and cache the NCX XML tree.\"\"\"\n\t\tif self._tree is None:\n\t\t\tself._parse(self.xml_content)\n\t\treturn self._tree\n\n\t@property\n\tdef inner_text(self) -> str:\n\t\ttree = self.tree\n\n\t\tbody_elements = tree.xpath('//*[local-name()=\"body\"]')\n\n\t\tif body_elements:\n\t\t\tinner_text = ''.join(body_elements[0].itertext())\n\t\telse:\n\t\t\tinner_text = ''.join(tree.itertext())\n\n\t\t# Normalize whitespace\n\t\tinner_text = re.sub(r'\\s+', ' ', inner_text).strip()\n\n\t\treturn inner_text\n\n\t# === Navigation Interface Implementation ===\n\n\tdef get_toc_items(self) -> List[NavigationItem]:\n\t\t\"\"\"Get table of contents as normalized items.\"\"\"\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tnav_map = ncx_doc.nav_map\n\t\tif not nav_map:\n\t\t\treturn []\n\n\t\treturn self._convert_nav_points_recursive(nav_map.nav_points, level=0)\n\n\tdef get_page_list(self) -> List[NavigationItem]:\n\t\t\"\"\"Get page list/breaks as normalized items.\"\"\"\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tpage_list = ncx_doc.page_list\n\t\tif not page_list:\n\t\t\treturn []\n\n\t\treturn self._convert_page_targets(page_list.page_targets)\n\n\tdef get_landmarks(self) -> List[NavigationItem]:\n\t\t\"\"\"Get landmarks/guide references as normalized items.\"\"\"\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tnav_lists = ncx_doc.nav_lists\n\n\t\titems = []\n\t\tfor nav_list in nav_lists:\n\t\t\tfor nav_target in nav_list.nav_targets:\n\t\t\t\titems.append(self._convert_nav_target(nav_target))\n\n\t\treturn items\n\n\tdef add_toc_item(self, item: NavigationItem, after_id: Optional[str] = None) -> 
None:\n\t\t\"\"\"Add item to table of contents.\"\"\"\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tnav_map = ncx_doc.nav_map\n\t\tif not nav_map:\n\t\t\traise ParseError(\n\t\t\t\t'NCX document is missing required navMap element',\n\t\t\t\telement_name='navMap',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Ensure the NCX file contains a navMap element',\n\t\t\t\t\t'Check that the NCX structure follows EPUB specifications',\n\t\t\t\t\t'Verify the NCX file was created correctly',\n\t\t\t\t],\n\t\t\t)\n\n\t\t# Find insertion point\n\t\tif after_id:\n\t\t\tall_nav_points = nav_map.get_all_nav_points()\n\t\t\tinsert_index = None\n\t\t\tfor i, nav_point in enumerate(all_nav_points):\n\t\t\t\tif nav_point.id == after_id:\n\t\t\t\t\tinsert_index = i + 1\n\t\t\t\t\tbreak\n\n\t\t\tif insert_index is None:\n\t\t\t\tavailable_ids = [nav_point.id for nav_point in all_nav_points if nav_point.id]\n\t\t\t\tsuggestions = [\n\t\t\t\t\t'Check that the navigation item ID is correct',\n\t\t\t\t\t'Verify the item exists in the navigation structure',\n\t\t\t\t]\n\t\t\t\tif available_ids:\n\t\t\t\t\tid_list = ', '.join(available_ids[:5])\n\t\t\t\t\tif len(available_ids) > 5:\n\t\t\t\t\t\tid_list += f' (and {len(available_ids) - 5} more)'\n\t\t\t\t\tsuggestions.append(f'Available navigation IDs: {id_list}')\n\n\t\t\t\traise EPUBFileNotFoundError(\n\t\t\t\t\tf\"Navigation item with ID '{after_id}' not found\", suggestions=suggestions\n\t\t\t\t)\n\n\t\t\t# Positional insertion is not implemented yet: after_id has only been validated,\n\t\t\t# and the new item is still appended to the end of the navMap.\n\t\t\t# More complex insertion logic would require tree manipulation\n\t\t\tnav_map.add_nav_point(\n\t\t\t\titem.id, item.label, item.target, class_attr=item.item_type, play_order=item.order\n\t\t\t)\n\t\telse:\n\t\t\t# Add to the end\n\t\t\tnav_map.add_nav_point(\n\t\t\t\titem.id, item.label, item.target, class_attr=item.item_type, play_order=item.order\n\t\t\t)\n\n\tdef remove_toc_item(self, item_id: str) -> bool:\n\t\t\"\"\"Remove item from table of contents by ID.\"\"\"\n\t\tncx_doc = 
NCXDocument(self.tree)\n\t\tnav_map = ncx_doc.nav_map\n\t\tif not nav_map:\n\t\t\treturn False\n\n\t\t# Find and remove the navPoint\n\t\tnav_points = nav_map.element.xpath(\n\t\t\tf'.//ncx:navPoint[@id=\"{item_id}\"]',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\n\t\tif nav_points:\n\t\t\tnav_points[0].getparent().remove(nav_points[0])\n\t\t\treturn True\n\n\t\treturn False\n\n\tdef update_toc_item(self, item_id: str, **kwargs) -> bool:\n\t\t\"\"\"Update existing TOC item properties.\"\"\"\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tnav_map = ncx_doc.nav_map\n\t\tif not nav_map:\n\t\t\treturn False\n\n\t\t# Find the navPoint\n\t\tnav_points = nav_map.element.xpath(\n\t\t\tf'.//ncx:navPoint[@id=\"{item_id}\"]',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\n\t\tif not nav_points:\n\t\t\treturn False\n\n\t\tnav_point = NCXNavPoint(nav_points[0])\n\n\t\t# Update properties\n\t\tif 'label' in kwargs:\n\t\t\tnav_label = nav_point.nav_label\n\t\t\tif nav_label:\n\t\t\t\tnav_label.text = kwargs['label']\n\n\t\tif 'target' in kwargs:\n\t\t\tcontent = nav_point.content\n\t\t\tif content:\n\t\t\t\tcontent.src = kwargs['target']\n\n\t\tif 'order' in kwargs:\n\t\t\tnav_point.play_order = kwargs['order']\n\n\t\tif 'item_type' in kwargs:\n\t\t\tnav_point.class_attr = kwargs['item_type']\n\n\t\treturn True\n\n\tdef reorder_toc_items(self, new_order: List[str]) -> None:\n\t\t\"\"\"Reorder TOC items by list of IDs.\"\"\"\n\t\t# This is a complex operation that would require rebuilding the navMap\n\t\t# For now, we'll update the playOrder attributes\n\t\tncx_doc = NCXDocument(self.tree)\n\t\tnav_map = ncx_doc.nav_map\n\t\tif not nav_map:\n\t\t\treturn\n\n\t\tfor i, item_id in enumerate(new_order):\n\t\t\tnav_points = nav_map.element.xpath(\n\t\t\t\tf'.//ncx:navPoint[@id=\"{item_id}\"]',\n\t\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t\t)\n\t\t\tif nav_points:\n\t\t\t\tnav_point = 
NCXNavPoint(nav_points[0])\n\t\t\t\tnav_point.play_order = i + 1\n\n\t# === Helper Methods ===\n\n\tdef _convert_nav_points_recursive(\n\t\tself, nav_points: List[NCXNavPoint], level: int = 0\n\t) -> List[NavigationItem]:\n\t\t\"\"\"Convert NCX navPoints to NavigationItems recursively.\"\"\"\n\t\titems = []\n\n\t\tfor nav_point in nav_points:\n\t\t\titem = NavigationItem(\n\t\t\t\tid=nav_point.id or '',\n\t\t\t\tlabel=nav_point.label_text,\n\t\t\t\ttarget=nav_point.content_src,\n\t\t\t\torder=nav_point.play_order,\n\t\t\t\tlevel=level,\n\t\t\t\titem_type=nav_point.class_attr,\n\t\t\t)\n\n\t\t\t# Convert child nav points\n\t\t\tchild_nav_points = nav_point.nav_points\n\t\t\tif child_nav_points:\n\t\t\t\titem.children = self._convert_nav_points_recursive(child_nav_points, level + 1)\n\n\t\t\titems.append(item)\n\n\t\treturn items\n\n\tdef _convert_page_targets(self, page_targets: List[NCXPageTarget]) -> List[NavigationItem]:\n\t\t\"\"\"Convert NCX pageTargets to NavigationItems.\"\"\"\n\t\titems = []\n\n\t\tfor page_target in page_targets:\n\t\t\titem = NavigationItem(\n\t\t\t\tid=page_target.id or '',\n\t\t\t\tlabel=page_target.label_text,\n\t\t\t\ttarget=page_target.content_src,\n\t\t\t\torder=page_target.play_order,\n\t\t\t\tlevel=0,\n\t\t\t\titem_type=page_target.type_attr,\n\t\t\t)\n\t\t\titems.append(item)\n\n\t\treturn items\n\n\tdef _convert_nav_target(self, nav_target: NCXNavTarget) -> NavigationItem:\n\t\t\"\"\"Convert NCX navTarget to NavigationItem.\"\"\"\n\t\treturn NavigationItem(\n\t\t\tid=nav_target.id or '',\n\t\t\tlabel=nav_target.nav_label.text if nav_target.nav_label else '',\n\t\t\ttarget=nav_target.content.src if nav_target.content else '',\n\t\t\torder=nav_target.play_order,\n\t\t\tlevel=0,\n\t\t\titem_type=nav_target.class_attr,\n\t\t)\n"
  },
  {
    "path": "epub_utils/navigation/ncx/dom.py",
    "content": "\"\"\"NCX DOM classes for structured access to NCX navigation documents.\"\"\"\n\nfrom typing import List, Optional\n\nfrom lxml import etree\n\n\nclass NCXElement:\n\t\"\"\"Base class for NCX DOM elements.\"\"\"\n\n\tdef __init__(self, element: etree.Element):\n\t\tself.element = element\n\n\t@property\n\tdef id(self) -> Optional[str]:\n\t\t\"\"\"Get the id attribute.\"\"\"\n\t\treturn self.element.get('id')\n\n\t@id.setter\n\tdef id(self, value: str) -> None:\n\t\t\"\"\"Set the id attribute.\"\"\"\n\t\tself.element.set('id', value)\n\n\nclass NCXText(NCXElement):\n\t\"\"\"Represents a text element.\"\"\"\n\n\t@property\n\tdef text(self) -> str:\n\t\t\"\"\"Get the text content.\"\"\"\n\t\treturn self.element.text or ''\n\n\t@text.setter\n\tdef text(self, value: str) -> None:\n\t\t\"\"\"Set the text content.\"\"\"\n\t\tself.element.text = value\n\n\nclass NCXContent(NCXElement):\n\t\"\"\"Represents a content element.\"\"\"\n\n\t@property\n\tdef src(self) -> Optional[str]:\n\t\t\"\"\"Get the src attribute.\"\"\"\n\t\treturn self.element.get('src')\n\n\t@src.setter\n\tdef src(self, value: str) -> None:\n\t\t\"\"\"Set the src attribute.\"\"\"\n\t\tself.element.set('src', value)\n\n\nclass NCXNavLabel(NCXElement):\n\t\"\"\"Represents a navLabel element.\"\"\"\n\n\t@property\n\tdef text_element(self) -> Optional[NCXText]:\n\t\t\"\"\"Get the text child element.\"\"\"\n\t\ttext_elements = self.element.xpath(\n\t\t\t'./ncx:text', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif text_elements:\n\t\t\treturn NCXText(text_elements[0])\n\t\treturn None\n\n\t@property\n\tdef text(self) -> str:\n\t\t\"\"\"Get the text content.\"\"\"\n\t\ttext_elem = self.text_element\n\t\treturn text_elem.text if text_elem else ''\n\n\t@text.setter\n\tdef text(self, value: str) -> None:\n\t\t\"\"\"Set the text content.\"\"\"\n\t\ttext_elem = self.text_element\n\t\tif text_elem:\n\t\t\ttext_elem.text = value\n\t\telse:\n\t\t\t# Create text element if it 
doesn't exist\n\t\t\ttext_element = etree.SubElement(\n\t\t\t\tself.element, '{http://www.daisy.org/z3986/2005/ncx/}text'\n\t\t\t)\n\t\t\ttext_element.text = value\n\n\nclass NCXNavPoint(NCXElement):\n\t\"\"\"Represents a navPoint element in the navigation hierarchy.\"\"\"\n\n\t@property\n\tdef class_attr(self) -> Optional[str]:\n\t\t\"\"\"Get the class attribute.\"\"\"\n\t\treturn self.element.get('class')\n\n\t@class_attr.setter\n\tdef class_attr(self, value: str) -> None:\n\t\t\"\"\"Set the class attribute.\"\"\"\n\t\tself.element.set('class', value)\n\n\t@property\n\tdef play_order(self) -> Optional[int]:\n\t\t\"\"\"Get the playOrder attribute.\"\"\"\n\t\tplay_order = self.element.get('playOrder')\n\t\treturn int(play_order) if play_order else None\n\n\t@play_order.setter\n\tdef play_order(self, value: int) -> None:\n\t\t\"\"\"Set the playOrder attribute.\"\"\"\n\t\tself.element.set('playOrder', str(value))\n\n\t@property\n\tdef nav_label(self) -> Optional[NCXNavLabel]:\n\t\t\"\"\"Get the navLabel child element.\"\"\"\n\t\tnav_labels = self.element.xpath(\n\t\t\t'./ncx:navLabel', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif nav_labels:\n\t\t\treturn NCXNavLabel(nav_labels[0])\n\t\treturn None\n\n\t@property\n\tdef content(self) -> Optional[NCXContent]:\n\t\t\"\"\"Get the content child element.\"\"\"\n\t\tcontent_elements = self.element.xpath(\n\t\t\t'./ncx:content', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif content_elements:\n\t\t\treturn NCXContent(content_elements[0])\n\t\treturn None\n\n\t@property\n\tdef nav_points(self) -> List['NCXNavPoint']:\n\t\t\"\"\"Get child navPoint elements.\"\"\"\n\t\tnav_point_elements = self.element.xpath(\n\t\t\t'./ncx:navPoint', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXNavPoint(point) for point in nav_point_elements]\n\n\tdef add_nav_point(\n\t\tself,\n\t\tid: str,\n\t\tlabel_text: str,\n\t\tsrc: str,\n\t\tclass_attr: 
Optional[str] = None,\n\t\tplay_order: Optional[int] = None,\n\t) -> 'NCXNavPoint':\n\t\t\"\"\"Add a child navPoint element.\"\"\"\n\t\tnav_point_element = etree.SubElement(\n\t\t\tself.element, '{http://www.daisy.org/z3986/2005/ncx/}navPoint'\n\t\t)\n\t\tnav_point = NCXNavPoint(nav_point_element)\n\t\tnav_point.id = id\n\n\t\tif class_attr:\n\t\t\tnav_point.class_attr = class_attr\n\t\tif play_order is not None:\n\t\t\tnav_point.play_order = play_order\n\n\t\t# Add navLabel\n\t\tnav_label_element = etree.SubElement(\n\t\t\tnav_point_element, '{http://www.daisy.org/z3986/2005/ncx/}navLabel'\n\t\t)\n\t\tnav_label = NCXNavLabel(nav_label_element)\n\t\tnav_label.text = label_text\n\n\t\t# Add content\n\t\tcontent_element = etree.SubElement(\n\t\t\tnav_point_element, '{http://www.daisy.org/z3986/2005/ncx/}content'\n\t\t)\n\t\tcontent = NCXContent(content_element)\n\t\tcontent.src = src\n\n\t\treturn nav_point\n\n\t@property\n\tdef label_text(self) -> str:\n\t\t\"\"\"Get the text of the navLabel.\"\"\"\n\t\tnav_label = self.nav_label\n\t\treturn nav_label.text if nav_label else ''\n\n\t@property\n\tdef content_src(self) -> str:\n\t\t\"\"\"Get the src of the content element.\"\"\"\n\t\tcontent = self.content\n\t\treturn content.src if content else ''\n\n\nclass NCXNavMap(NCXElement):\n\t\"\"\"Represents the navMap element.\"\"\"\n\n\t@property\n\tdef nav_points(self) -> List[NCXNavPoint]:\n\t\t\"\"\"Get all direct child navPoint elements.\"\"\"\n\t\tnav_point_elements = self.element.xpath(\n\t\t\t'./ncx:navPoint', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXNavPoint(point) for point in nav_point_elements]\n\n\tdef add_nav_point(\n\t\tself,\n\t\tid: str,\n\t\tlabel_text: str,\n\t\tsrc: str,\n\t\tclass_attr: Optional[str] = None,\n\t\tplay_order: Optional[int] = None,\n\t) -> NCXNavPoint:\n\t\t\"\"\"Add a navPoint element.\"\"\"\n\t\tnav_point_element = etree.SubElement(\n\t\t\tself.element, 
'{http://www.daisy.org/z3986/2005/ncx/}navPoint'\n\t\t)\n\t\tnav_point = NCXNavPoint(nav_point_element)\n\t\tnav_point.id = id\n\n\t\tif class_attr:\n\t\t\tnav_point.class_attr = class_attr\n\t\tif play_order is not None:\n\t\t\tnav_point.play_order = play_order\n\n\t\t# Add navLabel\n\t\tnav_label_element = etree.SubElement(\n\t\t\tnav_point_element, '{http://www.daisy.org/z3986/2005/ncx/}navLabel'\n\t\t)\n\t\tnav_label = NCXNavLabel(nav_label_element)\n\t\tnav_label.text = label_text\n\n\t\t# Add content\n\t\tcontent_element = etree.SubElement(\n\t\t\tnav_point_element, '{http://www.daisy.org/z3986/2005/ncx/}content'\n\t\t)\n\t\tcontent = NCXContent(content_element)\n\t\tcontent.src = src\n\n\t\treturn nav_point\n\n\tdef get_all_nav_points(self) -> List[NCXNavPoint]:\n\t\t\"\"\"Get all navPoint elements recursively.\"\"\"\n\t\tnav_point_elements = self.element.xpath(\n\t\t\t'.//ncx:navPoint', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXNavPoint(point) for point in nav_point_elements]\n\n\nclass NCXPageTarget(NCXElement):\n\t\"\"\"Represents a pageTarget element.\"\"\"\n\n\t@property\n\tdef type_attr(self) -> Optional[str]:\n\t\t\"\"\"Get the type attribute.\"\"\"\n\t\treturn self.element.get('type')\n\n\t@type_attr.setter\n\tdef type_attr(self, value: str) -> None:\n\t\t\"\"\"Set the type attribute.\"\"\"\n\t\tself.element.set('type', value)\n\n\t@property\n\tdef value(self) -> Optional[str]:\n\t\t\"\"\"Get the value attribute.\"\"\"\n\t\treturn self.element.get('value')\n\n\t@value.setter\n\tdef value(self, value: str) -> None:\n\t\t\"\"\"Set the value attribute.\"\"\"\n\t\tself.element.set('value', value)\n\n\t@property\n\tdef play_order(self) -> Optional[int]:\n\t\t\"\"\"Get the playOrder attribute.\"\"\"\n\t\tplay_order = self.element.get('playOrder')\n\t\treturn int(play_order) if play_order else None\n\n\t@play_order.setter\n\tdef play_order(self, value: int) -> None:\n\t\t\"\"\"Set the playOrder 
attribute.\"\"\"\n\t\tself.element.set('playOrder', str(value))\n\n\t@property\n\tdef nav_label(self) -> Optional[NCXNavLabel]:\n\t\t\"\"\"Get the navLabel child element.\"\"\"\n\t\tnav_labels = self.element.xpath(\n\t\t\t'./ncx:navLabel', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif nav_labels:\n\t\t\treturn NCXNavLabel(nav_labels[0])\n\t\treturn None\n\n\t@property\n\tdef content(self) -> Optional[NCXContent]:\n\t\t\"\"\"Get the content child element.\"\"\"\n\t\tcontent_elements = self.element.xpath(\n\t\t\t'./ncx:content', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif content_elements:\n\t\t\treturn NCXContent(content_elements[0])\n\t\treturn None\n\n\t@property\n\tdef label_text(self) -> str:\n\t\t\"\"\"Get the text of the navLabel.\"\"\"\n\t\tnav_label = self.nav_label\n\t\treturn nav_label.text if nav_label else ''\n\n\t@property\n\tdef content_src(self) -> str:\n\t\t\"\"\"Get the src of the content element.\"\"\"\n\t\tcontent = self.content\n\t\treturn content.src if content else ''\n\n\nclass NCXPageList(NCXElement):\n\t\"\"\"Represents the pageList element.\"\"\"\n\n\t@property\n\tdef page_targets(self) -> List[NCXPageTarget]:\n\t\t\"\"\"Get all pageTarget elements.\"\"\"\n\t\tpage_target_elements = self.element.xpath(\n\t\t\t'./ncx:pageTarget', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXPageTarget(target) for target in page_target_elements]\n\n\tdef add_page_target(\n\t\tself,\n\t\tid: str,\n\t\ttype_attr: str,\n\t\tvalue: str,\n\t\tlabel_text: str,\n\t\tsrc: str,\n\t\tplay_order: Optional[int] = None,\n\t) -> NCXPageTarget:\n\t\t\"\"\"Add a pageTarget element.\"\"\"\n\t\tpage_target_element = etree.SubElement(\n\t\t\tself.element, '{http://www.daisy.org/z3986/2005/ncx/}pageTarget'\n\t\t)\n\t\tpage_target = NCXPageTarget(page_target_element)\n\t\tpage_target.id = id\n\t\tpage_target.type_attr = type_attr\n\t\tpage_target.value = value\n\n\t\tif play_order is 
not None:\n\t\t\tpage_target.play_order = play_order\n\n\t\t# Add navLabel\n\t\tnav_label_element = etree.SubElement(\n\t\t\tpage_target_element, '{http://www.daisy.org/z3986/2005/ncx/}navLabel'\n\t\t)\n\t\tnav_label = NCXNavLabel(nav_label_element)\n\t\tnav_label.text = label_text\n\n\t\t# Add content\n\t\tcontent_element = etree.SubElement(\n\t\t\tpage_target_element, '{http://www.daisy.org/z3986/2005/ncx/}content'\n\t\t)\n\t\tcontent = NCXContent(content_element)\n\t\tcontent.src = src\n\n\t\treturn page_target\n\n\nclass NCXNavTarget(NCXElement):\n\t\"\"\"Represents a navTarget element.\"\"\"\n\n\t@property\n\tdef value(self) -> Optional[str]:\n\t\t\"\"\"Get the value attribute.\"\"\"\n\t\treturn self.element.get('value')\n\n\t@value.setter\n\tdef value(self, value: str) -> None:\n\t\t\"\"\"Set the value attribute.\"\"\"\n\t\tself.element.set('value', value)\n\n\t@property\n\tdef class_attr(self) -> Optional[str]:\n\t\t\"\"\"Get the class attribute.\"\"\"\n\t\treturn self.element.get('class')\n\n\t@class_attr.setter\n\tdef class_attr(self, value: str) -> None:\n\t\t\"\"\"Set the class attribute.\"\"\"\n\t\tself.element.set('class', value)\n\n\t@property\n\tdef play_order(self) -> Optional[int]:\n\t\t\"\"\"Get the playOrder attribute.\"\"\"\n\t\tplay_order = self.element.get('playOrder')\n\t\treturn int(play_order) if play_order else None\n\n\t@play_order.setter\n\tdef play_order(self, value: int) -> None:\n\t\t\"\"\"Set the playOrder attribute.\"\"\"\n\t\tself.element.set('playOrder', str(value))\n\n\t@property\n\tdef nav_label(self) -> Optional[NCXNavLabel]:\n\t\t\"\"\"Get the navLabel child element.\"\"\"\n\t\tnav_labels = self.element.xpath(\n\t\t\t'./ncx:navLabel', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif nav_labels:\n\t\t\treturn NCXNavLabel(nav_labels[0])\n\t\treturn None\n\n\t@property\n\tdef content(self) -> Optional[NCXContent]:\n\t\t\"\"\"Get the content child element.\"\"\"\n\t\tcontent_elements = 
self.element.xpath(\n\t\t\t'./ncx:content', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif content_elements:\n\t\t\treturn NCXContent(content_elements[0])\n\t\treturn None\n\n\nclass NCXNavList(NCXElement):\n\t\"\"\"Represents the navList element.\"\"\"\n\n\t@property\n\tdef nav_label(self) -> Optional[NCXNavLabel]:\n\t\t\"\"\"Get the navLabel child element.\"\"\"\n\t\tnav_labels = self.element.xpath(\n\t\t\t'./ncx:navLabel', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif nav_labels:\n\t\t\treturn NCXNavLabel(nav_labels[0])\n\t\treturn None\n\n\t@property\n\tdef nav_targets(self) -> List[NCXNavTarget]:\n\t\t\"\"\"Get all navTarget elements.\"\"\"\n\t\tnav_target_elements = self.element.xpath(\n\t\t\t'./ncx:navTarget', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXNavTarget(target) for target in nav_target_elements]\n\n\tdef add_nav_target(\n\t\tself, id: str, label_text: str, src: str, play_order: Optional[int] = None\n\t) -> NCXNavTarget:\n\t\t\"\"\"Add a navTarget element.\"\"\"\n\t\tnav_target_element = etree.SubElement(\n\t\t\tself.element, '{http://www.daisy.org/z3986/2005/ncx/}navTarget'\n\t\t)\n\t\tnav_target = NCXNavTarget(nav_target_element)\n\t\tnav_target.id = id\n\n\t\tif play_order is not None:\n\t\t\tnav_target.play_order = play_order\n\n\t\t# Add navLabel\n\t\tnav_label_element = etree.SubElement(\n\t\t\tnav_target_element, '{http://www.daisy.org/z3986/2005/ncx/}navLabel'\n\t\t)\n\t\tnav_label = NCXNavLabel(nav_label_element)\n\t\tnav_label.text = label_text\n\n\t\t# Add content\n\t\tcontent_element = etree.SubElement(\n\t\t\tnav_target_element, '{http://www.daisy.org/z3986/2005/ncx/}content'\n\t\t)\n\t\tcontent = NCXContent(content_element)\n\t\tcontent.src = src\n\n\t\treturn nav_target\n\n\t@property\n\tdef label_text(self) -> str:\n\t\t\"\"\"Get the text of the navLabel.\"\"\"\n\t\tnav_label = self.nav_label\n\t\treturn nav_label.text if nav_label else 
''\n\n\nclass NCXDocument(NCXElement):\n\t\"\"\"Represents the root ncx element.\"\"\"\n\n\t@property\n\tdef nav_map(self) -> Optional[NCXNavMap]:\n\t\t\"\"\"Get the navMap element.\"\"\"\n\t\tnav_map_elements = self.element.xpath(\n\t\t\t'./ncx:navMap', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif nav_map_elements:\n\t\t\treturn NCXNavMap(nav_map_elements[0])\n\t\treturn None\n\n\t@property\n\tdef page_list(self) -> Optional[NCXPageList]:\n\t\t\"\"\"Get the pageList element.\"\"\"\n\t\tpage_list_elements = self.element.xpath(\n\t\t\t'./ncx:pageList', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\tif page_list_elements:\n\t\t\treturn NCXPageList(page_list_elements[0])\n\t\treturn None\n\n\t@property\n\tdef nav_lists(self) -> List[NCXNavList]:\n\t\t\"\"\"Get all navList elements.\"\"\"\n\t\tnav_list_elements = self.element.xpath(\n\t\t\t'./ncx:navList', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn [NCXNavList(nav_list) for nav_list in nav_list_elements]\n\n\t@property\n\tdef title(self) -> str:\n\t\t\"\"\"Get the document title text.\"\"\"\n\t\ttitle_elements = self.element.xpath(\n\t\t\t'.//ncx:docTitle/ncx:text', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn title_elements[0].text if title_elements else ''\n\n\t@property\n\tdef author(self) -> str:\n\t\t\"\"\"Get the document author text.\"\"\"\n\t\tauthor_elements = self.element.xpath(\n\t\t\t'.//ncx:docAuthor/ncx:text', namespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'}\n\t\t)\n\t\treturn author_elements[0].text if author_elements else ''\n\n\tdef get_uid(self) -> Optional[str]:\n\t\t\"\"\"Get the dtb:uid meta content.\"\"\"\n\t\tuid_elements = self.element.xpath(\n\t\t\t'.//ncx:meta[@name=\"dtb:uid\"]/@content',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\t\treturn uid_elements[0] if uid_elements else None\n\n\tdef get_depth(self) -> 
Optional[int]:\n\t\t\"\"\"Get the dtb:depth meta content.\"\"\"\n\t\tdepth_elements = self.element.xpath(\n\t\t\t'.//ncx:meta[@name=\"dtb:depth\"]/@content',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\t\treturn int(depth_elements[0]) if depth_elements else None\n\n\tdef get_total_page_count(self) -> Optional[int]:\n\t\t\"\"\"Get the dtb:totalPageCount meta content.\"\"\"\n\t\tcount_elements = self.element.xpath(\n\t\t\t'.//ncx:meta[@name=\"dtb:totalPageCount\"]/@content',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\t\treturn int(count_elements[0]) if count_elements else None\n\n\tdef get_max_page_number(self) -> Optional[int]:\n\t\t\"\"\"Get the dtb:maxPageNumber meta content.\"\"\"\n\t\tmax_elements = self.element.xpath(\n\t\t\t'.//ncx:meta[@name=\"dtb:maxPageNumber\"]/@content',\n\t\t\tnamespaces={'ncx': 'http://www.daisy.org/z3986/2005/ncx/'},\n\t\t)\n\t\treturn int(max_elements[0]) if max_elements else None\n"
  },
  {
    "path": "epub_utils/package/__init__.py",
    "content": "\"\"\"\nOpen Packaging Format (OPF): https://www.w3.org/TR/epub/#sec-package-doc\n\nThis file includes the `Package` class, which is responsible for parsing the OPF package file\nof an EPUB archive. The OPF file contains metadata, manifest, spine, and guide information\nabout the EPUB content.\n\nNamespace:\n- The OPF file uses the namespace `http://www.idpf.org/2007/opf`.\n\nFor more details on the structure and requirements of the OPF file, refer to the\nEPUB specification: https://www.w3.org/TR/epub/#sec-package-doc\n\"\"\"\n\ntry:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nimport packaging.version\n\nfrom epub_utils.exceptions import InvalidEPUBError, ParseError, UnsupportedFormatError\nfrom epub_utils.package.manifest import Manifest\nfrom epub_utils.package.metadata import Metadata\nfrom epub_utils.package.spine import Spine\nfrom epub_utils.printers import XMLPrinter\n\n\nclass Package:\n\t\"\"\"\n\tRepresents the parsed OPF package file of an EPUB.\n\n\tAttributes:\n\t    xml_content (str): The raw XML content of the OPF package file.\n\t    metadata (dict): The metadata section of the OPF file.\n\t    manifest (dict): The manifest section listing all resources.\n\t    spine (list): The spine section defining the reading order.\n\t    guide (dict): The guide section with navigation references.\n\t    cover (str): The cover image resource ID.\n\t    toc (str): The table of contents resource ID.\n\t    nav (str): The navigation document resource ID.\n\t\"\"\"\n\n\tNAMESPACE = 'http://www.idpf.org/2007/opf'\n\tDC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'\n\tMETADATA_XPATH = f'.//{{{NAMESPACE}}}metadata'\n\tSPINE_XPATH = f'.//{{{NAMESPACE}}}spine'\n\tMANIFEST_XPATH = f'.//{{{NAMESPACE}}}manifest'\n\tITEM_XPATH = f'.//{{{NAMESPACE}}}item'\n\tNCX_MEDIA_TYPE = 'application/x-dtbncx+xml'\n\tTITLE_XPATH = f'.//{{{DC_NAMESPACE}}}title'\n\tCREATOR_XPATH = 
f'.//{{{DC_NAMESPACE}}}creator'\n\tIDENTIFIER_XPATH = f'.//{{{DC_NAMESPACE}}}identifier'\n\n\tdef __init__(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tInitialize the Package by parsing the OPF package file.\n\n\t\tArgs:\n\t\t    xml_content (str): The raw XML content of the OPF package file.\n\t\t\"\"\"\n\t\tself.xml_content = xml_content\n\n\t\tself.metadata = None\n\t\tself.manifest = None\n\t\tself.spine = None\n\t\tself.guide = None\n\t\tself.cover = None\n\t\tself.toc_href = None\n\t\tself.nav_href = None\n\t\tself.version = None\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tParses the OPF package file to extract metadata.\n\n\t\tArgs:\n\t\t    xml_content (str): The raw XML content of the OPF package file.\n\n\t\tRaises:\n\t\t    ParseError: If the XML is invalid or cannot be parsed.\n\t\t    InvalidEPUBError: If required OPF elements are missing.\n\t\t\"\"\"\n\t\ttry:\n\t\t\tif isinstance(xml_content, str):\n\t\t\t\txml_content = xml_content.encode('utf-8')\n\t\t\troot = etree.fromstring(xml_content)\n\n\t\t\t# Check for version attribute\n\t\t\tif 'version' not in root.attrib:\n\t\t\t\traise InvalidEPUBError(\n\t\t\t\t\t\"OPF file missing required 'version' attribute\",\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t'Ensure the package element has a version attribute',\n\t\t\t\t\t\t'Check that this is a valid EPUB OPF file',\n\t\t\t\t\t\t'Verify the EPUB was created with compliant tools',\n\t\t\t\t\t],\n\t\t\t\t)\n\n\t\t\tself.version = self._parse_version(root.attrib['version'])\n\n\t\t\t# Parse metadata\n\t\t\tmetadata_el = root.find(self.METADATA_XPATH)\n\t\t\tif metadata_el is None:\n\t\t\t\traise 
InvalidEPUBError(\n\t\t\t\t\t'OPF file missing required metadata element',\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t'Ensure the OPF file contains a metadata section',\n\t\t\t\t\t\t'Check the EPUB package structure',\n\t\t\t\t\t\t'Verify all required OPF elements are present',\n\t\t\t\t\t],\n\t\t\t\t)\n\t\t\tmetadata_xml = etree.tostring(metadata_el, encoding='unicode')\n\t\t\tself.metadata = Metadata(metadata_xml)\n\n\t\t\t# Parse manifest\n\t\t\tmanifest_el = root.find(self.MANIFEST_XPATH)\n\t\t\tif manifest_el is not None:\n\t\t\t\tmanifest_xml = etree.tostring(manifest_el, encoding='unicode')\n\t\t\t\tself.manifest = Manifest(manifest_xml)\n\t\t\telse:\n\t\t\t\traise InvalidEPUBError(\n\t\t\t\t\t'OPF file missing required manifest element',\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t'Ensure the OPF file contains a manifest section',\n\t\t\t\t\t\t'Check that all resources are declared in the manifest',\n\t\t\t\t\t\t'Verify the EPUB package structure is complete',\n\t\t\t\t\t],\n\t\t\t\t)\n\n\t\t\t# Parse spine\n\t\t\tspine_el = root.find(self.SPINE_XPATH)\n\t\t\tif spine_el is not None:\n\t\t\t\tspine_xml = etree.tostring(spine_el, encoding='unicode')\n\t\t\t\tself.spine = Spine(spine_xml)\n\t\t\telse:\n\t\t\t\traise InvalidEPUBError(\n\t\t\t\t\t'OPF file missing required spine element',\n\t\t\t\t\tsuggestions=[\n\t\t\t\t\t\t'Ensure the OPF file contains a spine section',\n\t\t\t\t\t\t'Check that reading order is defined in the spine',\n\t\t\t\t\t\t'Verify the EPUB package structure is complete',\n\t\t\t\t\t],\n\t\t\t\t)\n\n\t\t\t# Parse TOC references\n\t\t\tif self.version.major == 3:\n\t\t\t\tself.nav_href = self._find_nav_href(root)\n\t\t\telse:\n\t\t\t\tself.toc_href = self._find_toc_href(root)\n\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in OPF file: {str(e)}',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the OPF file contains valid XML',\n\t\t\t\t\t'Verify the file is not corrupted',\n\t\t\t\t\t'Ensure all XML tags are 
properly closed',\n\t\t\t\t\t'Check for invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n\n\tdef _get_text(self, root: etree.Element, xpath: str) -> str:\n\t\t\"\"\"\n\t\tHelper method to extract text content from an XML element.\n\n\t\tArgs:\n\t\t    root (etree.Element): The root element to search within.\n\t\t    xpath (str): The XPath expression to locate the element.\n\n\t\tReturns:\n\t\t    str: The text content of the element, or None if not found.\n\t\t\"\"\"\n\t\telement = root.find(xpath)\n\t\treturn element.text.strip() if element is not None and element.text else None\n\n\tdef _find_toc_href(self, root: etree.Element) -> str:\n\t\t\"\"\"\n\t\tFind the publication navigation control file.\n\n\t\tArgs:\n\t\t    root (etree.Element): The root element of the OPF document.\n\n\t\tReturns:\n\t\t    str: The href to the NCX document, or None if not found.\n\t\t\"\"\"\n\t\t# First check for NCX media-type in manifest\n\t\tfor item in root.findall(self.ITEM_XPATH):\n\t\t\tif item.get('media-type') == self.NCX_MEDIA_TYPE:\n\t\t\t\treturn item.get('href')\n\n\t\t# Then check spine toc attribute\n\t\tspine = root.find(self.SPINE_XPATH)\n\t\tif spine is not None:\n\t\t\ttoc_id = spine.get('toc')\n\t\t\tif toc_id:\n\t\t\t\tfor item in root.findall(self.ITEM_XPATH):\n\t\t\t\t\tif item.get('id') == toc_id:\n\t\t\t\t\t\thref = item.get('href')\n\t\t\t\t\t\tif href:\n\t\t\t\t\t\t\t# Remove fragment identifier if present\n\t\t\t\t\t\t\treturn href.split('#')[0]\n\n\t\treturn None\n\n\tdef _find_nav_href(self, root: etree.Element) -> str:\n\t\t\"\"\"\n\t\tFind the publication navigation file.\n\n\t\tArgs:\n\t\t    root (etree.Element): The root element of the OPF document.\n\n\t\tReturns:\n\t\t    str: The href to navigation file, or None if not found.\n\t\t\"\"\"\n\t\t# Check for item with nav properties\n\t\tfor item in root.findall(self.ITEM_XPATH):\n\t\t\tif item.get('properties') == 'nav':\n\t\t\t\thref = item.get('href')\n\t\t\t\tif 
href:\n\t\t\t\t\treturn href.split('#')[0]\n\n\t\t# Fall back to guide TOC reference\n\t\tguide = root.find(f'.//{{{self.NAMESPACE}}}guide')\n\t\tif guide is not None:\n\t\t\tfor reference in guide.findall(f'.//{{{self.NAMESPACE}}}reference'):\n\t\t\t\tif reference.get('type') == 'toc':\n\t\t\t\t\thref = reference.get('href')\n\t\t\t\t\tif href:\n\t\t\t\t\t\treturn href.split('#')[0]\n\n\t\treturn None\n\n\tdef _parse_version(self, version):\n\t\t\"\"\"\n\t\tParse and validate the EPUB version.\n\n\t\tArgs:\n\t\t    version (str): Version string from the OPF file.\n\n\t\tReturns:\n\t\t    packaging.version.Version: Parsed version object.\n\n\t\tRaises:\n\t\t    UnsupportedFormatError: If the EPUB version is not supported.\n\t\t\"\"\"\n\t\ttry:\n\t\t\tversion_obj = packaging.version.Version(version)\n\t\texcept packaging.version.InvalidVersion as e:\n\t\t\traise InvalidEPUBError(\n\t\t\t\tf\"Invalid version format in OPF file: '{version}'\",\n\t\t\t\tsuggestions=[\n\t\t\t\t\t\"Ensure the version follows semantic versioning (e.g., '3.0', '2.0')\",\n\t\t\t\t\t'Check that the version attribute is correctly formatted',\n\t\t\t\t\t'Verify the EPUB was created with compliant tools',\n\t\t\t\t],\n\t\t\t) from e\n\n\t\tif version_obj.major not in (1, 2, 3):\n\t\t\tsupported_versions = '1.x, 2.x, 3.x'\n\t\t\traise UnsupportedFormatError(\n\t\t\t\tf'EPUB version {version_obj.major}.x is not supported',\n\t\t\t\tepub_version=str(version_obj),\n\t\t\t\tsuggestions=[\n\t\t\t\t\tf'Use an EPUB with a supported version ({supported_versions})',\n\t\t\t\t\t'Convert the EPUB to a supported version',\n\t\t\t\t\t'Check the EPUB specification for version requirements',\n\t\t\t\t],\n\t\t\t)\n\n\t\treturn version_obj\n"
  },
  {
    "path": "epub_utils/package/manifest.py",
    "content": "try:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nfrom epub_utils.exceptions import ParseError\nfrom epub_utils.printers import XMLPrinter\n\n\nclass Manifest:\n\t\"\"\"\n\tRepresents the manifest section of an EPUB package document.\n\tThe manifest element provides an exhaustive list of the publication resources.\n\t\"\"\"\n\n\tNAMESPACE = 'http://www.idpf.org/2007/opf'\n\tITEM_XPATH = f'.//{{{NAMESPACE}}}item'\n\n\tdef __init__(self, xml_content: str):\n\t\tself.xml_content = xml_content\n\t\tself.items = []\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tParses the manifest XML content.\n\t\t\"\"\"\n\t\ttry:\n\t\t\tif isinstance(xml_content, str):\n\t\t\t\txml_content = xml_content.encode('utf-8')\n\t\t\troot = etree.fromstring(xml_content)\n\n\t\t\tfor item in root.findall(self.ITEM_XPATH):\n\t\t\t\titem_data = {\n\t\t\t\t\t'id': item.get('id'),\n\t\t\t\t\t'href': item.get('href'),\n\t\t\t\t\t'media_type': item.get('media-type'),\n\t\t\t\t\t'properties': item.get('properties', '').split(),\n\t\t\t\t}\n\t\t\t\tif all(\n\t\t\t\t\tv is not None\n\t\t\t\t\tfor v in [item_data['id'], item_data['href'], item_data['media_type']]\n\t\t\t\t):\n\t\t\t\t\tself.items.append(item_data)\n\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in manifest element: {str(e)}',\n\t\t\t\telement_name='manifest',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the manifest contains valid XML',\n\t\t\t\t\t'Verify all manifest items are properly formatted',\n\t\t\t\t\t'Ensure required attributes (id, href, media-type) are present',\n\t\t\t\t\t'Check for 
invalid characters in the XML',\n\t\t\t\t],\n\t\t\t) from e\n\n\tdef find_by_property(self, property_name: str) -> dict:\n\t\t\"\"\"Find the first item with the given property.\"\"\"\n\t\tfor item in self.items:\n\t\t\tif property_name in item['properties']:\n\t\t\t\treturn item\n\t\treturn None\n\n\tdef find_by_id(self, item_id: str) -> dict:\n\t\t\"\"\"Find an item by its ID.\"\"\"\n\t\tfor item in self.items:\n\t\t\tif item['id'] == item_id:\n\t\t\t\treturn item\n\t\treturn None\n\n\tdef find_by_media_type(self, media_type: str) -> list:\n\t\t\"\"\"Find all items with the given media type.\"\"\"\n\t\treturn [item for item in self.items if item['media_type'] == media_type]\n"
  },
  {
    "path": "epub_utils/package/metadata.py",
    "content": "try:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nfrom epub_utils.exceptions import ParseError, ValidationError\nfrom epub_utils.printers import XMLPrinter\n\n\nclass Metadata:\n\t\"\"\"\n\tRepresents the metadata section of an EPUB package document.\n\tHandles Dublin Core (DC) and Dublin Core Terms (DCTERMS) metadata elements.\n\t\"\"\"\n\n\tDC_NAMESPACE = 'http://purl.org/dc/elements/1.1/'\n\tDCTERMS_NAMESPACE = 'http://purl.org/dc/terms/'\n\tREQUIRED_FIELDS = ['identifier', 'title', 'creator']\n\n\tNSMAP = {'dc': DC_NAMESPACE, 'dcterms': DCTERMS_NAMESPACE}\n\n\tdef __init__(self, xml_content: str):\n\t\tself.xml_content = xml_content\n\t\tself.fields = {}\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\ttry:\n\t\t\tif isinstance(xml_content, str):\n\t\t\t\txml_content = xml_content.encode('utf-8')\n\t\t\troot = etree.fromstring(xml_content)\n\n\t\t\tfor ns_prefix, ns_uri in self.NSMAP.items():\n\t\t\t\tfor element in root.findall(f'.//{{{ns_uri}}}*'):\n\t\t\t\t\tname = element.tag.split('}')[-1]\n\t\t\t\t\ttext = element.text.strip() if element.text else None\n\t\t\t\t\tif text:\n\t\t\t\t\t\tself._add_field(name, text)\n\n\t\t\tfor meta in root.findall('.//meta[@property]'):\n\t\t\t\tprop = meta.get('property', '')\n\t\t\t\tif prop.startswith('dcterms:'):\n\t\t\t\t\tname = prop.split(':')[1]\n\t\t\t\t\ttext = meta.text.strip() if meta.text else None\n\t\t\t\t\tif text:\n\t\t\t\t\t\tself._add_field(name, text)\n\n\t\t\tself._validate()\n\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in metadata element: {str(e)}',\n\t\t\t\telement_name='metadata',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the metadata contains valid XML',\n\t\t\t\t\t'Verify all metadata elements are properly formatted',\n\t\t\t\t\t'Ensure required Dublin Core elements are present',\n\t\t\t\t\t'Check for invalid 
characters in metadata values',\n\t\t\t\t],\n\t\t\t) from e\n\n\tdef _add_field(self, name: str, value: str) -> None:\n\t\tif name in self.fields:\n\t\t\tif isinstance(self.fields[name], list):\n\t\t\t\tself.fields[name].append(value)\n\t\t\telse:\n\t\t\t\tself.fields[name] = [self.fields[name], value]\n\t\telse:\n\t\t\tself.fields[name] = value\n\n\tdef _validate(self, raise_exception=False) -> None:\n\t\t\"\"\"\n\t\tValidate all required fields and raise ValidationError if validation fails.\n\t\t\"\"\"\n\t\terrors = {}\n\n\t\tfor field in self.REQUIRED_FIELDS:\n\t\t\ttry:\n\t\t\t\tself._validate_field(field)\n\t\t\texcept ValueError as e:\n\t\t\t\terrors[field] = str(e)\n\n\t\tif errors and raise_exception:\n\t\t\terror_messages = [f'{field}: {msg}' for field, msg in errors.items()]\n\t\t\tvalidation_errors = [f\"Missing or invalid '{field}' element\" for field in errors.keys()]\n\n\t\t\traise ValidationError(\n\t\t\t\t'EPUB metadata validation failed',\n\t\t\t\tvalidation_errors=validation_errors,\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Ensure all required Dublin Core metadata elements are present',\n\t\t\t\t\t'Check that metadata values are not empty',\n\t\t\t\t\t'Verify the metadata follows EPUB specification requirements',\n\t\t\t\t\t'Use proper Dublin Core namespace for metadata elements',\n\t\t\t\t],\n\t\t\t)\n\n\tdef _validate_field(self, field_name: str) -> None:\n\t\t\"\"\"\n\t\tValidate an individual field.\n\n\t\tArgs:\n\t\t    field_name: Name of the field to validate\n\n\t\tRaises:\n\t\t    ValueError: If the field validation fails\n\t\t\"\"\"\n\t\tvalue = self.fields.get(field_name)\n\t\tif value is None or (isinstance(value, str) and not value.strip()):\n\t\t\traise ValueError('This field is required')\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, 
**kwargs)\n\n\tdef _get_text(self, root: etree.Element, xpath: str) -> str:\n\t\telement = root.find(xpath)\n\t\treturn element.text.strip() if element is not None and element.text else None\n\n\tdef __getattr__(self, name: str) -> str:\n\t\treturn self.fields.get(name)\n\n\tdef to_kv(self) -> str:\n\t\tif not self.fields:\n\t\t\treturn ''\n\n\t\tmax_key_length = max(len(k) for k in self.fields.keys())\n\n\t\tlines = [f'{k.rjust(max_key_length)}: {str(v)}' for k, v in self.fields.items()]\n\n\t\treturn '\\n'.join(lines)\n"
  },
  {
    "path": "epub_utils/package/spine.py",
    "content": "try:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nfrom epub_utils.exceptions import ParseError\nfrom epub_utils.printers import XMLPrinter\n\n\nclass Spine:\n\t\"\"\"\n\tRepresents the spine section of an EPUB package document.\n\tThe spine element defines the default reading order of the content.\n\t\"\"\"\n\n\tNAMESPACE = 'http://www.idpf.org/2007/opf'\n\tITEMREF_XPATH = f'.//{{{NAMESPACE}}}itemref'\n\n\tdef __init__(self, xml_content: str):\n\t\tself.xml_content = xml_content\n\n\t\tself.itemrefs = []\n\t\tself.toc = None\n\t\tself.page_progression_direction = None\n\n\t\tself._parse(xml_content)\n\n\t\tself._printer = XMLPrinter(self)\n\n\tdef __str__(self) -> str:\n\t\treturn self.xml_content\n\n\tdef to_str(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_str(*args, **kwargs)\n\n\tdef to_xml(self, *args, **kwargs) -> str:\n\t\treturn self._printer.to_xml(*args, **kwargs)\n\n\tdef _parse(self, xml_content: str) -> None:\n\t\t\"\"\"\n\t\tParses the spine XML content.\n\t\t\"\"\"\n\t\ttry:\n\t\t\tif isinstance(xml_content, str):\n\t\t\t\txml_content = xml_content.encode('utf-8')\n\t\t\troot = etree.fromstring(xml_content)\n\n\t\t\tself.toc = root.get('toc')\n\t\t\tself.page_progression_direction = root.get('page-progression-direction', 'default')\n\n\t\t\tfor itemref in root.findall(self.ITEMREF_XPATH):\n\t\t\t\tidref = itemref.get('idref')\n\t\t\t\tlinear = itemref.get('linear', 'yes')\n\t\t\t\tproperties = itemref.get('properties', '').split()\n\n\t\t\t\tif idref:\n\t\t\t\t\tself.itemrefs.append(\n\t\t\t\t\t\t{'idref': idref, 'linear': linear == 'yes', 'properties': properties}\n\t\t\t\t\t)\n\n\t\texcept etree.ParseError as e:\n\t\t\traise ParseError(\n\t\t\t\tf'Invalid XML in spine element: {str(e)}',\n\t\t\t\telement_name='spine',\n\t\t\t\tsuggestions=[\n\t\t\t\t\t'Check that the spine contains valid XML',\n\t\t\t\t\t'Verify all spine items are properly formatted',\n\t\t\t\t\t'Ensure 
required attributes (idref) are present',\n\t\t\t\t\t'Check that spine defines the reading order correctly',\n\t\t\t\t],\n\t\t\t) from e\n\n\tdef find_by_idref(self, itemref_idref: str) -> dict:\n\t\t\"\"\"Find an itemref by its idref.\"\"\"\n\t\tfor item in self.itemrefs:\n\t\t\tif item['idref'] == itemref_idref:\n\t\t\t\treturn item\n\t\treturn None\n"
  },
  {
    "path": "epub_utils/printers.py",
    "content": "try:\n\tfrom lxml import etree\nexcept ImportError:\n\timport xml.etree.ElementTree as etree\n\nfrom pygments import highlight\nfrom pygments.formatters import TerminalFormatter\nfrom pygments.lexers import XmlLexer\n\n\ndef highlight_xml(xml_content: str) -> str:\n\treturn highlight(xml_content, XmlLexer(), TerminalFormatter())\n\n\ndef pretty_print_xml(xml_content: str) -> str:\n\ttry:\n\t\toriginal_content = xml_content\n\t\tif isinstance(xml_content, str):\n\t\t\txml_content_bytes = xml_content.encode('utf-8')\n\t\telse:\n\t\t\txml_content_bytes = xml_content\n\t\t\toriginal_content = (\n\t\t\t\txml_content.decode('utf-8') if isinstance(xml_content, bytes) else xml_content\n\t\t\t)\n\n\t\txml_declaration = ''\n\t\tdoctype_declaration = ''\n\n\t\tif original_content.strip().startswith('<?xml'):\n\t\t\txml_decl_end = original_content.find('?>') + 2\n\t\t\txml_declaration = original_content[:xml_decl_end]\n\n\t\tdoctype_start = original_content.find('<!DOCTYPE')\n\t\tif doctype_start != -1:\n\t\t\tdoctype_end = original_content.find('>', doctype_start) + 1\n\t\t\tdoctype_declaration = original_content[doctype_start:doctype_end]\n\n\t\tparser = etree.XMLParser(remove_blank_text=True)\n\t\troot = etree.fromstring(xml_content_bytes, parser)\n\t\tpretty_xml = etree.tostring(root, pretty_print=True, encoding='unicode')\n\n\t\tresult = ''\n\t\tif xml_declaration:\n\t\t\tresult += xml_declaration + '\\n'\n\t\tif doctype_declaration:\n\t\t\tresult += doctype_declaration + '\\n'\n\t\tresult += pretty_xml\n\n\t\treturn result\n\texcept etree.ParseError:\n\t\treturn original_content if isinstance(original_content, str) else xml_content\n\n\ndef print_to_str(xml_content: bool, pretty_print: bool) -> str:\n\tif pretty_print:\n\t\txml_content = pretty_print_xml(xml_content)\n\n\treturn xml_content\n\n\ndef print_to_xml(xml_content: str, pretty_print: bool, highlight_syntax: bool) -> str:\n\tif pretty_print:\n\t\txml_content = 
pretty_print_xml(xml_content)\n\n\tif highlight_syntax:\n\t\txml_content = highlight_xml(xml_content)\n\n\treturn xml_content\n\n\nclass XMLPrinter:\n\t\"\"\"Handles XML printing operations for objects with xml_content.\"\"\"\n\n\tdef __init__(self, xml_content_provider):\n\t\t\"\"\"\n\t\tInitialize the XMLPrinter with an object that provides xml_content.\n\n\t\tArgs:\n\t\t\txml_content_provider: Object that has an xml_content attribute\n\t\t\"\"\"\n\t\tself._xml_content_provider = xml_content_provider\n\n\tdef to_str(self, pretty_print: bool = False) -> str:\n\t\t\"\"\"\n\t\tGet string representation of the XML content.\n\n\t\tArgs:\n\t\t\tpretty_print: Whether to format the XML with proper indentation\n\n\t\tReturns:\n\t\t\tString representation of the XML content\n\t\t\"\"\"\n\t\treturn print_to_str(self._xml_content_provider.xml_content, pretty_print)\n\n\tdef to_xml(self, pretty_print: bool = False, highlight_syntax: bool = True) -> str:\n\t\t\"\"\"\n\t\tGet formatted XML representation with optional syntax highlighting.\n\n\t\tArgs:\n\t\t\tpretty_print: Whether to format the XML with proper indentation\n\t\t\thighlight_syntax: Whether to apply syntax highlighting\n\n\t\tReturns:\n\t\t\tFormatted XML string with optional syntax highlighting\n\t\t\"\"\"\n\t\treturn print_to_xml(self._xml_content_provider.xml_content, pretty_print, highlight_syntax)\n"
  },
  {
    "path": "pytest.ini",
    "content": "[pytest]\npythonpath = .\npython_files = tests.py test_*.py *_tests.py\naddopts = -p no:warnings"
  },
  {
    "path": "requirements/requirements-docs.txt",
    "content": "sphinx==6.2.0\nsphinx-copybutton==0.5.1\nsphinx-issues==3.0.1\nfuro==2022.12.7"
  },
  {
    "path": "requirements/requirements-linting.txt",
    "content": "ruff==0.11.9"
  },
  {
    "path": "requirements/requirements-testing.txt",
    "content": "coverage==6.4.1\ncoverage-badge==1.1.0\npytest==7.2.0\npytest-cov==3.0.0"
  },
  {
    "path": "requirements/requirements.txt",
    "content": "click==8.1.8\nlxml==5.4.0\npygments==2.19.1\nPyYAML==6.0.2"
  },
  {
    "path": "requirements.txt",
    "content": "-r requirements/requirements-docs.txt\n-r requirements/requirements-linting.txt\n-r requirements/requirements-testing.txt\n-r requirements/requirements.txt"
  },
  {
    "path": "ruff.toml",
    "content": "line-length = 100\n\n[format]\nquote-style = \"single\"\nindent-style = \"tab\"\ndocstring-code-format = true"
  },
  {
    "path": "setup.py",
    "content": "import os\n\nfrom setuptools import find_packages, setup\n\nVERSION = '0.1.0a1'\n\n\ndef get_long_description():\n\twith open(\n\t\tos.path.join(os.path.dirname(os.path.abspath(__file__)), 'README.md'),\n\t\tencoding='utf8',\n\t) as fp:\n\t\treturn fp.read()\n\n\nsetup(\n\tname='epub-utils',\n\tdescription='A Python CLI and utility library for manipulating EPUB files',\n\tlong_description=get_long_description(),\n\tlong_description_content_type='text/markdown',\n\tauthor='Ernesto González',\n\turl='https://github.com/ernestofgonzalez/epub-utils',\n\tproject_urls={\n\t\t'Source code': 'https://github.com/ernestofgonzalez/epub-utils',\n\t\t'Issues': 'https://github.com/ernestofgonzalez/epub-utils/issues',\n\t\t'CI': 'https://github.com/ernestofgonzalez/epub-utils/actions',\n\t\t'Changelog': 'https://github.com/ernestofgonzalez/epub-utils/releases',\n\t},\n\tlicense='Apache License, Version 2.0',\n\tversion=VERSION,\n\tpackages=find_packages(),\n\tentry_points={\n\t\t'console_scripts': [\n\t\t\t'epub-utils = epub_utils.cli:main',\n\t\t]\n\t},\n\tinstall_requires=[\n\t\t'click',\n\t\t'lxml',\n\t\t'packaging',\n\t\t'pygments',\n\t\t'PyYAML',\n\t],\n\textras_require={\n\t\t'test': ['pytest'],\n\t\t'docs': [\n\t\t\t'sphinx',\n\t\t\t'sphinx-copybutton',\n\t\t\t'sphinx-issues',\n\t\t\t'furo',\n\t\t],\n\t},\n\tpython_requires='>=3.8',\n\tclassifiers=[\n\t\t'Intended Audience :: Developers',\n\t\t'Topic :: Software Development :: Libraries',\n\t\t'Topic :: Utilities',\n\t\t'Programming Language :: Python :: 3.8',\n\t\t'Programming Language :: Python :: 3.9',\n\t\t'Programming Language :: Python :: 3.10',\n\t\t'Programming Language :: Python :: 3.11',\n\t\t'Programming Language :: Python :: 3.12',\n\t\t'Programming Language :: Python :: 3.13',\n\t\t'Operating System :: Microsoft :: Windows',\n\t\t'Operating System :: POSIX',\n\t\t'Operating System :: Unix',\n\t\t'Operating System :: MacOS',\n\t],\n)\n"
  },
  {
    "path": "tests/conftest.py",
    "content": "import pytest\n\n\n@pytest.fixture\ndef doc_path():\n\tpath = str('tests/assets/roads.epub')\n\treturn path\n"
  },
  {
    "path": "tests/test_cli.py",
    "content": "import pytest\nfrom click.testing import CliRunner\n\nfrom epub_utils import cli\n\n\n@pytest.mark.parametrize(\n\t'options',\n\t(\n\t\t['-h'],\n\t\t['--help'],\n\t),\n)\ndef test_help(options):\n\tresult = CliRunner().invoke(cli.main, options)\n\tassert result.exit_code == 0\n\tassert result.output.startswith('Usage: ')\n\tassert '-h, --help' in result.output\n\n\n@pytest.mark.parametrize(\n\t'options',\n\t(\n\t\t['-v'],\n\t\t['--version'],\n\t),\n)\ndef test_version(options):\n\tresult = CliRunner().invoke(cli.main, options)\n\tassert result.exit_code == 0\n\tassert result.output.strip() == cli.VERSION\n\n\ndef test_files_command_with_file_path_xhtml_xml(doc_path):\n\t\"\"\"Test the files command with XHTML file path in XML format.\"\"\"\n\tresult = CliRunner().invoke(\n\t\tcli.main, [str(doc_path), 'files', 'GoogleDoc/Roads.xhtml', '--format', 'xml']\n\t)\n\tassert result.exit_code == 0\n\tassert len(result.output) > 0\n\n\ndef test_files_command_with_file_path_missing_file(doc_path):\n\t\"\"\"Test the files command with missing file path.\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'files', 'nonexistent/file.xhtml'])\n\tassert result.exit_code == 1\n\tassert 'Missing' in result.output\n\n\ndef test_files_command_without_file_path_table(doc_path):\n\t\"\"\"Test the files command without file path (list files) in table format.\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'files', '--format', 'table'])\n\tassert result.exit_code == 0\n\tassert len(result.output) > 0\n\tassert 'Path' in result.output\n\tassert 'Size' in result.output\n\n\ndef test_files_command_without_file_path_raw(doc_path):\n\t\"\"\"Test the files command without file path (list files) in raw format.\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'files', '--format', 'raw'])\n\tassert result.exit_code == 0\n\tassert len(result.output) > 0\n\tassert 'GoogleDoc/Roads.xhtml' in result.output\n\n\ndef 
test_toc_command_default(doc_path):\n\t\"\"\"Test the toc command with default behavior (auto-detect).\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'toc'])\n\tassert result.exit_code == 0\n\tassert len(result.output) > 0\n\n\ndef test_toc_command_nav_flag(doc_path):\n\t\"\"\"Test the toc command with --nav flag.\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'toc', '--nav'])\n\tassert result.exit_code == 0\n\tassert len(result.output) > 0\n\n\ndef test_toc_command_mutually_exclusive_flags(doc_path):\n\t\"\"\"Test that --ncx and --nav flags are mutually exclusive.\"\"\"\n\tresult = CliRunner().invoke(cli.main, [str(doc_path), 'toc', '--ncx', '--nav'])\n\tassert result.exit_code == 1\n\tassert '--ncx and --nav flags cannot be used together' in result.output\n"
  },
  {
    "path": "tests/test_container.py",
    "content": "import pytest\n\nfrom epub_utils.container import Container\nfrom epub_utils.exceptions import InvalidEPUBError\n\nCONTAINER_XML = \"\"\"<?xml version=\"1.0\"?>\n<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n    <rootfiles>\n        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\n    </rootfiles>\n</container>\n\"\"\"\n\n\ndef test_container_initialization():\n\t\"\"\"\n\tTest that the Container class initializes correctly with valid XML content.\n\t\"\"\"\n\tcontainer = Container(CONTAINER_XML)\n\tassert container is not None\n\tassert container.rootfile_path == 'OEBPS/content.opf'\n\n\ndef test_invalid_container_xml():\n\t\"\"\"\n\tTest that the Container class raises an error for invalid XML content.\n\t\"\"\"\n\tinvalid_xml = '<invalid></invalid>'\n\twith pytest.raises(InvalidEPUBError, match='Invalid container.xml: Missing rootfile element'):\n\t\tContainer(invalid_xml)\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<?xml version=\"1.0\"?>\\n<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\\n    <rootfiles>\\n\\n        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\\n    </rootfiles>\\n</container>',\n\t\t\tFalse,\n\t\t\t'<?xml version=\"1.0\"?>\\n<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\\n    <rootfiles>\\n\\n        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\\n    </rootfiles>\\n</container>',\n\t\t),\n\t\t(\n\t\t\t'<?xml version=\"1.0\"?>\\n<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\\n    <rootfiles>\\n\\n        <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\\n    </rootfiles>\\n</container>',\n\t\t\tTrue,\n\t\t\t'<?xml version=\"1.0\"?>\\n<container 
xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\" version=\"1.0\">\\n  <rootfiles>\\n    <rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\"/>\\n  </rootfiles>\\n</container>\\n',\n\t\t),\n\t],\n)\ndef test_container_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing for Container.\"\"\"\n\tcontainer = Container(xml_content)\n\n\tassert container.to_str(pretty_print=pretty_print) == expected\n"
  },
  {
    "path": "tests/test_doc.py",
    "content": "import unittest\n\nfrom epub_utils.container import Container\nfrom epub_utils.doc import Document\nfrom epub_utils.navigation import EPUBNavDocNavigation, Navigation\nfrom epub_utils.package import Manifest, Package\n\n\ndef test_document_container(doc_path):\n\t\"\"\"\n\tTest that the Document class correctly parses the container.xml file.\n\t\"\"\"\n\tdoc = Document(doc_path)\n\tassert isinstance(doc.container, Container)\n\n\ndef test_document_package(doc_path):\n\t\"\"\"\n\tTest that the Document class correctly parses the package file.\n\t\"\"\"\n\tcase = unittest.TestCase()\n\n\tdoc = Document(doc_path)\n\tassert isinstance(doc.package, Package)\n\tassert isinstance(doc.package.manifest, Manifest)\n\tcase.assertCountEqual(\n\t\tdoc.package.manifest.items,\n\t\t[\n\t\t\t{\n\t\t\t\t'id': 'toc',\n\t\t\t\t'href': 'nav.xhtml',\n\t\t\t\t'media_type': 'application/xhtml+xml',\n\t\t\t\t'properties': ['nav'],\n\t\t\t},\n\t\t\t{\n\t\t\t\t'id': 'main',\n\t\t\t\t'href': 'Roads.xhtml',\n\t\t\t\t'media_type': 'application/xhtml+xml',\n\t\t\t\t'properties': [],\n\t\t\t},\n\t\t],\n\t)\n\n\ndef test_document_toc(doc_path):\n\t\"\"\"\n\tTest that the Document class correctly parses the table of contents file.\n\t\"\"\"\n\tdoc = Document(doc_path)\n\tassert isinstance(doc.toc, Navigation)\n\n\ndef test_document_find_content_by_id(doc_path):\n\tdoc = Document(doc_path)\n\tcontent = doc.find_content_by_id('main')\n\tassert content is not None\n\n\ndef test_document_get_file_by_path_xhtml(doc_path):\n\t\"\"\"\n\tTest that the Document class can retrieve XHTML files by path.\n\t\"\"\"\n\tdoc = Document(doc_path)\n\tcontent = doc.get_file_by_path('GoogleDoc/Roads.xhtml')\n\n\t# Should return XHTMLContent object for XHTML files\n\tassert hasattr(content, 'to_str')\n\tassert hasattr(content, 'to_xml')\n\tassert hasattr(content, 'to_plain')\n\n\t# Content should not be empty\n\tcontent_str = content.to_str()\n\tassert len(content_str) > 0\n\tassert 'xhtml' in 
content_str.lower()\n\n\ndef test_document_get_file_by_path_missing_file(doc_path):\n\t\"\"\"\n\tTest that the Document class raises an error for missing files.\n\t\"\"\"\n\tdoc = Document(doc_path)\n\n\ttry:\n\t\tdoc.get_file_by_path('nonexistent/file.xhtml')\n\t\tassert False, 'Expected ValueError for missing file'\n\texcept ValueError as e:\n\t\tassert 'Missing' in str(e)\n\n\ndef test_document_nav_property(doc_path):\n\t\"\"\"\n\tTest that the Document class correctly accesses the Navigation Document via nav property.\n\t\"\"\"\n\tdoc = Document(doc_path)\n\tnav = doc.nav\n\n\tassert nav is not None\n\tassert isinstance(nav, EPUBNavDocNavigation)\n"
  },
  {
    "path": "tests/test_manifest.py",
    "content": "import pytest\n\nfrom epub_utils.package.manifest import Manifest\n\nVALID_MANIFEST_XML = \"\"\"\n<manifest xmlns=\"http://www.idpf.org/2007/opf\">\n    <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\n    <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>\n    <item id=\"style\" href=\"style.css\" media-type=\"text/css\"/>\n    <item id=\"image1\" href=\"image1.jpg\" media-type=\"image/jpeg\"/>\n</manifest>\n\"\"\"\n\nMINIMAL_MANIFEST_XML = \"\"\"\n<manifest xmlns=\"http://www.idpf.org/2007/opf\">\n    <item id=\"content\" href=\"content.xhtml\" media-type=\"application/xhtml+xml\"/>\n</manifest>\n\"\"\"\n\n\ndef test_manifest_initialization():\n\tmanifest = Manifest(VALID_MANIFEST_XML)\n\n\tassert len(manifest.items) == 4\n\n\tassert manifest.items[0]['id'] == 'nav'\n\tassert manifest.items[0]['href'] == 'nav.xhtml'\n\tassert manifest.items[0]['media_type'] == 'application/xhtml+xml'\n\tassert manifest.items[0]['properties'] == ['nav']\n\n\tassert manifest.items[2]['id'] == 'style'\n\tassert manifest.items[2]['href'] == 'style.css'\n\tassert manifest.items[2]['media_type'] == 'text/css'\n\tassert manifest.items[2]['properties'] == []\n\n\ndef test_minimal_manifest():\n\tmanifest = Manifest(MINIMAL_MANIFEST_XML)\n\n\tassert len(manifest.items) == 1\n\tassert manifest.items[0]['id'] == 'content'\n\tassert manifest.items[0]['href'] == 'content.xhtml'\n\tassert manifest.items[0]['media_type'] == 'application/xhtml+xml'\n\tassert manifest.items[0]['properties'] == []\n\n\ndef test_find_by_property():\n\tmanifest = Manifest(VALID_MANIFEST_XML)\n\tnav_item = manifest.find_by_property('nav')\n\tassert nav_item['id'] == 'nav'\n\tassert nav_item['href'] == 'nav.xhtml'\n\n\ndef test_find_by_id():\n\tmanifest = Manifest(VALID_MANIFEST_XML)\n\tchapter = manifest.find_by_id('chapter1')\n\tassert chapter['href'] == 'chapter1.xhtml'\n\tassert chapter['media_type'] == 
'application/xhtml+xml'\n\n\ndef test_find_by_media_type():\n\tmanifest = Manifest(VALID_MANIFEST_XML)\n\txhtml_items = manifest.find_by_media_type('application/xhtml+xml')\n\tassert len(xhtml_items) == 2\n\tassert all(item['media_type'] == 'application/xhtml+xml' for item in xhtml_items)\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<manifest xmlns=\"http://www.idpf.org/2007/opf\">\\n    <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\\n\\n    <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>\\n</manifest>',\n\t\t\tFalse,\n\t\t\t'<manifest xmlns=\"http://www.idpf.org/2007/opf\">\\n    <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\\n\\n    <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>\\n</manifest>',\n\t\t),\n\t\t(\n\t\t\t'<manifest xmlns=\"http://www.idpf.org/2007/opf\">\\n    <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\\n\\n    <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>\\n</manifest>',\n\t\t\tTrue,\n\t\t\t'<manifest xmlns=\"http://www.idpf.org/2007/opf\">\\n  <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\\n  <item id=\"chapter1\" href=\"chapter1.xhtml\" media-type=\"application/xhtml+xml\"/>\\n</manifest>\\n',\n\t\t),\n\t],\n)\ndef test_manifest_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing for Manifest.\"\"\"\n\tmanifest = Manifest(xml_content)\n\n\tassert manifest.to_str(pretty_print=pretty_print) == expected\n"
  },
  {
    "path": "tests/test_metadata.py",
    "content": "import pytest\n\nfrom epub_utils.exceptions import ValidationError\nfrom epub_utils.package.metadata import Metadata\n\nVALID_METADATA_XML = \"\"\"\n<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:dcterms=\"http://purl.org/dc/terms/\">\n    <dc:title>Test Book</dc:title>\n    <dc:creator>Test Author</dc:creator>\n    <dc:identifier>test-id-123</dc:identifier>\n    <dc:language>en</dc:language>\n    <dc:subject>Fiction</dc:subject>\n    <dc:subject>Science Fiction</dc:subject>\n    <dc:date>2024-01-01</dc:date>\n    <dc:publisher>Test Publisher</dc:publisher>\n    <meta property=\"dcterms:modified\">2023-11-28T14:50:13Z</meta>\n    <meta property=\"dcterms:source\">Original Source</meta>\n</metadata>\n\"\"\"\n\nINVALID_METADATA_XML = \"\"\"\n<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n    <dc:title>Test Book</dc:title>\n    <dc:creator>Test Author</dc:creator>\n</metadata>\n\"\"\"\n\n\ndef test_metadata_parse_valid_element():\n\t\"\"\"Test parsing valid metadata XML with both required and optional DC terms.\"\"\"\n\tmetadata = Metadata(VALID_METADATA_XML)\n\n\tassert metadata.title == 'Test Book'\n\tassert metadata.creator == 'Test Author'\n\tassert metadata.identifier == 'test-id-123'\n\n\tassert metadata.language == 'en'\n\tassert metadata.subject == ['Fiction', 'Science Fiction']\n\tassert metadata.date == '2024-01-01'\n\tassert metadata.publisher == 'Test Publisher'\n\n\tassert metadata.modified == '2023-11-28T14:50:13Z'\n\tassert metadata.source == 'Original Source'\n\n\ndef test_metadata_validate_missing_identifier_with_raise_exception():\n\t\"\"\"Test that parsing metadata without identifier raises error.\"\"\"\n\twith pytest.raises(ValidationError):\n\t\tMetadata(INVALID_METADATA_XML)._validate(raise_exception=True)\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n    <dc:title>Test Book</dc:title>\\n\\n    
<dc:creator>Test Author</dc:creator>\\n\\n    <dc:identifier>test-id-123</dc:identifier>\\n</metadata>',\n\t\t\tFalse,\n\t\t\t'<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n    <dc:title>Test Book</dc:title>\\n\\n    <dc:creator>Test Author</dc:creator>\\n\\n    <dc:identifier>test-id-123</dc:identifier>\\n</metadata>',\n\t\t),\n\t\t(\n\t\t\t'<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n    <dc:title>Test Book</dc:title>\\n\\n    <dc:creator>Test Author</dc:creator>\\n\\n    <dc:identifier>test-id-123</dc:identifier>\\n</metadata>',\n\t\t\tTrue,\n\t\t\t'<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n  <dc:title>Test Book</dc:title>\\n  <dc:creator>Test Author</dc:creator>\\n  <dc:identifier>test-id-123</dc:identifier>\\n</metadata>\\n',\n\t\t),\n\t],\n)\ndef test_metadata_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing for Metadata.\"\"\"\n\tmetadata = Metadata(xml_content)\n\n\tassert metadata.to_str(pretty_print=pretty_print) == expected\n"
  },
  {
    "path": "tests/test_nav_navigation.py",
    "content": "import pytest\n\nfrom epub_utils.navigation.nav import EPUBNavDocNavigation\n\nNAV_XML = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li id=\"ch1-li\">\n                <a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a>\n            </li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\ndef test_nav_doc_navigation_initialization():\n\t\"\"\"Test that the EPUBNavDocNavigation class initializes correctly.\"\"\"\n\tnav = EPUBNavDocNavigation(NAV_XML, 'application/xhtml+xml', 'nav.xhtml')\n\tassert nav is not None\n\tassert nav.xml_content == NAV_XML\n\tassert nav.media_type == 'application/xhtml+xml'\n\tassert nav.href == 'nav.xhtml'\n\n\tassert nav.xmlns == 'http://www.w3.org/1999/xhtml'\n\tassert nav.lang == 'en'\n\n\ndef test_nav_doc_navigation_interface():\n\t\"\"\"Test the new navigation interface methods.\"\"\"\n\tnav = EPUBNavDocNavigation(NAV_XML, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Test get_toc_items\n\ttoc_items = nav.get_toc_items()\n\tassert len(toc_items) == 1\n\n\titem = toc_items[0]\n\tassert item.id == 'ch1'\n\tassert item.label == 'Chapter 1'\n\tassert item.target == 'chapter1.xhtml'\n\tassert item.order == 1\n\tassert item.level == 0\n\n\t# Test get_page_list (should be empty for this sample)\n\tpage_list = nav.get_page_list()\n\tassert len(page_list) == 0\n\n\t# Test get_landmarks (should be empty for this sample)\n\tlandmarks = nav.get_landmarks()\n\tassert len(landmarks) == 0\n\n\t# Test find_item_by_id\n\tfound_item = nav.find_item_by_id('ch1')\n\tassert found_item is not None\n\tassert found_item.label == 'Chapter 1'\n\n\t# Test find_items_by_target\n\tfound_items = nav.find_items_by_target('chapter1.xhtml')\n\tassert len(found_items) == 
1\n\tassert found_items[0].id == 'ch1'\n\n\ndef test_nav_doc_navigation_toc_items_as_dicts():\n\t\"\"\"Test hierarchical navigation structure.\"\"\"\n\tnav_xml_hierarchical = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li id=\"ch1-li\">\n                <a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a>\n                <ol>\n                    <li id=\"ch1-1-li\">\n                        <a href=\"chapter1.xhtml#section1\" id=\"ch1-1\">Section 1.1</a>\n                    </li>\n                </ol>\n            </li>\n            <li id=\"ch2-li\">\n                <a href=\"chapter2.xhtml\" id=\"ch2\">Chapter 2</a>\n            </li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_hierarchical, 'application/xhtml+xml', 'nav.xhtml')\n\n\ttoc_items = nav.get_toc_items_as_dicts()\n\n\tassert toc_items == [\n\t\t{\n\t\t\t'id': 'ch1',\n\t\t\t'label': 'Chapter 1',\n\t\t\t'target': 'chapter1.xhtml',\n\t\t\t'order': 1,\n\t\t\t'level': 0,\n\t\t\t'type': None,\n\t\t\t'children': [\n\t\t\t\t{\n\t\t\t\t\t'id': 'ch1-1',\n\t\t\t\t\t'label': 'Section 1.1',\n\t\t\t\t\t'target': 'chapter1.xhtml#section1',\n\t\t\t\t\t'order': 1,\n\t\t\t\t\t'level': 1,\n\t\t\t\t\t'type': None,\n\t\t\t\t\t'children': [],\n\t\t\t\t}\n\t\t\t],\n\t\t},\n\t\t{\n\t\t\t'id': 'ch2',\n\t\t\t'label': 'Chapter 2',\n\t\t\t'target': 'chapter2.xhtml',\n\t\t\t'order': 2,\n\t\t\t'level': 0,\n\t\t\t'type': None,\n\t\t\t'children': [],\n\t\t},\n\t]\n\n\ndef test_nav_doc_navigation_page_list():\n\t\"\"\"Test page list functionality.\"\"\"\n\tnav_xml_with_pages = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" 
xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li><a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a></li>\n        </ol>\n    </nav>\n    <nav epub:type=\"page-list\" id=\"page-list\">\n        <h1>List of Pages</h1>\n        <ol>\n            <li><a href=\"chapter1.xhtml#page1\" id=\"page1\">1</a></li>\n            <li><a href=\"chapter1.xhtml#page2\" id=\"page2\">2</a></li>\n            <li><a href=\"chapter2.xhtml#page3\" id=\"page3\">3</a></li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_with_pages, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Test get_page_list\n\tpage_list = nav.get_page_list()\n\tassert len(page_list) == 3\n\n\tpage1 = page_list[0]\n\tassert page1.id == 'page1'\n\tassert page1.label == '1'\n\tassert page1.target == 'chapter1.xhtml#page1'\n\tassert page1.order == 1\n\tassert page1.level == 0\n\tassert page1.item_type in [None, 'page']  # Could be None or 'page'\n\n\tpage2 = page_list[1]\n\tassert page2.id == 'page2'\n\tassert page2.label == '2'\n\tassert page2.target == 'chapter1.xhtml#page2'\n\n\tpage3 = page_list[2]\n\tassert page3.id == 'page3'\n\tassert page3.label == '3'\n\tassert page3.target == 'chapter2.xhtml#page3'\n\n\ndef test_nav_doc_navigation_landmarks():\n\t\"\"\"Test landmarks functionality.\"\"\"\n\tnav_xml_with_landmarks = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li><a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a></li>\n        </ol>\n    </nav>\n    <nav epub:type=\"landmarks\" id=\"landmarks\">\n        
<h1>Landmarks</h1>\n        <ol>\n            <li><a href=\"cover.xhtml\" epub:type=\"cover\" id=\"cover\">Cover</a></li>\n            <li><a href=\"toc.xhtml\" epub:type=\"toc\" id=\"toc-landmark\">Table of Contents</a></li>\n            <li><a href=\"chapter1.xhtml\" epub:type=\"bodymatter\" id=\"start\">Start of Content</a></li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_with_landmarks, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Test get_landmarks\n\tlandmarks = nav.get_landmarks()\n\tassert len(landmarks) == 3\n\n\tcover_landmark = landmarks[0]\n\tassert cover_landmark.id == 'cover'\n\tassert cover_landmark.label == 'Cover'\n\tassert cover_landmark.target == 'cover.xhtml'\n\tassert cover_landmark.item_type == 'cover'\n\n\ttoc_landmark = landmarks[1]\n\tassert toc_landmark.id == 'toc-landmark'\n\tassert toc_landmark.label == 'Table of Contents'\n\tassert toc_landmark.target == 'toc.xhtml'\n\tassert toc_landmark.item_type == 'toc'\n\n\tstart_landmark = landmarks[2]\n\tassert start_landmark.id == 'start'\n\tassert start_landmark.label == 'Start of Content'\n\tassert start_landmark.target == 'chapter1.xhtml'\n\tassert start_landmark.item_type == 'bodymatter'\n\n\ndef test_nav_doc_navigation_editing():\n\t\"\"\"Test the editing capabilities of the navigation interface.\"\"\"\n\tfrom epub_utils.navigation.base import NavigationItem\n\n\tnav = EPUBNavDocNavigation(NAV_XML, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Test adding a new item\n\tnew_item = NavigationItem(id='ch2', label='Chapter 2', target='chapter2.xhtml', order=2)\n\n\tnav.add_toc_item(new_item)\n\n\t# Verify it was added\n\ttoc_items = nav.get_toc_items()\n\tassert len(toc_items) == 2\n\n\tnew_toc_item = nav.find_item_by_id('ch2')\n\tassert new_toc_item is not None\n\tassert new_toc_item.label == 'Chapter 2'\n\n\t# Test updating an item\n\tsuccess = nav.update_toc_item(\n\t\t'ch2', label='Chapter Two Updated', 
target='chapter2_updated.xhtml'\n\t)\n\tassert success\n\n\tupdated_item = nav.find_item_by_id('ch2')\n\tassert updated_item.label == 'Chapter Two Updated'\n\tassert updated_item.target == 'chapter2_updated.xhtml'\n\n\t# Test removing an item\n\tsuccess = nav.remove_toc_item('ch2')\n\tassert success\n\n\t# Verify it was removed\n\ttoc_items = nav.get_toc_items()\n\tassert len(toc_items) == 1\n\tassert nav.find_item_by_id('ch2') is None\n\n\ndef test_nav_doc_navigation_span_elements():\n\t\"\"\"Test navigation with span elements (non-linked text).\"\"\"\n\tnav_xml_with_spans = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li id=\"part1-li\">\n                <span id=\"part1\">Part 1</span>\n                <ol>\n                    <li><a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a></li>\n                    <li><a href=\"chapter2.xhtml\" id=\"ch2\">Chapter 2</a></li>\n                </ol>\n            </li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_with_spans, 'application/xhtml+xml', 'nav.xhtml')\n\n\ttoc_items = nav.get_toc_items()\n\tassert len(toc_items) == 1\n\n\tpart1_item = toc_items[0]\n\tassert part1_item.id == 'part1'\n\tassert part1_item.label == 'Part 1'\n\tassert part1_item.target == ''  # span elements don't have targets\n\tassert len(part1_item.children) == 2\n\n\tch1_item = part1_item.children[0]\n\tassert ch1_item.id == 'ch1'\n\tassert ch1_item.label == 'Chapter 1'\n\tassert ch1_item.target == 'chapter1.xhtml'\n\n\tch2_item = part1_item.children[1]\n\tassert ch2_item.id == 'ch2'\n\tassert ch2_item.label == 'Chapter 2'\n\tassert ch2_item.target == 'chapter2.xhtml'\n\n\ndef 
test_nav_doc_navigation_item_types():\n\t\"\"\"Test navigation with epub:type attributes.\"\"\"\n\tnav_xml_with_types = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li><a href=\"preface.xhtml\" epub:type=\"preface\" id=\"preface\">Preface</a></li>\n            <li><a href=\"chapter1.xhtml\" epub:type=\"chapter\" id=\"ch1\">Chapter 1</a></li>\n            <li><a href=\"appendix.xhtml\" epub:type=\"appendix\" id=\"appendix\">Appendix</a></li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_with_types, 'application/xhtml+xml', 'nav.xhtml')\n\n\ttoc_items = nav.get_toc_items()\n\tassert len(toc_items) == 3\n\n\tpreface_item = toc_items[0]\n\tassert preface_item.item_type == 'preface'\n\n\tchapter_item = toc_items[1]\n\tassert chapter_item.item_type == 'chapter'\n\n\tappendix_item = toc_items[2]\n\tassert appendix_item.item_type == 'appendix'\n\n\ndef test_nav_doc_navigation_invalid_media_type():\n\t\"\"\"Test that invalid media types raise ValueError.\"\"\"\n\twith pytest.raises(ValueError) as excinfo:\n\t\tEPUBNavDocNavigation(NAV_XML, 'application/x-dtbncx+xml', 'nav.xhtml')\n\tassert (\n\t\t\"Media type 'application/x-dtbncx+xml' is not supported for EPUB Navigation Document\"\n\t\tin str(excinfo.value)\n\t)\n\n\ndef test_nav_doc_navigation_malformed_xml():\n\t\"\"\"Test handling of malformed XML.\"\"\"\n\timport pytest\n\n\tfrom epub_utils.exceptions import ParseError\n\n\tmalformed_xml = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\">\n        <ol>\n            <li><a href=\"chapter1.xhtml\">Chapter 1</a>\n  
      </ol>\n    </nav>\n</body>\n\"\"\"  # Missing closing </li> and </html>\n\n\twith pytest.raises(ParseError):\n\t\tEPUBNavDocNavigation(malformed_xml, 'application/xhtml+xml', 'nav.xhtml')\n\n\ndef test_nav_doc_navigation_output_methods():\n\t\"\"\"Test the various output methods.\"\"\"\n\tnav = EPUBNavDocNavigation(NAV_XML, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Test __str__\n\tstr_output = str(nav)\n\tassert str_output == NAV_XML\n\n\t# Test to_str (should use XMLPrinter)\n\tto_str_output = nav.to_str()\n\tassert isinstance(to_str_output, str)\n\tassert 'Chapter 1' in to_str_output\n\n\t# Test to_xml (may include ANSI color codes)\n\tto_xml_output = nav.to_xml()\n\tassert isinstance(to_xml_output, str)\n\t# Remove ANSI escape codes for testing\n\timport re\n\n\tclean_output = re.sub(r'\\x1b\\[[0-9;]*m', '', to_xml_output)\n\tassert 'Chapter 1' in clean_output\n\n\t# Test to_plain\n\tto_plain_output = nav.to_plain()\n\tassert isinstance(to_plain_output, str)\n\tassert 'Chapter 1' in to_plain_output\n\n\ndef test_nav_doc_navigation_reorder_items():\n\t\"\"\"Test reordering TOC items.\"\"\"\n\tnav_xml_multiple = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n    <nav epub:type=\"toc\" id=\"toc\">\n        <h1>Table of Contents</h1>\n        <ol>\n            <li><a href=\"chapter1.xhtml\" id=\"ch1\">Chapter 1</a></li>\n            <li><a href=\"chapter2.xhtml\" id=\"ch2\">Chapter 2</a></li>\n            <li><a href=\"chapter3.xhtml\" id=\"ch3\">Chapter 3</a></li>\n        </ol>\n    </nav>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(nav_xml_multiple, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# Get original order\n\toriginal_items = nav.get_toc_items()\n\tassert [item.id for item in original_items] == ['ch1', 'ch2', 'ch3']\n\n\t# Reorder 
items\n\tnav.reorder_toc_items(['ch3', 'ch1', 'ch2'])\n\n\t# Check that the method completed without error\n\t# Note: The actual reordering implementation may vary\n\t# and this test mainly ensures the method can be called\n\n\ndef test_nav_doc_navigation_empty_document():\n\t\"\"\"Test handling of empty navigation document.\"\"\"\n\tempty_nav_xml = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"en\">\n<head>\n    <title>Navigation Document</title>\n</head>\n<body>\n</body>\n</html>\"\"\"\n\n\tnav = EPUBNavDocNavigation(empty_nav_xml, 'application/xhtml+xml', 'nav.xhtml')\n\n\t# All lists should be empty\n\tassert len(nav.get_toc_items()) == 0\n\tassert len(nav.get_page_list()) == 0\n\tassert len(nav.get_landmarks()) == 0\n\n\t# find methods should return None/empty\n\tassert nav.find_item_by_id('nonexistent') is None\n\tassert len(nav.find_items_by_target('nonexistent.xhtml')) == 0\n"
  },
  {
    "path": "tests/test_ncx_navigation.py",
    "content": "from epub_utils.navigation.ncx import NCXNavigation\n\nNCX_XML = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<ncx xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\" xml:lang=\"en\">\n    <head>\n        <meta name=\"dtb:uid\" content=\"urn:uuid:12345\"/>\n        <meta name=\"dtb:depth\" content=\"1\"/>\n        <meta name=\"dtb:totalPageCount\" content=\"0\"/>\n        <meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n    </head>\n    <docTitle>\n        <text>Sample Book</text>\n    </docTitle>\n    <navMap>\n        <navPoint id=\"navpoint-1\" playOrder=\"1\">\n            <navLabel>\n                <text>Chapter 1</text>\n            </navLabel>\n            <content src=\"chapter1.xhtml\"/>\n        </navPoint>\n    </navMap>\n</ncx>\"\"\"\n\n\ndef test_ncx_navigation_initialization():\n\t\"\"\"Test that the NCXNavigation class initializes correctly.\"\"\"\n\tncx = NCXNavigation(NCX_XML, 'application/x-dtbncx+xml', 'toc.ncx')\n\tassert ncx is not None\n\tassert ncx.xml_content == NCX_XML\n\tassert ncx.media_type == 'application/x-dtbncx+xml'\n\tassert ncx.href == 'toc.ncx'\n\n\tassert ncx.xmlns == 'http://www.daisy.org/z3986/2005/ncx/'\n\tassert ncx.version == '2005-1'\n\tassert ncx.lang == 'en'\n\n\ndef test_ncx_navigation_interface():\n\t\"\"\"Test the new navigation interface methods.\"\"\"\n\tncx = NCXNavigation(NCX_XML, 'application/x-dtbncx+xml', 'toc.ncx')\n\n\t# Test get_toc_items\n\ttoc_items = ncx.get_toc_items()\n\tassert len(toc_items) == 1\n\n\titem = toc_items[0]\n\tassert item.id == 'navpoint-1'\n\tassert item.label == 'Chapter 1'\n\tassert item.target == 'chapter1.xhtml'\n\tassert item.order == 1\n\tassert item.level == 0\n\n\t# Test get_page_list (should be empty for this sample)\n\tpage_list = ncx.get_page_list()\n\tassert len(page_list) == 0\n\n\t# Test get_landmarks (should be empty for this sample)\n\tlandmarks = ncx.get_landmarks()\n\tassert len(landmarks) == 0\n\n\t# Test 
find_item_by_id\n\tfound_item = ncx.find_item_by_id('navpoint-1')\n\tassert found_item is not None\n\tassert found_item.label == 'Chapter 1'\n\n\t# Test find_items_by_target\n\tfound_items = ncx.find_items_by_target('chapter1.xhtml')\n\tassert len(found_items) == 1\n\tassert found_items[0].id == 'navpoint-1'\n\n\ndef test_ncx_navigation_hierarchy():\n\t\"\"\"Test hierarchical navigation structure.\"\"\"\n\tncx_xml_hierarchical = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<ncx xmlns=\"http://www.daisy.org/z3986/2005/ncx/\" version=\"2005-1\" xml:lang=\"en\">\n    <head>\n        <meta name=\"dtb:uid\" content=\"urn:uuid:12345\"/>\n        <meta name=\"dtb:depth\" content=\"2\"/>\n        <meta name=\"dtb:totalPageCount\" content=\"0\"/>\n        <meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n    </head>\n    <docTitle>\n        <text>Sample Book</text>\n    </docTitle>\n    <navMap>\n        <navPoint id=\"ch1\" playOrder=\"1\">\n            <navLabel>\n                <text>Chapter 1</text>\n            </navLabel>\n            <content src=\"chapter1.xhtml\"/>\n            <navPoint id=\"ch1-1\" playOrder=\"2\">\n                <navLabel>\n                    <text>Section 1.1</text>\n                </navLabel>\n                <content src=\"chapter1.xhtml#section1\"/>\n            </navPoint>\n        </navPoint>\n        <navPoint id=\"ch2\" playOrder=\"3\">\n            <navLabel>\n                <text>Chapter 2</text>\n            </navLabel>\n            <content src=\"chapter2.xhtml\"/>\n        </navPoint>\n    </navMap>\n</ncx>\"\"\"\n\n\tncx = NCXNavigation(ncx_xml_hierarchical, 'application/x-dtbncx+xml', 'toc.ncx')\n\n\ttoc_items = ncx.get_toc_items_as_dicts()\n\n\tassert toc_items == [\n\t\t{\n\t\t\t'id': 'ch1',\n\t\t\t'label': 'Chapter 1',\n\t\t\t'target': 'chapter1.xhtml',\n\t\t\t'order': 1,\n\t\t\t'level': 0,\n\t\t\t'type': None,\n\t\t\t'children': [\n\t\t\t\t{\n\t\t\t\t\t'id': 'ch1-1',\n\t\t\t\t\t'label': 'Section 
1.1',\n\t\t\t\t\t'target': 'chapter1.xhtml#section1',\n\t\t\t\t\t'order': 2,\n\t\t\t\t\t'level': 1,\n\t\t\t\t\t'type': None,\n\t\t\t\t\t'children': [],\n\t\t\t\t}\n\t\t\t],\n\t\t},\n\t\t{\n\t\t\t'id': 'ch2',\n\t\t\t'label': 'Chapter 2',\n\t\t\t'target': 'chapter2.xhtml',\n\t\t\t'order': 3,\n\t\t\t'level': 0,\n\t\t\t'type': None,\n\t\t\t'children': [],\n\t\t},\n\t]\n\n\ndef test_ncx_navigation_editing():\n\t\"\"\"Test the editing capabilities of the navigation interface.\"\"\"\n\tfrom epub_utils.navigation.base import NavigationItem\n\n\tncx = NCXNavigation(NCX_XML, 'application/x-dtbncx+xml', 'toc.ncx')\n\n\t# Test adding a new item\n\tnew_item = NavigationItem(id='ch2', label='Chapter 2', target='chapter2.xhtml', order=2)\n\n\tncx.add_toc_item(new_item)\n\n\t# Verify it was added\n\ttoc_items = ncx.get_toc_items()\n\tassert len(toc_items) == 2\n\n\tnew_toc_item = ncx.find_item_by_id('ch2')\n\tassert new_toc_item is not None\n\tassert new_toc_item.label == 'Chapter 2'\n\n\t# Test updating an item\n\tsuccess = ncx.update_toc_item(\n\t\t'ch2', label='Chapter Two Updated', target='chapter2_updated.xhtml'\n\t)\n\tassert success\n\n\tupdated_item = ncx.find_item_by_id('ch2')\n\tassert updated_item.label == 'Chapter Two Updated'\n\tassert updated_item.target == 'chapter2_updated.xhtml'\n\n\t# Test removing an item\n\tsuccess = ncx.remove_toc_item('ch2')\n\tassert success\n\n\t# Verify it was removed\n\ttoc_items = ncx.get_toc_items()\n\tassert len(toc_items) == 1\n\tassert ncx.find_item_by_id('ch2') is None\n"
  },
  {
    "path": "tests/test_package.py",
    "content": "import pytest\n\nfrom epub_utils.exceptions import InvalidEPUBError, UnsupportedFormatError\nfrom epub_utils.package import Package\n\nVALID_OPF_XML = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n        <dc:title>Sample EPUB</dc:title>\n        <dc:creator>John Doe</dc:creator>\n        <dc:identifier>12345</dc:identifier>\n    </metadata>\n    <manifest>\n        <item id=\"nav\" href=\"nav.xhtml\" media-type=\"application/xhtml+xml\" properties=\"nav\"/>\n    </manifest>\n\t<spine>\n\t\t<itemref idref=\"nav\" />\n\t</spine>\n</package>\n\"\"\"\n\nINVALID_OPF_XML_MISSING_METADATA = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\n</package>\n\"\"\"\n\nVALID_EPUB3_XML_WITHOUT_TOC = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n        <dc:title>Sample EPUB</dc:title>\n    </metadata>\n\t<manifest>\n        <item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\n    </manifest>\n\t<spine>\n\t\t<itemref idref=\"roads\" />\n\t</spine>\n</package>\n\"\"\"\n\nVALID_EPUB2_XML = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"2.0\">\n\t<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n\t\t<dc:title>Sample EPUB</dc:title>\n\t</metadata>\n\t<manifest>\n\t\t<item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n\t\t<item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\n\t</manifest>\n\t<spine toc=\"ncx\">\n\t\t<itemref idref=\"roads\" />\n\t</spine>\n</package>\n\"\"\"\n\nVALID_EPUB2_XML_WITHOUT_TOC = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"2.0\">\n\t<metadata 
xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n\t\t<dc:title>Sample EPUB</dc:title>\n\t</metadata>\n\t<manifest>\n\t\t<item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\n\t</manifest>\n\t<spine>\n\t\t<itemref idref=\"roads\" />\n\t</spine>\n</package>\n\"\"\"\n\nVALID_OEPBS1_XML_WITH_TOC = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"1.0\">\n\t<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n\t\t<dc:title>Sample EPUB</dc:title>\n\t</metadata>\n\t<manifest>\n\t\t<item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n\t\t<item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\n\t</manifest>\n\t<spine toc=\"ncx\">\n\t\t<itemref idref=\"roads\" />\n\t</spine>\n</package>\n\"\"\"\n\nINVALID_VERSION = \"\"\"<?xml version=\"1.0\"?>\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"4.0\">\n\t<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" />\n</package>\n\"\"\"\n\n\ndef test_package_initialization():\n\t\"\"\"\n\tTest that the Package class initializes correctly with valid OPF XML content.\n\t\"\"\"\n\tpackage = Package(VALID_OPF_XML)\n\tassert package.metadata.title == 'Sample EPUB'\n\tassert package.metadata.creator == 'John Doe'\n\tassert package.metadata.identifier == '12345'\n\n\ndef test_package_invalid_xml():\n\twith pytest.raises(InvalidEPUBError) as excinfo:\n\t\tPackage(INVALID_OPF_XML_MISSING_METADATA)\n\tassert 'OPF file missing required metadata element' in str(excinfo.value)\n\n\ndef test_epub3():\n\tpackage = Package(VALID_OPF_XML)\n\tassert package.version.public == '3.0'\n\tassert package.version.major == 3\n\tassert package.nav_href == 'nav.xhtml'\n\n\ndef test_epub3_without_toc():\n\tpackage = Package(VALID_EPUB3_XML_WITHOUT_TOC)\n\tassert package.version.public == '3.0'\n\tassert package.version.major == 3\n\tassert not package.nav_href\n\n\ndef test_epub2():\n\tpackage = Package(VALID_EPUB2_XML)\n\tassert 
package.version.public == '2.0'\n\tassert package.version.major == 2\n\tassert package.toc_href == 'toc.ncx'\n\n\ndef test_epub2_without_toc():\n\tpackage = Package(VALID_EPUB2_XML_WITHOUT_TOC)\n\tassert package.version.public == '2.0'\n\tassert package.version.major == 2\n\tassert not package.toc_href\n\n\ndef test_epub1():\n\tpackage = Package(VALID_OEPBS1_XML_WITH_TOC)\n\tassert package.version.public == '1.0'\n\tassert package.version.major == 1\n\tassert package.toc_href == 'toc.ncx'\n\n\ndef test_invalid_version():\n\twith pytest.raises(UnsupportedFormatError) as excinfo:\n\t\tpackage = Package(INVALID_VERSION)\n\tassert 'EPUB version 4.x is not supported (EPUB 4.0 format)' in str(excinfo.value)\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<?xml version=\"1.0\"?>\\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\\n\\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n\\n        <dc:title>Sample EPUB</dc:title>\\n    </metadata>\\n\\n    <manifest>\\n        <item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\\n    </manifest>\\n\\n    <spine>\\n        <itemref idref=\"roads\"/>\\n    </spine></package>',\n\t\t\tFalse,\n\t\t\t'<?xml version=\"1.0\"?>\\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\\n\\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n\\n        <dc:title>Sample EPUB</dc:title>\\n    </metadata>\\n\\n    <manifest>\\n        <item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\\n    </manifest>\\n\\n    <spine>\\n        <itemref idref=\"roads\"/>\\n    </spine></package>',\n\t\t),\n\t\t(\n\t\t\t'<?xml version=\"1.0\"?>\\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\\n\\n    <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n\\n        <dc:title>Sample EPUB</dc:title>\\n    </metadata>\\n\\n    <manifest>\\n        <item id=\"roads\" href=\"roads.xhtml\" 
media-type=\"application/xhtml+xml\"/>\\n    </manifest>\\n\\n    <spine>\\n        <itemref idref=\"roads\"/>\\n    </spine></package>',\n\t\t\tTrue,\n\t\t\t'<?xml version=\"1.0\"?>\\n<package xmlns=\"http://www.idpf.org/2007/opf\" version=\"3.0\">\\n  <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\\n    <dc:title>Sample EPUB</dc:title>\\n  </metadata>\\n  <manifest>\\n    <item id=\"roads\" href=\"roads.xhtml\" media-type=\"application/xhtml+xml\"/>\\n  </manifest>\\n  <spine>\\n    <itemref idref=\"roads\"/>\\n  </spine>\\n</package>\\n',\n\t\t),\n\t],\n)\ndef test_package_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing for Package.\"\"\"\n\tpackage = Package(xml_content)\n\n\tassert package.to_str(pretty_print=pretty_print) == expected\n"
  },
  {
    "path": "tests/test_spine.py",
    "content": "import pytest\n\nfrom epub_utils.package.spine import Spine\n\nVALID_SPINE_XML = \"\"\"\n<spine xmlns=\"http://www.idpf.org/2007/opf\" toc=\"ncx\" page-progression-direction=\"ltr\">\n    <itemref idref=\"cover\" linear=\"no\"/>\n    <itemref idref=\"nav\" linear=\"yes\"/>\n    <itemref idref=\"chapter1\" properties=\"page-spread-left\"/>\n    <itemref idref=\"chapter2\"/>\n</spine>\n\"\"\"\n\nMINIMAL_SPINE_XML = \"\"\"\n<spine xmlns=\"http://www.idpf.org/2007/opf\">\n    <itemref idref=\"content\"/>\n</spine>\n\"\"\"\n\n\ndef test_spine_initialization():\n\tspine = Spine(VALID_SPINE_XML)\n\n\tassert spine.toc == 'ncx'\n\tassert spine.page_progression_direction == 'ltr'\n\tassert len(spine.itemrefs) == 4\n\n\t# Test first itemref (cover)\n\tassert spine.itemrefs[0]['idref'] == 'cover'\n\tassert spine.itemrefs[0]['linear'] == False\n\tassert spine.itemrefs[0]['properties'] == []\n\n\t# Test third itemref (chapter1)\n\tassert spine.itemrefs[2]['idref'] == 'chapter1'\n\tassert spine.itemrefs[2]['linear'] == True\n\tassert spine.itemrefs[2]['properties'] == ['page-spread-left']\n\n\ndef test_minimal_spine():\n\tspine = Spine(MINIMAL_SPINE_XML)\n\n\tassert spine.toc is None\n\tassert spine.page_progression_direction == 'default'\n\tassert len(spine.itemrefs) == 1\n\tassert spine.itemrefs[0]['idref'] == 'content'\n\tassert spine.itemrefs[0]['linear'] == True\n\tassert spine.itemrefs[0]['properties'] == []\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<spine xmlns=\"http://www.idpf.org/2007/opf\" toc=\"ncx\">\\n\\n    <itemref idref=\"cover\" linear=\"no\"/>\\n\\n    <itemref idref=\"chapter1\"/>\\n</spine>',\n\t\t\tFalse,\n\t\t\t'<spine xmlns=\"http://www.idpf.org/2007/opf\" toc=\"ncx\">\\n\\n    <itemref idref=\"cover\" linear=\"no\"/>\\n\\n    <itemref idref=\"chapter1\"/>\\n</spine>',\n\t\t),\n\t\t(\n\t\t\t'<spine xmlns=\"http://www.idpf.org/2007/opf\" toc=\"ncx\">\\n\\n    <itemref idref=\"cover\" 
linear=\"no\"/>\\n\\n    <itemref idref=\"chapter1\"/>\\n</spine>',\n\t\t\tTrue,\n\t\t\t'<spine xmlns=\"http://www.idpf.org/2007/opf\" toc=\"ncx\">\\n  <itemref idref=\"cover\" linear=\"no\"/>\\n  <itemref idref=\"chapter1\"/>\\n</spine>\\n',\n\t\t),\n\t],\n)\ndef test_spine_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing for Spine.\"\"\"\n\tspine = Spine(xml_content)\n\n\tassert spine.to_str(pretty_print=pretty_print) == expected\n"
  },
  {
    "path": "tests/test_xhtml_content.py",
    "content": "import pytest\n\nfrom epub_utils.content.xhtml import XHTMLContent\n\n\ndef test_simple_paragraph():\n\t\"\"\"Test extraction from a simple paragraph.\"\"\"\n\txml_content = \"\"\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n    <body>\n        <p>This is a simple paragraph.</p>\n\n    </body>\n</html>\"\"\"\n\n\tcontent = XHTMLContent(xml_content, 'application/xhtml+xml', 'test.xhtml')\n\n\tassert content.inner_text == 'This is a simple paragraph.'\n\n\n@pytest.mark.parametrize(\n\t'xml_content,pretty_print,expected',\n\t[\n\t\t(\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t\tFalse,\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t),\n\t\t(\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t\tTrue,\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n  <body>\\n    <p>This is a simple paragraph.</p>\\n  </body>\\n</html>\\n',\n\t\t),\n\t\t(\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    
</body>\\n</html>',\n\t\t\tFalse,\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t),\n\t\t(\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t\tTrue,\n\t\t\t'<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n  <body>\\n    <p>This is a simple paragraph.</p>\\n  </body>\\n</html>\\n',\n\t\t),\n\t\t(\n\t\t\t'<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t\tFalse,\n\t\t\t'<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t),\n\t\t(\n\t\t\t'<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n    <body>\\n        <p>This is a simple paragraph.</p>\\n\\n    </body>\\n</html>',\n\t\t\tTrue,\n\t\t\t'<html xmlns=\"http://www.w3.org/1999/xhtml\">\\n  <body>\\n    <p>This is a simple paragraph.</p>\\n  </body>\\n</html>\\n',\n\t\t),\n\t],\n)\ndef test_to_str_pretty_print_parameter(xml_content, pretty_print, expected):\n\t\"\"\"Test XML output with and without pretty printing.\"\"\"\n\tcontent = XHTMLContent(xml_content, 'application/xhtml+xml', 'test.xhtml')\n\n\tassert content.to_str(pretty_print=pretty_print) == expected\n"
  }
]