[
  {
    "path": ".github/FUNDING.yml",
    "content": "# These are supported funding model platforms\n\ngithub: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]\npatreon: # Replace with a single Patreon username\nopen_collective: # Replace with a single Open Collective username\nko_fi: # Replace with a single Ko-fi username\ntidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel\ncommunity_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry\nliberapay: # Replace with a single Liberapay username\nissuehunt: # Replace with a single IssueHunt username\notechie: # Replace with a single Otechie username\nlfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry\ncustom: https://raw.githubusercontent.com/RapidAI/.github/6db6b6b9273f3151094a462a61fbc8e88564562c/assets/Sponsor.png\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/01-feature_request.md",
    "content": "---\nname: Feature Request\nabout: requests for new RapidOCR features\ntitle: 'Feature Request'\nlabels: 'Feature Request'\nassignees: ''\n\n---\n\n请您详细描述想要添加的新功能或者是新特性\n(Please describe in detail the new function or new feature you want to add)\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/02-bug.md",
    "content": "---\nname: 🐞 Bug\nabout: Bug\ntitle: 'Bug'\nlabels: 'Bug'\nassignees: ''\n\n---\n\n#### 问题描述 / Problem Description\n\n#### 运行环境 / Runtime Environment\n\n#### 复现代码 / Reproduction Code\n\n```python\n\n```\n\n#### 可能解决方案 / Possible solutions\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/03-blank.md",
    "content": "---\nname: Blank Template\nabout: Blank Template\ntitle: 'Blank Template'\nlabels: 'Blank Template'\nassignees: ''\n\n---"
  },
  {
    "path": ".github/workflows/SyncToGitee.yml",
    "content": "name: SyncToGitee\non:\n  push:\n    branches:\n      - main\njobs:\n  repo-sync:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout source codes\n        uses: actions/checkout@v3\n\n      - name: Mirror the Github organization repos to Gitee.\n        uses: Yikun/hub-mirror-action@v1.4\n        with:\n          src: 'github/RapidAI'\n          dst: 'gitee/RapidAI'\n          dst_key: ${{ secrets.GITEE_PRIVATE_KEY }}\n          dst_token:  ${{ secrets.GITEE_TOKEN }}\n          force_update: true\n          # only sync this repo\n          static_list: \"RapidLayout\"\n          debug: true\n"
  },
  {
    "path": ".github/workflows/docs_build_develop.yml",
    "content": "name: Build/Publish Develop Docs\non:\n  push:\n    branches:\n      - main\n    paths:\n      - \"docs/**\"\n      - \".github/workflows/docs_build_develop.yml\"\n      - \".github/workflows/docs_build_release.yml\"\n      - \"mkdocs.yml\"\npermissions:\n  contents: write\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n      - name: Configure Git Credentials\n        run: |\n          git config user.name github-actions[bot]\n          git config user.email 41898282+github-actions[bot]@users.noreply.github.com\n      - uses: actions/setup-python@v5\n        with:\n          python-version: 3.x\n      - run: echo \"cache_id=$(date --utc '+%V')\" >> $GITHUB_ENV\n      - uses: actions/cache@v4\n        with:\n          key: mkdocs-material-${{ env.cache_id }}\n          path: .cache\n          restore-keys: |\n            mkdocs-material-\n      - run: pip install mike mkdocs mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2\n      - run: |\n          git fetch origin gh-pages --depth=1 || true\n          mkdocs build\n          ls -la site/\n          mike deploy --push --update-aliases main latest\n          mike set-default --push latest\n"
  },
  {
    "path": ".github/workflows/docs_build_release.yml",
    "content": "name: Build/Publish Release Docs\non:\n  push:\n    tags:\n      - v*\n\npermissions:\n  contents: write\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n        with:\n          fetch-depth: 0\n      - name: Configure Git Credentials\n        run: |\n          git config user.name github-actions[bot]\n          git config user.email github-actions[bot]@users.noreply.github.com\n      - uses: actions/setup-python@v5\n        with:\n          python-version: 3.x\n      - run: echo \"cache_id=$(date --utc '+%V')\" >> $GITHUB_ENV\n      - uses: actions/cache@v4\n        with:\n          key: mkdocs-material-${{ env.cache_id }}\n          path: .cache\n          restore-keys: |\n            mkdocs-material-\n      - run: pip install mike mkdocs mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2\n      - run: |\n          git fetch origin gh-pages --depth=1\n          mike deploy --push \"${{ github.ref_name }}\"\n"
  },
  {
    "path": ".github/workflows/publish_whl.yml",
    "content": "name: Push rapidocr_layout to pypi\n\non:\n  push:\n    tags:\n      - v*\n\nenv:\n  RESOURCES_URL: https://github.com/RapidAI/RapidLayout/releases/download/v0.0.0/rapid_layout_models.zip\n\njobs:\n  UnitTesting:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Pull latest code\n        uses: actions/checkout@v3\n\n      - name: Set up Python 3.10\n        uses: actions/setup-python@v4\n        with:\n          python-version: '3.10'\n          architecture: 'x64'\n\n      - name: Display Python version\n        run: python -c \"import sys; print(sys.version)\"\n\n      - name: Unit testings\n        run: |\n          pip install -r requirements.txt\n          pip install pytest wheel onnxruntime\n          pytest tests/test*.py\n\n  GenerateWHL_PushPyPi:\n    needs: UnitTesting\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v3\n\n      - name: Run setup\n        run: |\n          pip install -r requirements.txt\n          pip install get_pypi_latest_version\n          wget $RESOURCES_URL\n          ZIP_NAME=${RESOURCES_URL##*/}\n          DIR_NAME=${ZIP_NAME%.*}\n          unzip $ZIP_NAME\n          mv $DIR_NAME/*.onnx rapid_layout/models/\n          python setup.py bdist_wheel ${{ github.ref_name }}\n\n      - name: Publish distribution 📦 to PyPI\n        uses: pypa/gh-action-pypi-publish@release/v1\n        with:\n          password: ${{ secrets.PYPI_API_TOKEN }}\n          packages_dir: dist/\n"
  },
  {
    "path": ".github/workflows/push_discord.yml",
    "content": "name: discord message\non:\n  release:\n    types: [published]\n\njobs:\n  notify:\n    runs-on: ubuntu-latest\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v4\n\n      - name: Prepare Discord message\n        id: prepare_message\n        run: |\n          full_msg=\"🚀 **New Release!** ${{ github.event.release.name }}\n\n          **Tag:** ${{ github.event.release.tag_name }}\n          **Author:** ${{ github.event.release.author.login }}\n\n          **Release Notes:**\n          ${{ github.event.release.body }}\"\n\n              if [ ${#full_msg} -gt 1990 ]; then\n                truncated_msg=\"${full_msg:0:1987}...\"\n              else\n                truncated_msg=\"$full_msg\"\n              fi\n\n              echo \"message<<EOF\" >> \"$GITHUB_OUTPUT\"\n              echo \"$truncated_msg\" >> \"$GITHUB_OUTPUT\"\n              echo \"EOF\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Send to Discord\n        env:\n          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_URL }}\n          DISCORD_USERNAME: Github Actions\n          DISCORD_AVATAR: https://cdn.discordapp.com/avatars/1460099944252702846/e57fd67dc7ca0cc840a0e87a82281bc5.webp?size=80\n        uses: Ilshidur/action-discord@0.4.0\n        with:\n          args: ${{ steps.prepare_message.outputs.message }}"
  },
  {
    "path": ".gitignore",
    "content": "# Created by .ignore support plugin (hsz.mobi)\n### Python template\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n.pytest_cache\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\npip-wheel-metadata/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n# *.manifest\n# *.spec\n*.res\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n.python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n#idea\n.vs\n.vscode\n.idea\n/images\n/models\n\n#models\n*.onnx\n\n*.ttf\n*.ttc\n\nlong1.jpg\n\n*.bin\n*.mapping\n*.xml\n\n*.pdiparams\n*.pdiparams.info\n*.pdmodel\n\n.DS_Store\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n- repo: https://gitee.com/SWHL/autoflake\n  rev: v2.1.1\n  hooks:\n    - id: autoflake\n      args:\n        [\n          \"--recursive\",\n          \"--in-place\",\n          \"--remove-all-unused-imports\",\n          \"--remove-unused-variable\",\n          \"--ignore-init-module-imports\",\n        ]\n      files: \\.py$\n- repo: https://gitee.com/SWHL/black\n  rev: 23.1.0\n  hooks:\n    - id: black\n      files: \\.py$"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2024 RapidAI\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n  <h1><b>Rapid <img src=\"https://cdn.jsdelivr.net/gh/twitter/twemoji@latest/assets/svg/1f4c4.svg\" width=\"28\" height=\"28\" alt=\"📄\" style=\"vertical-align: middle\"> Layout</b></h1>\n  <div>&nbsp;</div>\n  <b><font size=\"4\"><i>文档版面分析 - 定位标题、段落、表格与图片等版面元素</i></font></b>\n  <div>&nbsp;</div>\n\n<a href=\"https://huggingface.co/spaces/RapidAI/RapidLayoutv1\" target=\"_blank\"><img src=\"https://img.shields.io/badge/%F0%9F%A4%97-Hugging Face Demo-blue\"></a>\n<a href=\"\"><img src=\"https://img.shields.io/badge/Python->=3.6-aff.svg\"></a>\n<a href=\"\"><img src=\"https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg\"></a>\n<a href=\"https://pypi.org/project/rapid-layout/\"><img alt=\"PyPI\" src=\"https://img.shields.io/pypi/v/rapid-layout\"></a>\n<a href=\"https://pepy.tech/project/rapid-layout\"><img src=\"https://static.pepy.tech/personalized-badge/rapid-layout?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n<a href=\"https://semver.org/\"><img alt=\"SemVer2.0\" src=\"https://img.shields.io/badge/SemVer-2.0-brightgreen\"></a>\n<a href=\"https://github.com/psf/black\"><img src=\"https://img.shields.io/badge/code%20style-black-000000.svg\"></a>\n\n</div>\n\n### 📝 简介\n\nRapid Layout 汇集全网开源的版面分析能力，对文档类图像（论文截图、研报等）进行分析，定位其中的**类别与位置**，如标题、段落、表格、图片等版面元素。\n\n**支持场景概览：** 支持表格、中文、英文、论文、研报及通用版面等多种类型，内置 PP 系列、YOLOv8 系列以及推荐的 DocLayout-YOLO 等模型。不同场景版面差异较大，暂无单一模型覆盖所有场景；若业务效果不佳，建议自建训练集微调。完整模型列表与下载见[文档站](https://rapidai.github.io/RapidLayout/)。\n\n如果您觉得本仓库对您有帮助，欢迎给个 ⭐ 支持一下。\n\n### 🎥 效果展示\n\n<div align=\"center\">\n    <img src=\"docs/images/layout_vis.jpg\" width=\"50%\">\n</div>\n\n### 🛠️ 安装\n\n```bash\npip install rapid-layout onnxruntime\n```\n\n### 📋 使用\n\n```python\nfrom rapid_layout import RapidLayout\n\nlayout_engine = RapidLayout()\n\nimg_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\nresults = layout_engine(img_path)\nprint(results)\n\nresults.vis(\"layout_res.png\")\n```\n\n终端运行：`rapid_layout test_images/layout.png`\n\n### 📚 文档\n\n完整文档（安装、使用方式、模型列表、GPU/NPU 配置、参考项目等）请移步：[**Rapid Layout 文档**](https://rapidai.github.io/RapidLayout/)\n\n### 📋 更新日志\n\n版本更新与发布说明请查看：[**Releases**](https://github.com/RapidAI/RapidLayout/releases)。\n\n### 🙏 致谢\n\n- [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)\n- [PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)\n- [360LayoutAnalysis](https://github.com/360AILAB-NLP/360LayoutAnalysis)\n- [ONNX-YOLOv8-Object-Detection](https://github.com/ibaiGorordo/ONNX-YOLOv8-Object-Detection)\n- [ChineseDocumentPDF](https://github.com/SWHL/ChineseDocumentPDF)\n\n### 🤝 贡献指南\n\n欢迎通过 Issue 反馈问题与建议，或通过 Pull Request 参与代码与文档贡献。完整流程请参阅：[贡献指南](https://rapidai.github.io/RapidLayout/main/contributing/)。\n\n### 🎖 贡献者\n\n<p align=\"left\">\n  <a href=\"https://github.com/RapidAI/RapidLayout/graphs/contributors\">\n    <img src=\"https://contrib.rocks/image?repo=RapidAI/RapidLayout&max=400&columns=10\" width=\"40%\"/>\n  </a>\n</p>\n\n### 📜 引用\n\n若该项目对您的研究有帮助，可考虑引用：\n\n```bibtex\n@misc{RapidLayout,\n    title={{Rapid Layout}: Document Layout Analysis},\n    author={RapidAI Team},\n    howpublished = {\\url{https://github.com/RapidAI/RapidLayout}},\n    year={2024}\n}\n```\n\n### ⭐️ Star history\n\n[![Stargazers over 
time](https://starchart.cc/RapidAI/RapidLayout.svg?variant=adaptive)](https://starchart.cc/RapidAI/RapidLayout)\n\n### ⚖️ 开源许可证\n\n本项目采用 [Apache 2.0 license](LICENSE) 开源许可证。\n"
  },
  {
    "path": "cliff.toml",
    "content": "[changelog]\nbody = \"\"\"\n{% for group, commits in commits | group_by(attribute=\"group\") %}\n    ### {{ group | striptags | trim | upper_first }}\n    {% for commit in commits\n    | filter(attribute=\"scope\")\n    | sort(attribute=\"scope\") %}\n        - **({{commit.scope}})**{% if commit.breaking %} [**breaking**]{% endif %} \\\n            {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end=\"\") }}]($REPO/commit/{{ commit.id }})\n    {%- endfor -%}\n    {% raw %}\\n{% endraw %}\\\n    {%- for commit in commits %}\n        {%- if commit.scope -%}\n        {% else -%}\n            - {% if commit.breaking %} [**breaking**]{% endif %}\\\n                {{ commit.message }} by [@{{ commit.author.name }}](https://github.com/{{ commit.author.name }}) in [{{ commit.id | truncate(length=7, end=\"\") }}]($REPO/commit/{{ commit.id }})\n        {% endif -%}\n    {% endfor -%}\n{% endfor %}\n\n\n{% if github.contributors | length > 0 %}\n### 🎉 Contributors\n\n{% for contributor in github.contributors %}\n  - [@{{ contributor.username }}](https://github.com/{{ contributor.username }})\n{%- endfor -%}\n{% endif %}\n\n\n{% if version %}\n    {% if previous.version %}\\\n        **Full Changelog**:  [{{ version | trim_start_matches(pat=\"v\") }}]($REPO/compare/{{ previous.version }}..{{ version }})\n    {% else %}\\\n        **Full Changelog**:  [{{ version | trim_start_matches(pat=\"v\") }}]\n    {% endif %}\\\n{% else %}\\\n    ## [unreleased]\n{% endif %}\n\"\"\"\n\nfooter = \"\"\"\n\n\"\"\"\n\n# Remove leading and trailing whitespaces from the changelog's body.\ntrim = true\npostprocessors = [\n    # Replace the placeholder `<REPO>` with a URL.\n    { pattern = '\\$REPO', replace = \"https://github.com/RapidAI/RapidLayout\" }, # replace repository URL\n    # 去掉每行末尾的无效空格\n    { pattern = \"(?m)[ \\t]+$\", replace = \"\" },\n    # 将连续多个空行压缩为最多一个空行\n    { pattern = \"\\n{3,}\", replace = \"\\n\\n\" },\n]\n\n[git]\n# Parse commits according to the conventional commits specification.\n# See https://www.conventionalcommits.org\nconventional_commits = true\n# Exclude commits that do not match the conventional commits specification.\nfilter_unconventional = true\n# Split commits on newlines, treating each line as an individual commit.\nsplit_commits = false\n# An array of regex based parsers to modify commit messages prior to further processing.\ncommit_preprocessors = [\n    # Replace issue numbers with link templates to be updated in `changelog.postprocessors`.\n    #{ pattern = '\\((\\w+\\s)?#([0-9]+)\\)', replace = \"([#${2}](https://github.com/orhun/git-cliff/issues/${2}))\"},\n]\n# An array of regex based parsers for extracting data from the commit message.\n# Assigns commits to groups.\n# Optionally sets the commit's scope and can decide to exclude commits from further processing.\ncommit_parsers = [\n  { message = \"^feat\", group = \"<!-- 0 -->🚀 Features\" },\n  { message = \"^fix\", group = \"<!-- 1 -->🐛 Bug Fixes\" },\n  { message = \"^doc\", group = \"<!-- 3 -->📚 Documentation\" },\n  { message = \"^perf\", group = \"<!-- 4 -->⚡ Performance\" },\n  { message = \"^refactor\", group = \"<!-- 2 -->🚜 Refactor\" },\n  { message = \"^style\", group = \"<!-- 5 -->🎨 Styling\" },\n  { message = \"^test\", group = \"<!-- 6 -->🧪 Testing\" },\n  { message = \"^chore\\\\(release\\\\): prepare for\", skip = true },\n  { message = \"^chore\\\\(deps.*\\\\)\", skip = true },\n  { message = 
\"^chore\\\\(pr\\\\)\", skip = true },\n  { message = \"^chore\\\\(pull\\\\)\", skip = true },\n  { message = \"^chore|^ci\", group = \"<!-- 7 -->⚙️ Miscellaneous Tasks\" },\n  { body = \".*security\", group = \"<!-- 8 -->🛡️ Security\" },\n  { message = \"^revert\", group = \"<!-- 9 -->◀️ Revert\" },\n  { message = \".*\", group = \"<!-- 10 -->💼 Other\" },\n]\n# Exclude commits that are not matched by any commit parser.\nfilter_commits = false\n# Order releases topologically instead of chronologically.\ntopo_order = false\n# Order of commits in each group/release within the changelog.\n# Allowed values: newest, oldest\nsort_commits = \"newest\""
  },
  {
    "path": "demo.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom rapid_layout import EngineType, ModelType, RapidLayout\n\nlayout_engine = RapidLayout(\n    engine_type=EngineType.ONNXRUNTIME,\n    model_type=ModelType.PP_DOC_LAYOUTV2,\n)\n\nimg_url = \"https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/master/resources/test_files/pp_doc_layoutv2_layout.jpg\"\nresults = layout_engine(img_url)\nprint(results)\n\nresults.vis(\"layout_res.png\")\n"
  },
  {
    "path": "docs/blog/.authors.yml",
    "content": "authors:\n  SWHL:\n    name: SWHL\n    description: Creator\n    avatar: https://avatars.githubusercontent.com/u/28639377?v=4\n    url: https://swhl.github.io/latest/"
  },
  {
    "path": "docs/blog/.meta.yml",
    "content": "comments: true\nhide:\n  - feedback"
  },
  {
    "path": "docs/blog/index.md",
    "content": "# Blog\n\n"
  },
  {
    "path": "docs/blog/posts/support_pp_doc_layout.md",
    "content": "---\ntitle: 支持 PP-DocLayoutV2/V3 系列模型\ndate:\n  created: 2026-02-10\n  updated: 2026-03-24\nauthors: [SWHL]\nslug: support-PP-DocLayoutv2-v3\ncategories:\n  - General\ncomments: true\n---\n\n本篇文章主要记录如何集成 PP-DocLayoutV2/V3 模型的\n\n<!-- more -->\n\n### 引言\n\nPP-DocLayout 系列模型在版面分析方面效果很好，目前已经作为 PaddleOCR-VL 系列模型的前置，起着至关重要的作用。\n\n文档智能的关键地方就在于此。因此，想着将该模型纳入 RapidLayout 系列模型中，方便小伙伴们快速使用。\n\n### 运行环境\n\n- 操作系统：Ubuntu\n- Python：3.10.14\n- 其他依赖环境：\n\n    ```text linenums=\"1\"\n    paddle2onnx==2.1.0\n    paddlepaddle==3.3.0\n    onnx==1.17.0\n    onnxruntime==1.23.2\n    ```\n\n### 转换命令\n\n```bash\npaddle2onnx  --model_dir=models/PP-DocLayoutV2  --model_filename inference.json --params_filename inference.pdiparams  --save_file=./models/PP-DocLayoutV2/inference.onnx  --enable_onnx_checker=True\n```\n\n### 比较结果\n\n我在 `/xxxx/miniforge3/envs/wjh_debug/lib/python3.10/site-packages/paddlex/inference/models/layout_analysis/predictor.py` 中插入以下代码（在 **L103** 行左右），来保证输入相同，比较输出。\n\n#### PP-DocLayoutV2\n\n⚠️注意：按照上面直接转换后，在相同输入下，ONNX 模型和 Paddle 模型推理结果误差为 **14.8%**。在我看来，这个误差其实挺大的。\n\n但是从可视化示例图结果来看，两者并无明显区别。可能在某些图上会有较大区别。\n\n```python linenums=\"1\" title=\"比较两种格式模型推理结果\"\n\n# 省略前面代码... ...\n\nimport onnxruntime\nimport numpy as np\n\nmodel_path = \"models/PP-DocLayoutV2/inference.onnx\"\nort_session = onnxruntime.InferenceSession(model_path)\nort_inputs = {\n    \"im_shape\": batch_inputs[0],\n    \"image\": batch_inputs[1],\n    \"scale_factor\": batch_inputs[2],\n}\nort_outputs = ort_session.run(None, ort_inputs)\n\n# do infer\nbatch_preds = self.infer(batch_inputs)\n\n# 千分位是否相同\nnp.testing.assert_allclose(batch_preds[0], ort_outputs[0], atol=1e-3, rtol=0)\n```\n\n输出结果如下：\n\n```bash linenums=\"1\" hl_lines=\"21-23\"\nTraceback (most recent call last):\n  File \"/xxxx/paddleocr/test_pp_doc_layoutv2.py\", line 4, in <module>\n    output = model.predict(\n  File \"/xxxx/lib/python3.10/site-packages/paddleocr/_models/base.py\", line 57, in predict\n    result = list(self.predict_iter(*args, **kwargs))\n  File \"/xxxx/lib/python3.10/site-packages/paddlex/inference/models/base/predictor/base_predictor.py\", line 281, in __call__\n    yield from self.apply(input, **kwargs)\n  File \"/xxxx/lib/python3.10/site-packages/paddlex/inference/models/base/predictor/base_predictor.py\", line 338, in apply\n    prediction = self.process(batch_data, **kwargs)\n  File \"/xxxx/lib/python3.10/site-packages/paddlex/inference/models/layout_analysis/predictor.py\", line 119, in process\n    np.testing.assert_allclose(batch_preds[0], ort_outputs[0], atol=1e-3, rtol=0)\n  File \"/xxxx/lib/python3.10/site-packages/numpy/testing/_private/utils.py\", line 1504, in assert_allclose\n    assert_array_compare(compare, actual, desired, err_msg=str(err_msg),\n  File \"/xxxx/lib/python3.10/contextlib.py\", line 79, in inner\n    return func(*args, **kwds)\n  File \"/xxxx/lib/python3.10/site-packages/numpy/testing/_private/utils.py\", line 797, in assert_array_compare\n    raise AssertionError(msg)\nAssertionError:\nNot equal to tolerance rtol=0, atol=0.001\n\nMismatched elements: 354 / 2400 (14.8%)\nMax absolute difference: 196.\nMax relative difference: 194.\n x: array([[2.200000e+01, 9.889924e-01, 3.354079e+01, ..., 6.150450e+02,\n        2.900000e+02, 2.900000e+02],\n       [2.200000e+01, 9.888635e-01, 3.372379e+01, ..., 8.526023e+02,...\n y: array([[2.200000e+01, 9.889925e-01, 3.354081e+01, ..., 6.150450e+02,\n        2.900000e+02, 2.900000e+02],\n       [2.200000e+01, 9.888635e-01, 3.372382e+01, ..., 
8.526024e+02,...\n```\n\n暂时先用这个 ONNX 模型，该问题已经反馈到了 Paddle2ONNX issue [#1608](https://github.com/PaddlePaddle/Paddle2ONNX/issues/1608#issuecomment-3875561303)\n\n#### PP-DocLayoutV3\n\n和 PP-DocLayoutV2 相同环境，相同转换代码，这个模型误差就小很多了，仅有 **1.57%** 了。\n\n```bash\nAssertionError:\nNot equal to tolerance rtol=0, atol=0.001\n\nMismatched elements: 33 / 2100 (1.57%)\nMax absolute difference among violations: 1.\nMax relative difference among violations: 0.01754386\n ACTUAL: array([[2.200000e+01, 9.658169e-01, 3.387792e+01, ..., 3.626684e+02,\n        8.528884e+02, 1.540000e+02],\n       [2.200000e+01, 9.657925e-01, 3.363610e+01, ..., 3.633332e+02,...\n DESIRED: array([[2.200000e+01, 9.658167e-01, 3.387791e+01, ..., 3.626685e+02,\n        8.528885e+02, 1.530000e+02],\n       [2.200000e+01, 9.657924e-01, 3.363615e+01, ..., 3.633333e+02,...\n```\n\n### 剥离推理代码\n\n因为 PaddleOCR 库中需要兼容的推理代码较多，大而全。这也导致了有些臃肿。这是难以避免的。但是如果只看 PP-DocLayout 推理代码的话，很多问题就很简单了。\n\n完整的推理代码，我放到了 Gist 上 → [link](https://gist.github.com/SWHL/c9455e8947f4abdfbbd8439c0bb83410)\n\n### 字典写入 ONNX\n\n```python linenums=\"1\" title=\"write_dict.py\"\nfrom pathlib import Path\nfrom typing import List, Union\n\nimport onnx\nimport onnxruntime as ort\nfrom onnx import ModelProto\n\n\nclass ONNXMetaOp:\n    @classmethod\n    def add_meta(\n        cls,\n        model_path: Union[str, Path],\n        key: str,\n        value: List[str],\n        delimiter: str = \"\\n\",\n    ) -> ModelProto:\n        model = onnx.load_model(model_path)\n        meta = model.metadata_props.add()\n        meta.key = key\n        meta.value = delimiter.join(value)\n        return model\n\n    @classmethod\n    def get_meta(\n        cls, model_path: Union[str, Path], key: str, split_sym: str = \"\\n\"\n    ) -> List[str]:\n        sess = ort.InferenceSession(model_path)\n        meta_map = sess.get_modelmeta().custom_metadata_map\n        key_content = meta_map.get(key)\n        key_list = key_content.split(split_sym)\n        return key_list\n\n    @classmethod\n    def del_meta(cls, model_path: Union[str, Path]) -> ModelProto:\n        model = onnx.load_model(model_path)\n        del model.metadata_props[:]\n        return model\n\n    @classmethod\n    def save_model(cls, save_path: Union[str, Path], model: ModelProto):\n        onnx.save_model(model, save_path)\n\n\npaper_label = [\n    \"abstract\",\n    \"algorithm\",\n    \"aside_text\",\n    \"chart\",\n    \"content\",\n    \"display_formula\",\n    \"doc_title\",\n    \"figure_title\",\n    \"footer\",\n    \"footer_image\",\n    \"footnote\",\n    \"formula_number\",\n    \"header\",\n    \"header_image\",\n    \"image\",\n    \"inline_formula\",\n    \"number\",\n    \"paragraph_title\",\n    \"reference\",\n    \"reference_content\",\n    \"seal\",\n    \"table\",\n    \"text\",\n    \"vertical_text\",\n    \"vision_footnote\",\n]\nmodel_path = \"models/inference.onnx\"\nmodel = ONNXMetaOp.add_meta(model_path, key=\"character\", value=paper_label)\n\nnew_model_path = \"models/pp_doc_layoutv2.onnx\"\nONNXMetaOp.save_model(new_model_path, model)\n\nt = ONNXMetaOp.get_meta(new_model_path, key=\"character\")\nprint(t)\n```\n\n输出以下 `label`，则认为成功：\n\n```bash linenums=\"1\"\n$ python write_dict.py\n['abstract', 'algorithm', 'aside_text', 'chart', 'content', 'display_formula', 'doc_title', 'figure_title', 'footer', 'footer_image', 'footnote', 'formula_number', 'header', 'header_image', 'image', 'inline_formula', 'number', 'paragraph_title', 'reference', 'reference_content', 'seal', 'table', 'text', 'vertical_text', 
'vision_footnote']\n```\n\nPP-DocLayoutV2 和 PP-DocLayoutV3 字典是一样的。\n\n### 使用\n\n目前 PP-DocLayoutV2 在 `rapid_layout>=1.1.0` 已经支持。PP-DocLayoutV3 在 `rapid_layout>=1.2.0` 中支持。使用示例：\n\n```python linenums=\"1\"\nfrom rapid_layout import EngineType, ModelType, RapidLayout\n\nlayout_engine = RapidLayout(\n    engine_type=EngineType.ONNXRUNTIME,\n    model_type=ModelType.PP_DOC_LAYOUTV2,\n)\n\nimg_url = \"https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/master/resources/test_files/pp_doc_layoutv2_layout.jpg\"\nresults = layout_engine(img_url)\nprint(results)\n\nresults.vis(\"layout_res.png\")\n```\n"
  },
  {
    "path": "docs/contributing.md",
    "content": "---\ncomments: true\ntitle: 贡献指南\nhide:\n  - navigation\n#   - toc\n---\n\n感谢你对 Rapid Layout 的关注与贡献！本文档说明如何参与项目的代码开发与文档贡献，包括环境准备、开发流程和提交流程。\n\n## 前置要求\n\n- Python >= 3.6（推荐 3.8+）\n- Git\n- 已注册的 GitHub 账号\n\n---\n\n## 一、克隆源码\n\n从 Rapid Layout 主仓库克隆项目到本地：\n\n```bash\ngit clone https://github.com/RapidAI/RapidLayout.git\ncd RapidLayout\n```\n\n若网络受限，可先 Fork 到个人账号后再克隆（见后文「准备提交」部分）。\n\n---\n\n## 二、配置开发环境\n\n建议使用虚拟环境，避免与系统 Python 冲突：\n\n```bash\n# 使用 venv\npython -m venv .venv\nsource .venv/bin/activate   # Linux/macOS\n# .venv\\Scripts\\activate    # Windows\n\n# 或使用 conda\nconda create -n rapidlayout python=3.10\nconda activate rapidlayout\n```\n\n安装依赖（开发时建议可编辑安装以便本地修改生效）：\n\n```bash\npip install -r requirements.txt\npip install pytest   # 运行单元测试需要\npip install -e .\n```\n\n如需使用 ONNX Runtime 等推理后端，请按 [安装文档](https://rapidai.github.io/RapidLayout/install_usage/installation/) 安装对应依赖。\n\n---\n\n## 三、安装代码格式化与 pre-commit 钩子\n\n在已激活的虚拟环境中安装 pre-commit，并在 **仓库根目录** 启用 Git 提交前钩子，以便自动做代码格式检查与整理（如 black, autoflake 等）：\n\n```bash\npip install pre-commit\npre-commit install\n```\n\n安装成功后，每次执行 `git commit` 时会自动运行配置好的格式化工具；若检查未通过，提交会被拒绝，请根据提示修改后再次提交。也可在提交前手动跑一遍：\n\n```bash\npre-commit run --all-files\n```\n\n---\n\n## 四、运行单元测试\n\n在 **仓库根目录** 下执行：\n\n```bash\n# 运行全部测试\npytest tests/ -v\n\n# 仅运行部分测试文件\npytest tests/test_main.py -v\n\n# 查看测试覆盖率（需先安装 pytest-cov）\npytest tests/ -v --cov=rapid_layout\n```\n\n确认当前主分支在你本机环境下测试通过，再进行修改。\n\n---\n\n## 五、复现问题 / 增加新功能\n\n### 反馈问题与建议\n\n- **Bug 反馈**：在 [Issues](https://github.com/RapidAI/RapidLayout/issues) 中提交 Bug 报告，请尽量包含复现步骤、环境信息与报错信息。\n- **功能建议**：在 Issues 中使用 Feature Request 模板描述你的需求或使用场景。\n- **文档与示例**：发现文档错误或希望补充示例时，可直接提 Issue 或 PR。\n\n### 复现 Bug\n\n1. 在 [Issues](https://github.com/RapidAI/RapidLayout/issues) 中选定或创建对应 issue。\n2. 根据 issue 描述与报错信息，在本地用仓库代码复现问题。\n3. 在 `rapid_layout/` 或 `tests/` 下定位并修改代码，直到问题消失。\n\n### 增加新功能\n\n1. 与 maintainer 或现有 issue 讨论需求与实现方式（可选但推荐）。\n2. 在 `rapid_layout/` 下实现新逻辑，保持与现有代码风格一致（项目使用 [black](https://github.com/psf/black) 等规范）。\n3. 新功能应有对应单元测试覆盖。\n\n---\n\n## 六、编写对应单元测试\n\n- 测试文件放在 **`tests/`** 下，命名建议 `test_*.py`。\n- 使用 **pytest** 编写用例，可参考现有 `test_main.py`。\n- 测试用图片等资源放在 `tests/test_files/`。\n- 新增测试应：\n    - 能稳定复现你要验证的行为（Bug 修复或新功能）；\n    - 不依赖未在仓库或文档中说明的外部服务（必要时用 mock 或跳过）。\n\n示例：\n\n```python\n# tests/test_xxx.py\nimport pytest\nfrom pathlib import Path\n\ncur_dir = Path(__file__).resolve().parent\nroot_dir = cur_dir.parent\ntest_dir = cur_dir / \"test_files\"\n\ndef get_engine():\n    from rapid_layout import RapidLayout\n    return RapidLayout()\n\ndef test_your_new_feature():\n    engine = get_engine()\n    img_path = test_dir / \"layout.jpg\"\n    result = engine(img_path)\n    assert result is not None\n    # 更多断言...\n```\n\n---\n\n## 七、运行所有单元测试\n\n在 **仓库根目录** 下再次全量跑测，确保无回归：\n\n```bash\npytest tests/ -v\n```\n\n若有测试被跳过（如缺少某推理引擎），请确认你修改或新增的测试在现有环境下已执行并通过。\n\n---\n\n## 八、准备提交到仓库\n\n### 8.1 Fork Rapid Layout 主仓库到个人账号\n\n1. 打开 [Rapid Layout 主仓库](https://github.com/RapidAI/RapidLayout)。\n2. 
点击右上角 **Fork**，将仓库 fork 到你自己的 GitHub 账号下（例如 `https://github.com/你的用户名/RapidLayout`）。\n\n### 8.2 将代码提交到个人 Fork\n\n若最初是克隆的主仓库，需要添加你的 fork 为远程，并推送到 fork：\n\n```bash\n# 在项目根目录 RapidLayout 下执行\ngit remote add myfork https://github.com/你的用户名/RapidLayout.git\n# 若已有 origin 且就是主仓库，可保留；推送时用 myfork\n\n# 创建分支（推荐为每个 issue/功能单独分支）\ngit checkout -b fix/xxx   # 或 feat/xxx、docs/xxx\n\n# 添加并提交修改\ngit add .\ngit status   # 确认只提交预期文件\ngit commit -m \"fix: 简短描述\"\n\n# 推送到你的 fork\ngit push myfork fix/xxx\n```\n\n**请按约定式提交规范（Conventional Commits）书写 commit 信息**，便于维护者阅读与自动生成 Changelog。格式为：\n\n```text\n<类型>[可选范围]: <简短描述>\n\n[可选正文]\n[可选脚注]\n```\n\n常用类型示例：\n\n| 类型       | 说明                   |\n|------------|------------------------|\n| `feat`     | 新功能                 |\n| `fix`      | Bug 修复               |\n| `docs`     | 文档变更               |\n| `style`    | 代码格式（不影响逻辑） |\n| `refactor` | 重构                   |\n| `test`     | 测试相关               |\n| `chore`    | 构建 / 工具等            |\n\n示例：`fix: 修复某条件下版面结果为空`、`feat: 支持 xxx 输入格式`、`docs: 更新安装说明`。\n\n### 8.3 向 Rapid Layout 主仓库提交 Pull Request（PR）\n\n1. 打开你 fork 后的仓库页面（如 `https://github.com/你的用户名/RapidLayout`）。\n2. 若刚推送分支，页面上通常会出现 **Compare & pull request**，点击即可；否则在 **Branches** 里选择你刚推送的分支，再点 **New pull request**。\n3. 确认 **base 仓库** 为 `RapidAI/RapidLayout`、**base 分支** 为 `main`（或仓库默认主分支），**head 仓库** 为你的 fork、**head 分支** 为你的分支（如 `fix/xxx`）。\n4. 填写 PR 标题和说明：\n   - 标题：简要概括修改内容（如「Fix: 修复 xxx 问题」）。\n   - 说明中建议包含：\n     - 对应 Issue 编号（若有）：`Fixes #123` 或 `Related to #123`。\n     - 修改原因与主要改动。\n     - 如何验证：例如「在仓库根目录执行 `pytest tests/ -v` 通过」。\n5. 提交 PR，等待 maintainer 审查；根据反馈再在本地修改并推送同一分支，PR 会自动更新。\n\n---\n\n## 流程小结\n\n| 步骤 | 说明 |\n|------|------|\n| 1 | 克隆 Rapid Layout 源码 |\n| 2 | 配置虚拟环境并安装依赖与 pytest，可编辑安装 `pip install -e .` |\n| 3 | 安装 pre-commit（`pip install pre-commit`），在仓库根目录执行 `pre-commit install` |\n| 4 | 运行单元测试（`pytest tests/ -v`），确认基线通过 |\n| 5 | 复现问题或实现新功能 |\n| 6 | 编写 / 补充对应单元测试 |\n| 7 | 在仓库根目录运行全部测试并确认通过 |\n| 8 | Fork 主仓库到个人账号 |\n| 9 | 按约定式提交规范编写 commit，将修改提交并推送到个人 Fork 的对应分支 |\n| 10 | 在主仓库创建 PR，从个人 Fork 分支指向主仓库 main |\n\n---\n\n## 文档本地预览\n\n修改 `docs/` 下内容后，可使用 MkDocs 本地预览：\n\n```bash\npip install mkdocs mkdocs-material\nmkdocs serve\n```\n\n在浏览器中打开提示的地址（一般为 `http://127.0.0.1:8000`）即可查看效果。\n\n---\n\n## 其他说明\n\n- **代码风格**：项目采用 [black](https://github.com/psf/black)、autoflake 等规范，已通过 pre-commit 钩子在提交时自动检查；也可在仓库根目录执行 `pre-commit run --all-files` 手动跑一遍。\n- **文档**：更多安装与使用说明见 [Rapid Layout 文档](https://rapidai.github.io/RapidLayout/)。\n- **问题与讨论**：Bug 与功能建议可通过 [GitHub Issues](https://github.com/RapidAI/RapidLayout/issues) 反馈。\n- 提交 Issue 或 PR 时，请使用清晰、简洁的标题与描述，便于维护者处理。若你希望参与长期维护或较大功能开发，欢迎在 Issue 中说明，我们会与你沟通协作方式。\n\n再次感谢你的贡献！\n"
  },
  {
    "path": "docs/doc_whl_rapid_layout.md",
    "content": "See [link](https://github.com/RapidAI/RapidLayout) for details.\n"
  },
  {
    "path": "docs/hooks/change_copyright.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom datetime import datetime\n\n\ndef on_config(config, **kwargs):\n    config.copyright = f\"Copyright &copy; {datetime.now().year} Maintained by SWHL.\"\n"
  },
  {
    "path": "docs/hooks/expiry.py",
    "content": "import re\nfrom datetime import datetime\n\n\ndef on_page_context(context, page, config, nav):\n    expiry_days = config.get(\"extra\", {}).get(\"expiry_days\", 365)\n\n    def compute_expiry(meta):\n        revision = (\n            meta.get(\"git_revision_date_localized\")\n            or meta.get(\"git_creation_date_localized\")\n            or meta.get(\"revision_date\")\n        )\n        is_expired = False\n        last_update = None\n        if revision:\n            m = re.search(r\"(\\d{4}-\\d{2}-\\d{2})\", str(revision))\n            if m:\n                last_update = m.group(1)\n                try:\n                    dt = datetime.strptime(last_update, \"%Y-%m-%d\")\n                    if (datetime.now() - dt).days > expiry_days:\n                        is_expired = True\n                except Exception:\n                    # 无法解析日期时，保持不显示过期提示\n                    pass\n        return is_expired, last_update\n\n    page.is_expired, page.last_update = compute_expiry(page.meta)\n    context[\"is_expired\"] = page.is_expired\n    context[\"last_update\"] = page.last_update\n    context[\"expiry_days\"] = expiry_days\n\n    return context\n"
  },
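  {
    "path": "docs/overrides/main.html",
    "content": "{#\n  编辑补充的示意文件（假设性示例，非仓库原有文件）：演示 docs/hooks/expiry.py\n  通过 on_page_context 注入的 is_expired / last_update / expiry_days 变量的\n  一种可能消费方式。前提假设：mkdocs.yml 中 theme.custom_dir 指向 docs/overrides。\n#}\n{% extends \"base.html\" %}\n\n{% block content %}\n  {% if is_expired %}\n    <div class=\"admonition warning\">\n      <p class=\"admonition-title\">内容可能已过期</p>\n      <p>本页最后更新于 {{ last_update }}，距今已超过 {{ expiry_days }} 天，内容仅供参考。</p>\n    </div>\n  {% endif %}\n  {{ super() }}\n{% endblock %}\n"
  },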
  {
    "path": "docs/hooks/link.py",
    "content": "import fnmatch\nimport re\n\n\ndef on_page_markdown(markdown, page, config, files):\n    \"\"\"\n    将 'issue #数字'、'PR #数字'、'commit 哈希' 替换为 GitHub 链接\n    （忽略代码块和行内代码，只在指定页面生效，支持通配符）\n    \"\"\"\n\n    repo_url = config.get(\"repo_url\", \"\").rstrip(\"/\")\n    if not repo_url:\n        return markdown\n\n    # 页面白名单，支持通配符\n    allowed_pages = config.get(\"link_pages\", [])\n    page_src = page.file.src_path  # 相对于 docs/ 的路径\n    if allowed_pages:\n        matched = any(fnmatch.fnmatch(page_src, pattern) for pattern in allowed_pages)\n        if not matched:\n            return markdown\n\n    # 保存代码块和行内代码\n    placeholders = {}\n\n    def store_placeholder(match):\n        key = f\"__PLACEHOLDER_{len(placeholders)}__\"\n        placeholders[key] = match.group(0)\n        return key\n\n    # 提取代码块（```...``` 或 ~~~...~~~）\n    markdown = re.sub(r\"```.*?```\", store_placeholder, markdown, flags=re.DOTALL)\n    markdown = re.sub(r\"~~~.*?~~~\", store_placeholder, markdown, flags=re.DOTALL)\n    # 提取行内代码（`...`）\n    markdown = re.sub(r\"`.*?`\", store_placeholder, markdown)\n\n    # --- issue 替换 ---\n    # 支持 issue#123 / issue: #123 / issue #123\n    def issue_replacer(match):\n        num = match.group(1)\n        return f\"issue [#{num}]({repo_url}/issues/{num})\"\n\n    markdown = re.sub(r\"(?i)issue\\s*[:#]?\\s*#?(\\d+)\", issue_replacer, markdown)\n\n    # --- PR 替换 ---\n    def pr_replacer(match):\n        num = match.group(1)\n        return f\"PR [#{num}]({repo_url}/pull/{num})\"\n\n    markdown = re.sub(r\"(?i)PR\\s*[:#]?\\s*#?(\\d+)\", pr_replacer, markdown)\n\n    # --- commit 替换 ---\n    def commit_replacer(match):\n        sha = match.group(1)\n        short_sha = sha[:7]\n        return f\"commit [{short_sha}]({repo_url}/commit/{sha})\"\n\n    markdown = re.sub(r\"(?i)commit\\s+([0-9a-f]{6,40})\", commit_replacer, markdown)\n\n    # 还原代码块和行内代码\n    for key, value in placeholders.items():\n        markdown = markdown.replace(key, value)\n\n    return markdown\n"
  },
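  {
    "path": "docs/hooks/README.md",
    "content": "# docs/hooks 钩子说明（补充示意）\n\n> 本文件为编辑补充的示意文档：给出本目录三个 MkDocs 钩子的一种可能注册方式，具体以仓库根目录 `mkdocs.yml` 的实际配置为准。\n\n各钩子职责：`change_copyright.py` 在 `on_config` 中按当前年份更新页脚版权；`expiry.py` 在 `on_page_context` 中注入 `is_expired` / `last_update` / `expiry_days`；`link.py` 在 `on_page_markdown` 中将正文里的 issue / PR / commit 引用替换为 GitHub 链接（跳过代码块与行内代码，仅对白名单页面生效）。\n\nMkDocs（>= 1.4）支持通过顶层 `hooks:` 配置加载钩子脚本；`expiry.py` 读取 `extra.expiry_days`，`link.py` 读取顶层自定义键 `link_pages`（支持通配符）：\n\n```yaml\n# mkdocs.yml 片段（示意，非实际配置）\nhooks:\n  - docs/hooks/change_copyright.py\n  - docs/hooks/expiry.py\n  - docs/hooks/link.py\nextra:\n  expiry_days: 365 # 页面超过该天数未更新即标记为过期\nlink_pages:\n  - \"blog/posts/*.md\" # 仅在匹配页面上做 issue/PR/commit 链接替换\n```\n"
  },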
  {
    "path": "docs/index.md",
    "content": "---\ncomments: true\nhide:\n  - navigation\n  - toc\n---\n\n<div align=\"center\">\n  <div align=\"center\">\n    <h1><b>Rapid <img src=\"https://cdn.jsdelivr.net/gh/twitter/twemoji@latest/assets/svg/1f4c4.svg\" width=\"28\" height=\"28\" alt=\"📄\" style=\"vertical-align: middle\"> Layout</b></h1>\n  </div>\n\n<a href=\"https://huggingface.co/spaces/RapidAI/RapidLayoutv1\" target=\"_blank\"><img src=\"https://img.shields.io/badge/%F0%9F%A4%97-Hugging Face Demo-blue\"></a>\n<a href=\"\"><img src=\"https://img.shields.io/badge/Python->=3.6-aff.svg\"></a>\n<a href=\"\"><img src=\"https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg\"></a>\n<a href=\"https://pypi.org/project/rapid-layout/\"><img alt=\"PyPI\" src=\"https://img.shields.io/pypi/v/rapid-layout\"></a>\n<a href=\"https://pepy.tech/project/rapid-layout\"><img src=\"https://static.pepy.tech/personalized-badge/rapid-layout?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads\"></a>\n<a href=\"https://semver.org/\"><img alt=\"SemVer2.0\" src=\"https://img.shields.io/badge/SemVer-2.0-brightgreen\"></a>\n<a href=\"https://github.com/psf/black\"><img src=\"https://img.shields.io/badge/code%20style-black-000000.svg\"></a>\n\n</div>\n\n### 简介\n\n该项目主要是汇集全网开源的版面分析的项目，具体来说，就是分析给定的文档类别图像（论文截图、研报等），定位其中类别和位置，如标题、段落、表格和图片等各个部分。\n\n### TODO\n\n- [ ] [PP-DocLayout](https://github.com/PaddlePaddle/PaddleX/blob/release/3.0-rc/docs/module_usage/tutorials/ocr_modules/layout_detection.md) 整理\n"
  },
  {
    "path": "docs/install_usage/how_to_use_other_engine.md",
    "content": "---\ncomments: true\nhide:\n#   - navigation\n  - toc\n---\n\n## 引言\n\n版面分析支持多种推理引擎与设备：\n\n- **ONNX Runtime**：默认引擎，支持 CPU / CUDA / DirectML / CANN，需按需安装对应包。\n- **OpenVINO**：可选，`pip install openvino` 后通过 `engine_type=EngineType.OPENVINO` 使用。\n\n默认依赖为 CPU 版 `onnxruntime`；使用 GPU 推理需手动安装 `onnxruntime-gpu`。详细使用和评测可参见 [AI Studio](https://aistudio.baidu.com/projectdetail/8094594)。\n\n## 使用 ONNX Runtime + GPU (CUDA)\n\n```bash\npip install rapid_layout\n# 请确保 onnxruntime-gpu 与当前 GPU/CUDA 版本对应\n# 参见 https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements\npip install onnxruntime-gpu\n```\n\n```python linenums=\"1\"\nfrom rapid_layout import EngineType, ModelType, RapidLayout, RapidLayoutInput\n\ncfg = RapidLayoutInput(\n    model_type=ModelType.PP_LAYOUT_CDLA,\n    engine_type=EngineType.ONNXRUNTIME,\n    engine_cfg={\"use_cuda\": True, \"cuda_ep_cfg\": {\"device_id\": 0}},\n)\nlayout_engine = RapidLayout(cfg=cfg)\n\nimg_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\n多卡时可通过 `cuda_ep_cfg.device_id` 指定卡号（与 [engine_cfg.yaml](https://github.com/RapidAI/RapidLayout/blob/main/rapid_layout/configs/engine_cfg.yaml) 中 `cuda_ep_cfg.device_id` 一致）。\n\n## 使用 NPU (CANN)\n\n详细配置参数参见：[engine_cfg.yaml](https://github.com/RapidAI/RapidLayout/blob/main/rapid_layout/configs/engine_cfg.yaml)\n\n```python linenums=\"1\"\nfrom rapid_layout import EngineType, ModelType, RapidLayout, RapidLayoutInput\n\ncfg = RapidLayoutInput(\n    model_type=ModelType.PP_LAYOUT_CDLA,\n    engine_type=EngineType.ONNXRUNTIME,\n    engine_cfg={\"use_cann\": True, \"cann_ep_cfg\": {\"device_id\": 0}},\n)\nlayout_engine = RapidLayout(cfg=cfg)\n\nimg_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\n## 使用 OpenVINO\n\n```bash\npip install rapid-layout onnxruntime openvino\n```\n\n```python linenums=\"1\"\nfrom rapid_layout import EngineType, ModelType, RapidLayout\n\nlayout_engine = RapidLayout(\n    model_type=ModelType.PP_LAYOUT_CDLA,\n    engine_type=EngineType.OPENVINO,\n)\nimg_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\nOpenVINO 设备与线程等配置见 [engine_cfg.yaml](https://github.com/RapidAI/RapidLayout/blob/main/rapid_layout/configs/engine_cfg.yaml) 中 `openvino` 段。\n"
  },
  {
    "path": "docs/install_usage/installation.md",
    "content": "---\ncomments: true\nhide:\n#   - navigation\n  - toc\n---\n\n由于模型较小，预先将中文版面分析模型（`layout_cdla.onnx`）打包进了 whl 包内，若仅做中文版面分析，可直接安装使用：\n\n```bash\npip install rapid-layout onnxruntime\n```\n"
  },
  {
    "path": "docs/install_usage/usage.md",
    "content": "---\ncomments: true\nhide:\n#   - navigation\n  - toc\n---\n\n## Python 脚本运行\n\n**默认用法**（默认模型 `pp_layout_cdla` + `onnxruntime` 引擎）：\n\n```python\nfrom rapid_layout import RapidLayout\n\nlayout_engine = RapidLayout()\nimg_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\n**指定模型与引擎**（关键字参数）：\n\n```python\nfrom rapid_layout import EngineType, ModelType, RapidLayout\n\nlayout_engine = RapidLayout(\n    model_type=ModelType.PP_LAYOUT_CDLA,\n    engine_type=EngineType.ONNXRUNTIME,\n    conf_thresh=0.5,\n    iou_thresh=0.5,\n)\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\n**使用配置对象**（与上方等价）：\n\n```python\nfrom rapid_layout import EngineType, ModelType, RapidLayout, RapidLayoutInput\n\ncfg = RapidLayoutInput(\n    model_type=ModelType.PP_LAYOUT_CDLA,\n    engine_type=EngineType.ONNXRUNTIME,\n    conf_thresh=0.5,\n    iou_thresh=0.5,\n)\nlayout_engine = RapidLayout(cfg=cfg)\nresults = layout_engine(img_path)\nprint(results)\nresults.vis(\"layout_res.png\")\n```\n\n## 终端运行\n\n```bash\nrapid_layout test_images/layout.png\nrapid_layout test_images/layout.png -m pp_layout_cdla --conf_thresh 0.5 --iou_thresh 0.5\n```\n\n## 构造函数参数（RapidLayout / RapidLayoutInput）\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `model_type` | ModelType / str | `pp_layout_cdla` | 模型类型 |\n| `model_dir_or_path` | str / Path / None | None | 模型路径，不传则按 model_type 解析 |\n| `engine_type` | EngineType / str | `onnxruntime` | 推理引擎：`onnxruntime`、`openvino` |\n| `engine_cfg` | dict | `{}` | 引擎额外配置 |\n| `conf_thresh` | float | 0.5 | 框置信度阈值 [0, 1] |\n| `iou_thresh` | float | 0.5 | IoU 阈值 [0, 1] |\n\n## 可视化结果\n\n<div align=\"center\">\n    <img src=\"https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/master/resources/images/layout_vis.jpg\" width=\"80%\">\n</div>\n"
  },
  {
    "path": "docs/models.md",
    "content": "---\ncomments: true\ntitle: 模型列表\nhide:\n  - navigation\n  - toc\n---\n\n!!! tip\n\n    由于不同场景下的版面差异较大，现阶段不存在一个模型可以搞定所有场景。如果实际业务需要，以下模型效果不好的话，建议构建自己的训练集微调。\n\n| `model_type` | 版面类型 | 支持类别 |\n| :------ | :----- | :----- |\n|`pp_doc_layoutv3 (rapid_layout>=1.2.0)`|文档|`['abstract', 'algorithm', 'aside_text', 'chart', 'content', 'display_formula', 'doc_title', 'figure_title', 'footer', 'footer_image', 'footnote', 'formula_number', 'header', 'header_image', 'image', 'inline_formula', 'number', 'paragraph_title', 'reference', 'reference_content', 'seal', 'table', 'text', 'vertical_text', 'vision_footnote']`|\n|`pp_doc_layoutv2 (rapid_layout>=1.1.0)`|文档|`['abstract', 'algorithm', 'aside_text', 'chart', 'content', 'display_formula', 'doc_title', 'figure_title', 'footer', 'footer_image', 'footnote', 'formula_number', 'header', 'header_image', 'image', 'inline_formula', 'number', 'paragraph_title', 'reference', 'reference_content', 'seal', 'table', 'text', 'vertical_text', 'vision_footnote']`|\n||||\n| `pp_layout_table` | 表格 | `[\"table\"]` |\n| `pp_layout_publaynet` | 英文 | `[\"text\", \"title\", \"list\", \"table\", \"figure\"]` |\n| `pp_layout_cdla` | 中文 | `['text', 'title', 'figure', 'figure_caption', 'table', 'table_caption', 'header', 'footer', 'reference', 'equation']` |\n||||\n| `yolov8n_layout_paper` | 论文 | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |\n| `yolov8n_layout_report` | 研报 | `['Text', 'Title', 'Header', 'Footer', 'Figure', 'Table', 'Toc', 'Figure caption', 'Table caption']` |\n| `yolov8n_layout_publaynet` | 英文 | `[\"Text\", \"Title\", \"List\", \"Table\", \"Figure\"]` |\n| `yolov8n_layout_general6` | 通用 | `[\"Text\", \"Title\", \"Figure\", \"Table\", \"Caption\", \"Equation\"]` |\n||||\n| `doclayout_docstructbench` | 通用 | `['title', 'plain text', 'abandon', 'figure', 'figure_caption', 'table', 'table_caption', 'table_footnote', 'isolate_formula', 'formula_caption']` |\n| `doclayout_d4la` | 通用 | `['DocTitle', 'ParaTitle', 'ParaText', 'ListText', 'RegionTitle', 'Date', 'LetterHead', 'LetterDear', 'LetterSign', 'Question', 'OtherText', 'RegionKV', 'RegionList', 'Abstract', 'Author', 'TableName', 'Table', 'Figure', 'FigureName', 'Equation', 'Reference', 'Footer', 'PageHeader', 'PageFooter', 'Number', 'Catalog', 'PageNumber']` |\n| `doclayout_docsynth` | 通用 | `['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title']` |\n\n## 模型来源\n\n**🔥 PP-DocLayoutV3**: [PP-DocLayoutV2](https://huggingface.co/PaddlePaddle/PP-DocLayoutV3)\n\n**🔥 PP-DocLayoutV2**: [PP-DocLayoutV2](https://huggingface.co/PaddlePaddle/PP-DocLayoutV2)\n\n**PP 模型**：[PaddleOCR 版面分析](https://github.com/PaddlePaddle/PaddleOCR/blob/133d67f27dc8a241d6b2e30a9f047a0fb75bebbe/ppstructure/layout/README_ch.md)\n\n**yolov8n 系列**：[360LayoutAnalysis](https://github.com/360AILAB-NLP/360LayoutAnalysis)\n\n**doclayout_yolo（推荐）**：[DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)，目前较为优秀的开源版面分析模型，提供基于不同训练集的三个模型：\n\n- `doclayout_docstructbench`：[Hugging Face](https://huggingface.co/juliozhao/DocLayout-YOLO-DocStructBench/tree/main)\n- `doclayout_d4la`：[Hugging Face](https://huggingface.co/juliozhao/DocLayout-YOLO-D4LA-Docsynth300K_pretrained/blob/main/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.pt)\n- `doclayout_docsynth`：[Hugging Face](https://huggingface.co/juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained/tree/main)\n\n## 模型下载\n\n模型均已经托管在 
[魔搭平台](https://www.modelscope.cn/models/RapidAI/RapidLayout/files)。\n"
  },
  {
    "path": "docs/quickstart.md",
    "content": "---\ncomments: true\ntitle: 快速开始\nhide:\n  - navigation\n  - toc\n---\n\n## 安装\n\n```bash\npip install rapid-layout onnxruntime\n```\n\n如需使用 OpenVINO 引擎，请额外安装：`pip install openvino`。\n\n## 运行\n\n=== \"Python 脚本（默认）\"\n\n    不传参数时使用默认模型 `pp_layout_cdla` 与 `onnxruntime` 引擎：\n\n    ```python linenums=\"1\"\n    from rapid_layout import RapidLayout\n\n    layout_engine = RapidLayout()\n    img_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\n    results = layout_engine(img_path)\n    print(results)\n    results.vis(\"layout_res.png\")\n    ```\n\n=== \"Python 脚本（指定模型与引擎）\"\n\n    通过关键字参数指定 `model_type`、`engine_type`、`conf_thresh` 等：\n\n    ```python linenums=\"1\"\n    from rapid_layout import EngineType, ModelType, RapidLayout\n\n    layout_engine = RapidLayout(\n        model_type=ModelType.PP_LAYOUT_CDLA,\n        engine_type=EngineType.ONNXRUNTIME,\n        conf_thresh=0.5,\n    )\n    img_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\n    results = layout_engine(img_path)\n    print(results)\n    results.vis(\"layout_res.png\")\n    ```\n\n=== \"Python 脚本（使用配置对象）\"\n\n    ```python linenums=\"1\"\n    from rapid_layout import EngineType, ModelType, RapidLayout, RapidLayoutInput\n\n    cfg = RapidLayoutInput(\n        model_type=ModelType.PP_LAYOUT_CDLA,\n        engine_type=EngineType.ONNXRUNTIME,\n    )\n    layout_engine = RapidLayout(cfg=cfg)\n    img_path = \"https://raw.githubusercontent.com/RapidAI/RapidLayout/718b60e927ab893c2fad67c98f753b2105a6f421/tests/test_files/layout.jpg\"\n    results = layout_engine(img_path)\n    print(results)\n    results.vis(\"layout_res.png\")\n    ```\n\n=== \"终端运行\"\n\n    ```bash linenums=\"1\"\n    rapid_layout test_images/layout.png\n    rapid_layout test_images/layout.png -m pp_layout_cdla --conf_thresh 0.5\n    ```\n\n## 构造函数参数说明\n\n`RapidLayout(cfg=None, **kwargs)` 支持以下关键字参数（与 `RapidLayoutInput` 一致）：\n\n| 参数 | 类型 | 默认值 | 说明 |\n|------|------|--------|------|\n| `model_type` | `ModelType` 或 str | `pp_layout_cdla` | 模型类型，见 [模型列表](models.md) |\n| `model_dir_or_path` | str / Path / None | None | 模型路径，不传则按 `model_type` 自动解析 |\n| `engine_type` | `EngineType` 或 str | `onnxruntime` | 推理引擎：`onnxruntime`、`openvino` |\n| `engine_cfg` | dict | `{}` | 引擎额外配置，见 [engine_cfg.yaml](https://github.com/RapidAI/RapidLayout/blob/main/rapid_layout/configs/engine_cfg.yaml) |\n| `conf_thresh` | float | 0.5 | 框置信度阈值 [0, 1] |\n| `iou_thresh` | float | 0.5 | IoU 阈值 [0, 1] |\n\n传入 `cfg` 时，`kwargs` 会覆盖同名字段。\n\n## 可视化结果\n\n<div align=\"center\">\n    <img src=\"https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/master/resources/images/layout_vis.jpg\" width=\"60%\">\n</div>\n"
  },
  {
    "path": "docs/stylesheets/extra.css",
    "content": ":root {\n  --admonition-border-left-width: 0.2rem;\n  --base-border-radius: 1rem;\n  --md-text-font: \"LXGW WenKai Screen\";\n  --md-code-font: \"consolas, 'Courier New', monospace\";\n}\n\n\n/*亮色样式*/\n[data-md-color-scheme=\"default\"] {\n    --md-primary-fg-color: rgba(255, 255, 255, 0.7);\n    --md-header-bg-color: rgba(255, 255, 255, 0.7);\n    --md-typeset-a-color: steelblue;\n    --md-footer-bg-color: #f6f6f6;\n    --md-footer-bg-color--dark: #f6f6f6;\n    --md-footer-fg-color: #222;\n    --md-footer-fg-color--light: #505050;\n    --md-footer-fg-color--lighter: #777777;\n    --md-code-hl-comment-color: #999999;\n}\n\n/*暗色样式*/\n[data-md-color-scheme=\"slate\"] {\n    --md-primary-fg-color: rgba(17, 16, 17, 0.7);\n    --md-header-bg-color: rgba(17, 16, 17, 0.7);\n    --md-typeset-a-color: royalblue;\n    --md-footer-bg-color: #101010;\n    --md-footer-bg-color--dark: #101010;\n    --md-code-hl-comment-color: #777777;\n}\n\n\n/* 卡片圆角与悬浮阴影 */\n.md-typeset .grid.cards>ul>li,\n.md-typeset .md-button,\n.md-typeset table:not([class]) {\n    border-radius: var(--base-border-radius);\n}\n\n.md-typeset .grid.cards>ul>li:hover {\n    box-shadow: var(--card-hover-shadow);\n}\n\n/* 页脚社交图标高度 */\n.md-social__link svg {\n    max-height: 1rem;\n}\n\n/* 搜索框及下拉结果圆角 */\n.md-search__form {\n    border-radius: var(--base-border-radius);\n}\n\n[data-md-toggle=\"search\"]:checked~.md-header .md-search__form {\n    border-top-right-radius: var(--base-border-radius);\n    border-top-left-radius: var(--base-border-radius);\n}\n\n[dir=\"ltr\"] .md-search__output {\n    border-bottom-right-radius: var(--base-border-radius);\n    border-bottom-left-radius: var(--base-border-radius);\n}\n\n.banner{\n  font-family: var(--font-family);\n}\n\n/* 可选：如需恢复代码块、警告框等样式，取消注释即可 */\n/*\n.highlight span.filename {\n  border-bottom: none;\n  border-radius: var(--base-border-radius);\n  display: inline;\n  font-family: var(--md-code-font-family);\n  border-bottom-left-radius: 0;\n  border-bottom-right-radius: 0;\n  margin-bottom: 5px;\n  text-align: center;\n}\n.highlight span.filename + pre > code,\n.md-typeset pre > code {\n  border-radius: var(--base-border-radius);\n  border-top-left-radius: 0;\n}\n.md-typeset .admonition {\n  border-width: 0px;\n  border-left-width: var(--admonition-border-left-width);\n}\n[dir=\"ltr\"] .md-typeset blockquote {\n  border-radius: 0.2rem;\n  border-left-width: var(--admonition-border-left-width);\n}\n*/\n\n/* 可选：博客相关样式，按需启用 */\n\n/* .md-post--excerpt {\n  background-color: rgba(68,138,255,.1);\n  box-shadow: 0 0 0 1rem rgba(68,138,255,.1);\n  border-radius: var(--base-border-radius);\n}\n.md-post--excerpt .md-post__header {\n  justify-content: left;\n}\n.md-post--excerpt .md-post__content > h2,\n.md-post__action {\n  text-align: left;\n} */\n\n\n/* 让所有admonition（包括!!! tip）圆角化且更自然 */\n.md-typeset .admonition,\n.md-typeset details {\n    border-radius: 1.5em;\n    box-shadow: 0 2px 12px 0 rgba(60, 60, 60, 0.07);\n    transition: border-radius 0.4s cubic-bezier(.4, 2, .6, 1), box-shadow 0.3s;\n    overflow: hidden;\n}\n\n/*图像圆角*/\nimg.img1 {\n  border-radius: 25px;\n\n}"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: RapidLayout 文档\nsite_url: https://rapidai.github.io/RapidLayout/\nsite_author: RapidAI\nsite_description:\n  Analysis of Chinese and English document layouts.\n\nrepo_name: RapidAI/RapidLayout\nrepo_url: https://github.com/RapidAI/RapidLayout\n\ncopyright: Copyright &copy; 2026 Maintained by RapidAI.\n\nedit_uri: https://github.com/RapidAI/RapidLayout/blob/main/docs\n\ntheme:\n  name: material\n  favicon: ./static/logo.svg\n  language: zh\n  custom_dir: overrides\n  features:\n    - announce.dismiss\n    - content.tooltips\n    - content.code.copy\n    - content.tabs.link\n    - content.action.edit  # 显示编辑按钮\n    - content.action.view  # 显示查看源码按钮\n    - content.footnote.tooltips\n    - navigation.expand  # 默认打开所有的子节\n    - navigation.tabs # 顶级索引被作为tab\n    - navigation.tabs.sticky # tab始终可见\n    - navigation.top # 开启顶部导航栏\n    - navigation.tracking # 导航栏跟踪\n    - navigation.footer\n    - navigation.indexes\n    - search.highlight # 搜索高亮\n    - search.share # 搜索分享\n    - search.suggest # 搜索建议\n    - toc.follow # 目录跟踪-页面右侧的小目录\n\n  palette:\n    - media: \"(prefers-color-scheme)\" # 系统主题\n      toggle:\n        icon: material/theme-light-dark\n        name: 系统主题\n    - media: \"(prefers-color-scheme: light)\" # 亮色主题\n      scheme: default\n      primary: white\n      accent: light blue\n      toggle:\n        icon: material/weather-sunny\n        name: 明亮主题\n    - media: \"(prefers-color-scheme: dark)\" # 暗色主题\n      scheme: slate\n      primary: black\n      accent: yellow\n      toggle:\n        icon: material/weather-night\n        name: 暗黑主题\n\n  icon:\n    logo: material/file-document-multiple\n    previous: fontawesome/solid/angle-left\n    next: fontawesome/solid/angle-right\n    repo: fontawesome/brands/github\n    edit: material/pencil\n    view: material/eye\n    tag:\n      default-tag: fontawesome/solid/tag\n      hardware-tag: fontawesome/solid/microchip\n      software-tag: fontawesome/solid/laptop-code\n\nplugins:\n  - blog:\n      archive: false\n      post_date_format: short\n      blog_toc: true\n      categories_toc: true\n      pagination: false\n  - search:\n      separator: '[\\s\\u200b\\-_,:!=\\[\\]()\"`/]+|\\.(?!\\d)|&[lg]t;|(?!\\b)(?=[A-Z][a-z])'\n  - git-committers:\n      repository: RapidAI/RapidLayout\n      branch: main\n      token: !!python/object/apply:os.getenv [\"MKDOCS_GIT_COMMITTERS_APIKEY\"]\n  - git-revision-date-localized:\n      enable_creation_date: true\n      timezone: Asia/Shanghai\n      type: iso_datetime\n\n\nhooks:\n  - docs/hooks/expiry.py\n  - docs/hooks/change_copyright.py\n  - docs/hooks/link.py\n\nextra_css:\n  - stylesheets/extra.css\n  - https://cdn.jsdelivr.net/npm/lxgw-wenkai-screen-web/style.css\n\nextra:\n  social:\n    - icon: fontawesome/brands/github\n      link: https://github.com/RapidAI\n    - icon: fontawesome/brands/weixin\n      link: https://raw.githubusercontent.com/RapidAI/.github/main/assets/RapidAI_poster_compose.png\n    - icon: fontawesome/brands/python\n      link: https://pypi.org/project/rapid-layout/\n  version:\n    provider: mike\n  expiry_days: 180\n\nmarkdown_extensions:\n  - abbr\n  - attr_list\n  - pymdownx.snippets\n  - pymdownx.critic\n  - pymdownx.caret\n  - pymdownx.keys\n  - pymdownx.mark\n  - pymdownx.tilde\n  - pymdownx.details\n  - footnotes\n  - def_list\n  - md_in_html\n  - tables\n  - pymdownx.tasklist:\n      custom_checkbox: true\n  - toc:\n      permalink: true\n  - pymdownx.betterem:\n      smart_enable: all\n  - pymdownx.superfences:\n      custom_fences:\n        
- name: mermaid\n          class: mermaid\n          format: !!python/name:pymdownx.superfences.fence_code_format\n  - pymdownx.emoji:\n      emoji_index: !!python/name:material.extensions.emoji.twemoji\n      emoji_generator: !!python/name:material.extensions.emoji.to_svg\n  - pymdownx.highlight:\n        anchor_linenums: true\n        line_spans: __span\n        pygments_lang_class: true\n  - pymdownx.inlinehilite\n  - pymdownx.tabbed:\n      alternate_style: true\n  - admonition\n\nnav:\n  - 概览: index.md\n  - 快速开始: quickstart.md\n  - 模型列表: models.md\n  - 安装及使用:\n    - 安装: install_usage/installation.md\n    - 使用: install_usage/usage.md\n    - 使用其他推理引擎: install_usage/how_to_use_other_engine.md\n  - 贡献指南: contributing.md\n  - 博客:\n    - blog/index.md\n\n"
  },
  {
    "path": "overrides/404.html",
    "content": "<!-- overrides/404.html -->\n{% extends \"main.html\" %}\n\n{% block content %}\n<div class=\"md-content md-grid\" style=\"margin-top: 2rem;\">\n    <div class=\"md-main__inner md-grid\">\n        <div class=\"md-content__inner md-typeset\">\n            <h1>页面未找到</h1>\n            <p>\n                抱歉，您访问的页面 <code>{{ page.url | url }}</code> 不存在。\n            </p>\n            <p>\n                可能的原因：\n            </p>\n            <ul>\n                <li>链接已失效或拼写错误</li>\n                <li>该页面已在当前版本中被移除</li>\n                <li>您正在访问旧版文档中的链接</li>\n            </ul>\n\n            <h2>接下来您可以：</h2>\n            <ul>\n                <li>使用上方搜索框查找相关内容</li>\n                <li><a href=\"{{ config.site_url | default('/') }}\">返回文档首页</a></li>\n                <li>\n                    <a href=\"https://github.com/RapidAI/RapidLayout/issues/new?title=死链报告：%20{{ page.url | urlencode }}&body=访问了不存在的页面：%20{{ page.url | urlencode }}%0A%0A**期望内容**：%0A%0A**浏览器信息**：%0A\"\n                        target=\"_blank\" rel=\"noopener\">\n                        🐞 报告此死链（点击跳转到 GitHub Issues）\n                    </a>\n                </li>\n            </ul>\n\n            <p style=\"font-size: 0.9em; color: var(--md-default-fg-color--light);\">\n                注：该页面返回 HTTP 404 状态，搜索引擎将不会索引此地址。\n            </p>\n        </div>\n    </div>\n</div>\n{% endblock %}\n"
  },
  {
    "path": "overrides/main.html",
    "content": "{% extends \"base.html\" %}\n\n{% block announce %}\n<p class=\"banner\">🎉 支持PP-DocLayoutV2/V3系列模型 → <a\n        href=\"https://rapidai.github.io/RapidLayout/latest/blog/2026/02/10/support-PP-DocLayoutv2-v3/\" target=\"_blank\">link</a>\n</p>\n{% endblock %}\n\n\n{% block outdated %}\nYou're not viewing the latest version.\n<a href=\"{{ '../' ~ base_url }}\">\n    <strong>Click here to go to latest.</strong>\n</a>\n{% endblock %}\n"
  },
  {
    "path": "overrides/partials/comments.html",
    "content": "{% if page.meta.comments %}\n<h2 id=\"__comments\">{{ lang.t(\"meta.comments\") }}</h2>\n<!-- Insert generated snippet here -->\n\n<script src=\"https://giscus.app/client.js\"\n    data-repo=\"RapidAI/RapidLayout\"\n    data-repo-id=\"R_kgDOMLOtcQ\"\n    data-category=\"General\"\n    data-category-id=\"DIC_kwDOMLOtcc4CgMBG\"\n    data-mapping=\"title\"\n    data-strict=\"0\"\n    data-reactions-enabled=\"1\"\n    data-emit-metadata=\"0\"\n    data-input-position=\"top\"\n    data-theme=\"https://cdn.jsdelivr.net/gh/L33Z22L11/giscus-theme/lxgw-wenkai.css\"\n    data-lang=\"zh-CN\"\n    data-loading=\"lazy\" crossorigin=\"anonymous\" async>\n    </script>\n\n\n<!-- Synchronize Giscus theme with palette -->\n<script>\n    var giscus = document.querySelector(\"script[src*=giscus]\")\n\n    // Set palette on initial load\n    var palette = __md_get(\"__palette\")\n    if (palette && typeof palette.color === \"object\") {\n        var theme = palette.color.scheme === \"slate\"\n            ? \"transparent_dark\"\n            : \"light\"\n\n        // Instruct Giscus to set theme\n        giscus.setAttribute(\"data-theme\", theme)\n    }\n\n    // Register event handlers after documented loaded\n    document.addEventListener(\"DOMContentLoaded\", function () {\n        var ref = document.querySelector(\"[data-md-component=palette]\")\n        ref.addEventListener(\"change\", function () {\n            var palette = __md_get(\"__palette\")\n            if (palette && typeof palette.color === \"object\") {\n                var theme = palette.color.scheme === \"slate\"\n                    ? \"transparent_dark\"\n                    : \"light\"\n\n                // Instruct Giscus to change theme\n                var frame = document.querySelector(\".giscus-frame\")\n                frame.contentWindow.postMessage(\n                    { giscus: { setConfig: { theme } } },\n                    \"https://giscus.app\"\n                )\n            }\n        })\n    })\n</script>\n{% endif %}\n"
  },
  {
    "path": "overrides/partials/content.html",
    "content": "<!--\n  Copyright (c) 2016-2025 Martin Donath <martin.donath@squidfunk.com>\n\n  Permission is hereby granted, free of charge, to any person obtaining a copy\n  of this software and associated documentation files (the \"Software\"), to\n  deal in the Software without restriction, including without limitation the\n  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n  sell copies of the Software, and to permit persons to whom the Software is\n  furnished to do so, subject to the following conditions:\n\n  The above copyright notice and this permission notice shall be included in\n  all copies or substantial portions of the Software.\n\n  THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n  FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n  IN THE SOFTWARE.\n-->\n\n<!-- Tags -->\n{% include \"partials/tags.html\" %}\n\n<!-- Actions -->\n{% include \"partials/actions.html\" %}\n\n<!--\n  Hack: check whether the content contains a h1 headline. If it doesn't, the\n  page title (or respectively site name) is used as the main headline.\n-->\n{% if \"\\u003ch1\" not in page.content %}\n<h1>{{ page.title | d(config.site_name, true)}}</h1>\n{% endif %}\n\n{% include \"partials/expired_notice.html\" %}\n\n<!-- Page content -->\n{{ page.content }}\n\n<!-- Source file information -->\n{% include \"partials/source-file.html\" %}\n\n<!-- Was this page helpful? -->\n{% include \"partials/feedback.html\" %}\n\n<!-- Comment system -->\n{% include \"partials/comments.html\" %}\n"
  },
  {
    "path": "overrides/partials/expired_notice.html",
    "content": "{% if is_expired %}\n    <div class=\"admonition warning\">\n        <p class=\"admonition-title\">Warning</p>\n        <p>\n            本文档最后更新于 {{ last_update }}，\n            已超过 {{ expiry_days }} 天未更新，内容可能已经过时，阅读注意甄别。\n        </p>\n    </div>\n{% endif %}\n"
  },
  {
    "path": "rapid_layout/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import RapidLayout\nfrom .utils.typings import EngineType, ModelType, RapidLayoutInput\n\n__all__ = ['RapidLayout', 'EngineType', 'ModelType', 'RapidLayoutInput']\n"
  },
  {
    "path": "rapid_layout/configs/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\n"
  },
  {
    "path": "rapid_layout/configs/default_models.yaml",
    "content": "pp_doc_layoutv3:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/pp_doc_layout/pp_doc_layoutv3.onnx\n  SHA256: 250dbad1dfb9e4983fab75e1bf5085cd56ec3f41d5c7d0f8623ec74856e7aa67\n\npp_doc_layoutv2:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/pp_doc_layout/pp_doc_layoutv2.onnx\n  SHA256: 0bd2ea0997fe0789f0300292291f8bbf897d890b44a9a3bd5be72afd6198aa90\n\npp_layout_cdla:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/pp_layout/layout_cdla.onnx\n  SHA256: 25b1f27ec56aa932a48f30cbd6293c358a156280f4b20b0a973bab210c39f62c\n\npp_layout_publaynet:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/pp_layout/layout_publaynet.onnx\n  SHA256: 958aa6dcef1cc1a542d0a513b5976a3d5edbcc37d76460ec1e9f126358e4d100\n\npp_layout_table:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/pp_layout/layout_table.onnx\n  SHA256: 5b07ba6df1d1889bed2877c9d7501235c6fb6e2212aca8f2f56f4b1b8d0e37b5\n\nyolov8n_layout_paper:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/360/yolov8n_layout_paper.onnx\n  SHA256: bc074c8d8fbe89e5d90c3e21b7e3b52f279c70fe210ae96d73b74141df64347c\n\nyolov8n_layout_report:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/360/yolov8n_layout_report.onnx\n  SHA256: 9d5ada6a69b5825eb255da2b82d2c8d11636a0adae801074d88892527b535980\n\nyolov8n_layout_publaynet:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/360/yolov8n_layout_publaynet.onnx\n  SHA256: 5304bf18e538312a1bd211eb2ad3283524dff956e5cbffcefb3ad294c6e3cba6\n\nyolov8n_layout_general6:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/360/yolov8n_layout_general6.onnx\n  SHA256: 927b6edcb268e896e6a170f7d78980591b408e04b3908f54d58eb69efd018c95\n\ndoclayout_docstructbench:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/doclayout/doclayout_yolo_docstructbench_imgsz1024.onnx\n  SHA256: 3b452baef10ecabd615491bc82cc4d49475fbc2cd7a8e535044f2c6bb28fb9fe\n\ndoclayout_d4la:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/doclayout/doclayout_yolo_d4la_imgsz1600_docsynth_pretrain.onnx\n  SHA256: 1c81715d45d5bee2e6b644f92563a9eaa5cb4cad3d4293f890f99c0862937e69\n\ndoclayout_docsynth:\n  model_dir_or_path: https://www.modelscope.cn/models/RapidAI/RapidLayout/resolve/v1.2.0/onnx/doclayout/doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.onnx\n  SHA256: 527e60cefc2801dec727dddbfa3a2bd225876a8c5505461c9d3b1193f50a7c84\n"
  },
  {
    "path": "rapid_layout/configs/engine_cfg.yaml",
    "content": "onnxruntime:\n    intra_op_num_threads: -1\n    inter_op_num_threads: -1\n    enable_cpu_mem_arena: false\n\n    cpu_ep_cfg:\n        arena_extend_strategy: \"kSameAsRequested\"\n\n    use_cuda: false\n    cuda_ep_cfg:\n        device_id: 0\n        arena_extend_strategy: \"kNextPowerOfTwo\"\n        cudnn_conv_algo_search: \"EXHAUSTIVE\"\n        do_copy_in_default_stream: true\n\n    use_dml: false\n    dm_ep_cfg: null\n\n    use_cann: false\n    cann_ep_cfg:\n        device_id: 0\n        arena_extend_strategy: \"kNextPowerOfTwo\"\n        npu_mem_limit:  21474836480 # 20 * 1024 * 1024 * 1024\n        op_select_impl_mode: \"high_performance\"\n        optypelist_for_implmode: \"Gelu\"\n        enable_cann_graph: true\n\nopenvino:\n    device: \"CPU\"\n    inference_num_threads: -1\n    performance_hint: \"LATENCY\"\n    performance_num_requests: -1\n    enable_cpu_pinning: null\n    num_streams: -1\n    enable_hyper_threading: null\n    scheduling_core_type: null\n"
  },
  {
    "path": "rapid_layout/inference_engine/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\n"
  },
  {
    "path": "rapid_layout/inference_engine/base.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Union\n\nimport numpy as np\nfrom omegaconf import DictConfig, OmegaConf\n\nfrom ..utils.logger import Logger\nfrom ..utils.typings import EngineType\nfrom ..utils.utils import import_package\n\nlogger = Logger(logger_name=__name__).get_log()\n\n\nclass InferSession(ABC):\n    cur_dir = Path(__file__).resolve().parent.parent\n    MODEL_URL_PATH = cur_dir / \"configs\" / \"default_models.yaml\"\n    ENGINE_CFG_PATH = cur_dir / \"configs\" / \"engine_cfg.yaml\"\n\n    model_info = OmegaConf.load(MODEL_URL_PATH)\n    DEFAULT_MODEL_PATH = cur_dir / \"models\"\n\n    engine_cfg = OmegaConf.load(ENGINE_CFG_PATH)\n\n    @abstractmethod\n    def __init__(self, config):\n        pass\n\n    @abstractmethod\n    def __call__(self, input_content: np.ndarray) -> np.ndarray:\n        pass\n\n    @staticmethod\n    def _verify_model(model_path: Union[str, Path, None]):\n        if model_path is None:\n            raise ValueError(\"model_path is None!\")\n\n        model_path = Path(model_path)\n        if not model_path.exists():\n            raise FileNotFoundError(f\"{model_path} does not exists.\")\n\n        if not model_path.is_file():\n            raise FileExistsError(f\"{model_path} is not a file.\")\n\n    @abstractmethod\n    def have_key(self, key: str = \"character\") -> bool:\n        pass\n\n    @property\n    def characters(self):\n        return self.get_character_list()\n\n    @abstractmethod\n    def get_character_list(self, key: str = \"character\") -> List[str]:\n        pass\n\n    @staticmethod\n    def update_params(cfg: DictConfig, params: Dict[str, Any]) -> DictConfig:\n        for k, v in params.items():\n            OmegaConf.update(cfg, k, v)\n        return cfg\n\n\ndef get_engine(engine_type: EngineType):\n    logger.info(\"Using engine_name: %s\", engine_type.value)\n\n    if engine_type == EngineType.ONNXRUNTIME:\n        if not import_package(engine_type.value):\n            raise ImportError(f\"{engine_type.value} is not installed.\")\n\n        from .onnxruntime import OrtInferSession\n\n        return OrtInferSession\n\n    elif engine_type == EngineType.OPENVINO:\n        if not import_package(engine_type.value):\n            raise ImportError(f\"{engine_type.value} is not installed.\")\n\n        from .openvino import OpenVINOInferSession\n\n        return OpenVINOInferSession\n\n    raise ValueError(f\"Unsupported engine: {engine_type.value}\")\n"
  },
  {
    "path": "rapid_layout/inference_engine/onnxruntime/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import OrtInferSession\n"
  },
  {
    "path": "rapid_layout/inference_engine/onnxruntime/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport os\nimport traceback\nfrom pathlib import Path\nfrom typing import Any, List\n\nimport numpy as np\nfrom omegaconf import DictConfig\nfrom onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions\n\nfrom ...model_handler.utils import ModelProcessor\nfrom ...utils.logger import Logger\nfrom ...utils.typings import RapidLayoutInput\nfrom ..base import InferSession\nfrom .provider_config import ProviderConfig\n\n\nclass OrtInferSession(InferSession):\n    def __init__(self, cfg: RapidLayoutInput):\n        self.logger = Logger(logger_name=__name__).get_log()\n\n        if cfg.model_dir_or_path is None:\n            model_path = ModelProcessor.get_model_path(cfg.model_type)\n        else:\n            model_path = Path(cfg.model_dir_or_path)\n\n        self._verify_model(model_path)\n        self.logger.info(f\"Using {model_path}\")\n\n        engine_cfg = self.update_params(\n            self.engine_cfg[cfg.engine_type.value], cfg.engine_cfg\n        )\n\n        sess_opt = self._init_sess_opts(engine_cfg)\n\n        provider_cfg = ProviderConfig(engine_cfg=engine_cfg)\n        self.session = InferenceSession(\n            model_path,\n            sess_options=sess_opt,\n            providers=provider_cfg.get_ep_list(),\n        )\n        provider_cfg.verify_providers(self.session.get_providers())\n\n    @staticmethod\n    def _init_sess_opts(cfg: DictConfig) -> SessionOptions:\n        sess_opt = SessionOptions()\n        sess_opt.log_severity_level = 4\n        sess_opt.enable_cpu_mem_arena = cfg.enable_cpu_mem_arena\n        sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL\n\n        cpu_nums = os.cpu_count()\n        intra_op_num_threads = cfg.get(\"intra_op_num_threads\", -1)\n        if intra_op_num_threads != -1 and 1 <= intra_op_num_threads <= cpu_nums:\n            sess_opt.intra_op_num_threads = intra_op_num_threads\n\n        inter_op_num_threads = cfg.get(\"inter_op_num_threads\", -1)\n        if inter_op_num_threads != -1 and 1 <= inter_op_num_threads <= cpu_nums:\n            sess_opt.inter_op_num_threads = inter_op_num_threads\n\n        return sess_opt\n\n    def __call__(self, input_content: np.ndarray) -> Any:\n        if isinstance(input_content, list):\n            input_dict = dict(zip(self.get_input_names(), input_content))\n        else:\n            input_dict = dict(zip(self.get_input_names(), [input_content]))\n\n        try:\n            return self.session.run(self.get_output_names(), input_dict)\n        except Exception as e:\n            error_info = traceback.format_exc()\n            raise ONNXRuntimeError(error_info) from e\n\n    def get_input_names(self) -> List[str]:\n        return [v.name for v in self.session.get_inputs()]\n\n    def get_output_names(self) -> List[str]:\n        return [v.name for v in self.session.get_outputs()]\n\n    @property\n    def characters(self):\n        return self.get_character_list()\n\n    def get_character_list(self, key: str = \"character\") -> List[str]:\n        meta_dict = self.session.get_modelmeta().custom_metadata_map\n        return meta_dict[key].splitlines()\n\n    def have_key(self, key: str = \"character\") -> bool:\n        meta_dict = self.session.get_modelmeta().custom_metadata_map\n        if key in meta_dict.keys():\n            return True\n        return False\n\n\nclass ONNXRuntimeError(Exception):\n    pass\n"
  },
  {
    "path": "rapid_layout/inference_engine/onnxruntime/provider_config.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport platform\nfrom enum import Enum\nfrom typing import Any, Dict, List, Sequence, Tuple\n\nfrom omegaconf import DictConfig\nfrom onnxruntime import get_available_providers, get_device\n\nfrom ...utils.logger import Logger\n\n\nclass EP(Enum):\n    CPU_EP = \"CPUExecutionProvider\"\n    CUDA_EP = \"CUDAExecutionProvider\"\n    DIRECTML_EP = \"DmlExecutionProvider\"\n    CANN_EP = \"CANNExecutionProvider\"\n\n\nclass ProviderConfig:\n    def __init__(self, engine_cfg: DictConfig):\n        self.logger = Logger(logger_name=__name__).get_log()\n\n        self.had_providers: List[str] = get_available_providers()\n        self.default_provider = self.had_providers[0]\n\n        self.cfg_use_cuda = engine_cfg.get(\"use_cuda\", False)\n        self.cfg_use_dml = engine_cfg.get(\"use_dml\", False)\n        self.cfg_use_cann = engine_cfg.get(\"use_cann\", False)\n\n        self.cfg = engine_cfg\n\n    def get_ep_list(self) -> List[Tuple[str, Dict[str, Any]]]:\n        results = [(EP.CPU_EP.value, self.cpu_ep_cfg())]\n\n        if self.is_cuda_available():\n            results.insert(0, (EP.CUDA_EP.value, self.cuda_ep_cfg()))\n\n        if self.is_dml_available():\n            self.logger.info(\n                \"Windows 10 or above detected, try to use DirectML as primary provider\"\n            )\n            results.insert(0, (EP.DIRECTML_EP.value, self.dml_ep_cfg()))\n\n        if self.is_cann_available():\n            self.logger.info(\"Try to use CANNExecutionProvider to infer\")\n            results.insert(0, (EP.CANN_EP.value, self.cann_ep_cfg()))\n\n        return results\n\n    def cpu_ep_cfg(self) -> Dict[str, Any]:\n        return dict(self.cfg.cpu_ep_cfg)\n\n    def cuda_ep_cfg(self) -> Dict[str, Any]:\n        return dict(self.cfg.cuda_ep_cfg)\n\n    def dml_ep_cfg(self) -> Dict[str, Any]:\n        if self.cfg.dm_ep_cfg is not None:\n            return self.cfg.dm_ep_cfg\n\n        if self.is_cuda_available():\n            return self.cuda_ep_cfg()\n        return self.cpu_ep_cfg()\n\n    def cann_ep_cfg(self) -> Dict[str, Any]:\n        return dict(self.cfg.cann_ep_cfg)\n\n    def verify_providers(self, session_providers: Sequence[str]):\n        if not session_providers:\n            raise ValueError(\"Session Providers is empty\")\n\n        first_provider = session_providers[0]\n\n        providers_to_check = {\n            EP.CUDA_EP: self.is_cuda_available,\n            EP.DIRECTML_EP: self.is_dml_available,\n            EP.CANN_EP: self.is_cann_available,\n        }\n\n        for ep, check_func in providers_to_check.items():\n            if check_func() and first_provider != ep.value:\n                self.logger.warning(\n                    f\"{ep.value} is available, but the inference part is automatically shifted to be executed under {first_provider}. \"\n                )\n                self.logger.warning(f\"The available lists are {session_providers}\")\n\n    def is_cuda_available(self) -> bool:\n        if not self.cfg_use_cuda:\n            return False\n\n        CUDA_EP = EP.CUDA_EP.value\n        if get_device() == \"GPU\" and CUDA_EP in self.had_providers:\n            return True\n\n        self.logger.warning(\n            f\"{CUDA_EP} is not in available providers ({self.had_providers}). 
Use {self.default_provider} inference by default.\"\n        )\n        install_instructions = [\n            f\"If you want to use {CUDA_EP} acceleration, you must do:\",\n            \"First, uninstall all onnxruntime packages in current environment.\",\n            \"Second, install onnxruntime-gpu by `pip install onnxruntime-gpu`.\",\n            \"Note the onnxruntime-gpu version must match your cuda and cudnn version.\",\n            \"You can refer to this link: https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html\",\n            f\"Third, ensure {CUDA_EP} is in available providers list. e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']\",\n        ]\n        self.print_log(install_instructions)\n        return False\n\n    def is_dml_available(self) -> bool:\n        if not self.cfg_use_dml:\n            return False\n\n        cur_os = platform.system()\n        if cur_os != \"Windows\":\n            self.logger.warning(\n                f\"DirectML is only supported in Windows OS. The current OS is {cur_os}. Use {self.default_provider} inference by default.\",\n            )\n            return False\n\n        window_build_number_str = platform.version().split(\".\")[-1]\n        window_build_number = (\n            int(window_build_number_str) if window_build_number_str.isdigit() else 0\n        )\n        if window_build_number < 18362:\n            self.logger.warning(\n                f\"DirectML is only supported in Windows 10 Build 18362 and above OS. The current Windows Build is {window_build_number}. Use {self.default_provider} inference by default.\",\n            )\n            return False\n\n        DML_EP = EP.DIRECTML_EP.value\n        if DML_EP in self.had_providers:\n            return True\n\n        self.logger.warning(\n            f\"{DML_EP} is not in available providers ({self.had_providers}). Use {self.default_provider} inference by default.\"\n        )\n        install_instructions = [\n            \"If you want to use DirectML acceleration, you must do:\",\n            \"First, uninstall all onnxruntime packages in current environment.\",\n            \"Second, install onnxruntime-directml by `pip install onnxruntime-directml`\",\n            f\"Third, ensure {DML_EP} is in available providers list. e.g. ['DmlExecutionProvider', 'CPUExecutionProvider']\",\n        ]\n        self.print_log(install_instructions)\n        return False\n\n    def is_cann_available(self) -> bool:\n        if not self.cfg_use_cann:\n            return False\n\n        CANN_EP = EP.CANN_EP.value\n        if CANN_EP in self.had_providers:\n            return True\n\n        self.logger.warning(\n            f\"{CANN_EP} is not in available providers ({self.had_providers}). Use {self.default_provider} inference by default.\"\n        )\n        install_instructions = [\n            \"If you want to use CANN acceleration, you must do:\",\n            \"First, ensure you have installed Huawei Ascend software stack.\",\n            \"Second, install onnxruntime with CANN support by following the instructions at:\",\n            \"\\thttps://onnxruntime.ai/docs/execution-providers/community-maintained/CANN-ExecutionProvider.html\",\n            f\"Third, ensure {CANN_EP} is in available providers list. e.g. 
['CANNExecutionProvider', 'CPUExecutionProvider']\",\n        ]\n        self.print_log(install_instructions)\n        return False\n\n    def print_log(self, log_list: List[str]):\n        for log_info in log_list:\n            self.logger.info(log_info)\n"
  },
  {
    "path": "rapid_layout/inference_engine/openvino/__init__.py",
    "content": "from .main import OpenVINOInferSession\n"
  },
  {
    "path": "rapid_layout/inference_engine/openvino/device_config.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport os\nfrom typing import Any, Dict\n\nfrom omegaconf import DictConfig\n\nfrom ...utils.logger import logger\n\n\nclass OpenVINOConfig:\n    def __init__(self, engine_cfg: DictConfig):\n        self.cfg = engine_cfg\n\n    def get_config(self) -> Dict[str, Any]:\n        config = {}\n\n        infer_num_threads = self.cfg.get(\"inference_num_threads\", -1)\n        if infer_num_threads != -1 and 1 <= infer_num_threads <= os.cpu_count():\n            config[\"INFERENCE_NUM_THREADS\"] = str(infer_num_threads)\n\n        performance_hint = self.cfg.get(\"performance_hint\", None)\n        if performance_hint is not None:\n            config[\"PERFORMANCE_HINT\"] = str(performance_hint)\n\n        performance_num_requests = self.cfg.get(\"performance_num_requests\", -1)\n        if performance_num_requests != -1:\n            config[\"PERFORMANCE_HINT_NUM_REQUESTS\"] = str(performance_num_requests)\n\n        enable_cpu_pinning = self.cfg.get(\"enable_cpu_pinning\", None)\n        if enable_cpu_pinning is not None:\n            config[\"ENABLE_CPU_PINNING\"] = str(enable_cpu_pinning)\n\n        num_streams = self.cfg.get(\"num_streams\", -1)\n        if num_streams != -1:\n            config[\"NUM_STREAMS\"] = str(num_streams)\n\n        enable_hyper_threading = self.cfg.get(\"enable_hyper_threading\", None)\n        if enable_hyper_threading is not None:\n            config[\"ENABLE_HYPER_THREADING\"] = str(enable_hyper_threading)\n\n        scheduling_core_type = self.cfg.get(\"scheduling_core_type\", None)\n        if scheduling_core_type is not None:\n            config[\"SCHEDULING_CORE_TYPE\"] = str(scheduling_core_type)\n\n        logger.info(f\"Using OpenVINO config: {config}\")\n        return config\n"
  },
  {
    "path": "rapid_layout/inference_engine/openvino/main.py",
    "content": "# -*- encoding: utf-8 -*-\nimport traceback\nfrom pathlib import Path\nfrom typing import Any, List\n\nimport numpy as np\n\ntry:\n    from openvino import Core, Tensor\nexcept ImportError:\n    from openvino.runtime import Core, Tensor\n\nfrom ...model_handler.utils import ModelProcessor\nfrom ...utils.logger import logger\nfrom ...utils.typings import RapidLayoutInput\nfrom ..base import InferSession\nfrom .device_config import OpenVINOConfig\n\n\nclass OpenVINOInferSession(InferSession):\n    def __init__(self, cfg: RapidLayoutInput):\n        if cfg.model_dir_or_path is None:\n            model_path = ModelProcessor.get_model_path(cfg.model_type)\n        else:\n            model_path = Path(cfg.model_dir_or_path)\n\n        self._verify_model(model_path)\n        logger.info(f\"Using {model_path}\")\n\n        core = Core()\n        self.model = core.read_model(model=str(model_path))\n        self.input_tensors = self.model.inputs\n        self.output_tensors = self.model.outputs\n\n        engine_cfg = self.update_params(\n            self.engine_cfg[cfg.engine_type.value], cfg.engine_cfg\n        )\n        device = engine_cfg.get(\"device\", \"CPU\")\n        ov_config = OpenVINOConfig(engine_cfg)\n        core.set_property(device, ov_config.get_config())\n        self.compiled_model = core.compile_model(self.model, device_name=device)\n        self.infer_request = self.compiled_model.create_infer_request()\n\n    def __call__(self, input_content: np.ndarray) -> Any:\n        if not isinstance(input_content, list):\n            input_content = [input_content]\n\n        if len(input_content) != len(self.input_tensors):\n            raise OpenVINOError(\n                f\"The number of inputs ({len(input_content)}) does not match the number of model inputs ({len(self.input_tensors)}).\"\n            )\n\n        try:\n            for input_tensor, input_content in zip(self.input_tensors, input_content):\n                input_tensor_name = input_tensor.get_any_name()\n                self.infer_request.set_tensor(input_tensor_name, Tensor(input_content))\n            self.infer_request.infer()\n\n            outputs = []\n            for output_tensor in self.output_tensors:\n                output_tensor_name = output_tensor.get_any_name()\n                output = self.infer_request.get_tensor(output_tensor_name).data\n                outputs.append(output)\n\n            return outputs\n\n        except Exception as e:\n            error_info = traceback.format_exc()\n            raise OpenVINOError(error_info) from e\n\n    def get_input_names(self) -> List[str]:\n        return [tensor.get_any_name() for tensor in self.model.inputs]\n\n    def get_output_names(self) -> List[str]:\n        return [tensor.get_any_name() for tensor in self.model.outputs]\n\n    @property\n    def characters(self):\n        return self.get_character_list()\n\n    def get_character_list(self, key: str = \"character\") -> List[str]:\n        framework_info = self.get_rt_info_framework()\n        if framework_info is None:\n            return []\n\n        val = framework_info[key] if key in framework_info else None\n        if val is None or not hasattr(val, \"value\"):\n            return []\n\n        value = getattr(val, \"value\", None)\n        if value is None:\n            return []\n\n        return value.splitlines()\n\n    def have_key(self, key: str = \"character\") -> bool:\n        try:\n            framework_info = self.get_rt_info_framework()\n            return 
framework_info is not None and key in framework_info\n        except (AttributeError, TypeError, KeyError):\n            return False\n\n    def get_rt_info_framework(self):\n        rt_info = self.model.get_rt_info()\n        if \"framework\" not in rt_info:\n            return None\n        return rt_info[\"framework\"]\n\n\nclass OpenVINOError(Exception):\n    pass\n"
  },
  {
    "path": "rapid_layout/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport argparse\nimport dataclasses\nfrom typing import List, Optional\n\nfrom .inference_engine.base import get_engine\nfrom .model_handler import ModelHandler, ModelProcessor\nfrom .utils.load_image import InputType, LoadImage\nfrom .utils.typings import ModelType, RapidLayoutInput, RapidLayoutOutput\nfrom .utils.utils import is_url\n\n\nclass RapidLayout:\n    def __init__(self, cfg: Optional[RapidLayoutInput] = None, **kwargs):\n        \"\"\"初始化布局检测引擎。\n\n        Args:\n            cfg: 可选，完整配置；若为 None 则仅用 kwargs 构造配置。\n\n        Kwargs（与 RapidLayoutInput 字段一致，传入时会覆盖 cfg 中同名字段）:\n            model_type: 模型类型，ModelType 或 str（如 \"pp_layout_cdla\"），默认 PP_LAYOUT_CDLA。\n            model_dir_or_path: 模型目录或单文件路径，str | Path | None，默认 None（按 model_type 自动解析）。\n            engine_type: 推理引擎，EngineType 或 str（\"onnxruntime\" | \"openvino\"），默认 onnxruntime。\n            engine_cfg: 引擎额外配置，dict，默认 {}。\n            conf_thresh: 框置信度阈值 [0, 1]，默认 0.5。\n            iou_thresh: IoU 阈值 [0, 1]，默认 0.5。\n        \"\"\"\n        if cfg is None:\n            cfg = RapidLayoutInput(**RapidLayoutInput.normalize_kwargs(kwargs))\n        elif kwargs:\n            cfg = dataclasses.replace(cfg, **RapidLayoutInput.normalize_kwargs(kwargs))\n\n        if not cfg.model_dir_or_path:\n            cfg.model_dir_or_path = ModelProcessor.get_model_path(cfg.model_type)\n\n        self.session = get_engine(cfg.engine_type)(cfg)\n        self.model_handler = ModelHandler(cfg, self.session)\n\n        self.load_img = LoadImage()\n\n    def __call__(self, img_content: InputType) -> RapidLayoutOutput:\n        img = self.load_img(img_content)\n        result = self.model_handler(img)\n        return result\n\n\ndef parse_args(arg_list: Optional[List[str]] = None):\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"img_path\", type=str, help=\"Path to image for layout.\")\n    parser.add_argument(\n        \"-m\",\n        \"--model_type\",\n        type=str,\n        default=ModelType.PP_LAYOUT_CDLA.value,\n        choices=[v.value for v in ModelType],\n        help=\"Support model type\",\n    )\n    parser.add_argument(\n        \"--conf_thresh\",\n        type=float,\n        default=0.5,\n        help=\"Box threshold, the range is [0, 1]\",\n    )\n    parser.add_argument(\n        \"--iou_thresh\",\n        type=float,\n        default=0.5,\n        help=\"IoU threshold, the range is [0, 1]\",\n    )\n    parser.add_argument(\n        \"-v\",\n        \"--vis\",\n        action=\"store_true\",\n        help=\"Wheter to visualize the layout results.\",\n    )\n    args = parser.parse_args(arg_list)\n    return args\n\n\ndef main(arg_list: Optional[List[str]] = None):\n    args = parse_args(arg_list)\n\n    input_args = RapidLayoutInput(\n        model_type=ModelType(args.model_type),\n        iou_thresh=args.iou_thresh,\n        conf_thresh=args.conf_thresh,\n    )\n    layout_engine = RapidLayout(input_args)\n\n    results = layout_engine(args.img_path)\n    print(results)\n\n    if args.vis:\n        save_path = \"layout_vis.jpg\"\n        if not is_url(args.img_path):\n            save_path = args.img_path.resolve().parent / \"layout_vis.jpg\"\n        results.vis(save_path)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "rapid_layout/model_handler/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import ModelHandler\nfrom .utils import ModelProcessor\n"
  },
  {
    "path": "rapid_layout/model_handler/base/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom abc import ABC, abstractmethod\n\n\nclass BaseModelHandler(ABC):\n    @abstractmethod\n    def __call__(self):\n        pass\n\n    @abstractmethod\n    def preprocess(self):\n        pass\n\n    @abstractmethod\n    def postprocess(self):\n        pass\n"
  },
  {
    "path": "rapid_layout/model_handler/doc_layout/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import DocLayoutModelHandler\n"
  },
  {
    "path": "rapid_layout/model_handler/doc_layout/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport time\n\nimport numpy as np\n\nfrom ...inference_engine.base import InferSession\nfrom ...utils.typings import RapidLayoutOutput\nfrom ..base import BaseModelHandler\nfrom .post_process import DocLayoutPostProcess\nfrom .pre_process import DocLayoutPreProcess\n\n\nclass DocLayoutModelHandler(BaseModelHandler):\n    def __init__(self, labels, conf_thres, iou_thres, session: InferSession):\n        self.img_size = (1024, 1024)\n        self.preprocess = DocLayoutPreProcess(img_size=self.img_size)\n        self.postprocess = DocLayoutPostProcess(labels, conf_thres, iou_thres)\n\n        self.session = session\n\n    def __call__(self, ori_img: np.ndarray) -> RapidLayoutOutput:\n        s1 = time.perf_counter()\n\n        ori_img_shape = ori_img.shape[:2]\n\n        img = self.preprocess(ori_img)\n        preds = self.session(img)\n        boxes, scores, class_names = self.postprocess(\n            preds, ori_img_shape, self.img_size\n        )\n\n        elapse = time.perf_counter() - s1\n        return RapidLayoutOutput(\n            img=ori_img,\n            boxes=boxes,\n            class_names=class_names,\n            scores=scores,\n            elapse=elapse,\n        )\n\n    def preprocess(self, image):\n        return self.preprocess(image)\n\n    def postprocess(self, preds, ori_img_shape, img_shape):\n        return self.postprocess(preds, ori_img_shape, img_shape)\n"
  },
  {
    "path": "rapid_layout/model_handler/doc_layout/post_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import List, Tuple\n\nfrom ..utils import scale_boxes\n\n\nclass DocLayoutPostProcess:\n    def __init__(self, labels: List[str], conf_thres=0.2, iou_thres=0.5):\n        self.labels = labels\n        self.conf_threshold = conf_thres\n        self.iou_threshold = iou_thres\n        self.input_width, self.input_height = None, None\n        self.img_width, self.img_height = None, None\n\n    def __call__(\n        self,\n        preds,\n        ori_img_shape: Tuple[int, int],\n        img_shape: Tuple[int, int] = (1024, 1024),\n    ):\n        preds = preds[0]\n        mask = preds[..., 4] > self.conf_threshold\n        preds = [p[mask[idx]] for idx, p in enumerate(preds)][0]\n        preds[:, :4] = scale_boxes(list(img_shape), preds[:, :4], list(ori_img_shape))\n\n        boxes = preds[:, :4]\n        confidences = preds[:, 4]\n        class_ids = preds[:, 5].astype(int)\n        labels = [self.labels[i] for i in class_ids]\n        return boxes, confidences, labels\n"
  },
  {
    "path": "rapid_layout/model_handler/doc_layout/pre_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom pathlib import Path\nfrom typing import Tuple, Union\n\nimport numpy as np\n\nfrom ..utils import LetterBox\n\nInputType = Union[str, np.ndarray, bytes, Path]\n\n\nclass DocLayoutPreProcess:\n    def __init__(self, img_size: Tuple[int, int]):\n        self.img_size = img_size\n        self.letterbox = LetterBox(new_shape=img_size, auto=False, stride=32)\n\n    def __call__(self, image: np.ndarray) -> np.ndarray:\n        input_img = self.letterbox(image=image)\n        input_img = input_img[None, ...]\n        input_img = input_img[..., ::-1].transpose(0, 3, 1, 2)\n        input_img = np.ascontiguousarray(input_img)\n        input_img = input_img / 255\n        input_tensor = input_img.astype(np.float32)\n        return input_tensor\n"
  },
  {
    "path": "rapid_layout/model_handler/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import Any\n\nimport numpy as np\n\nfrom ..inference_engine.base import InferSession\nfrom ..utils.logger import Logger\nfrom ..utils.typings import RapidLayoutInput, RapidLayoutOutput\nfrom .doc_layout import DocLayoutModelHandler\nfrom .pp import PPModelHandler\nfrom .pp_doc_layout import PPDocLayoutModelHandler\nfrom .yolov8 import YOLOv8ModelHandler\n\n\nclass ModelHandler:\n    def __init__(self, cfg: RapidLayoutInput, session: InferSession):\n        self.logger = Logger(logger_name=__name__).get_log()\n        self.model_processors = self._init_handler(cfg, session)\n\n    def _init_handler(self, cfg: RapidLayoutInput, session: InferSession) -> Any:\n        model_type = cfg.model_type.value\n        self.logger.info(f\"{model_type} contains {session.characters}\")\n\n        if model_type.startswith(\"pp_doc_layout\"):\n            return PPDocLayoutModelHandler(\n                session.characters, cfg.conf_thresh, cfg.iou_thresh, session\n            )\n\n        if model_type.startswith(\"pp\"):\n            return PPModelHandler(\n                session.characters, cfg.conf_thresh, cfg.iou_thresh, session\n            )\n\n        if model_type.startswith(\"yolov8\"):\n            return YOLOv8ModelHandler(\n                session.characters, cfg.conf_thresh, cfg.iou_thresh, session\n            )\n\n        if model_type.startswith(\"doclayout\"):\n            return DocLayoutModelHandler(\n                session.characters, cfg.conf_thresh, cfg.iou_thresh, session\n            )\n\n        raise ValueError(f\"{model_type.value} is not supported!\")\n\n    def __call__(self, img: np.ndarray) -> RapidLayoutOutput:\n        return self.model_processors(img)\n"
  },
  {
    "path": "rapid_layout/model_handler/pp/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import PPModelHandler\n"
  },
  {
    "path": "rapid_layout/model_handler/pp/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport time\n\nimport numpy as np\n\nfrom ...inference_engine.base import InferSession\nfrom ...utils.typings import RapidLayoutOutput\nfrom ..base import BaseModelHandler\nfrom .post_process import PPPostProcess\nfrom .pre_process import PPPreProcess\n\n\nclass PPModelHandler(BaseModelHandler):\n    def __init__(self, labels, conf_thres, iou_thres, session: InferSession):\n        self.img_size = (800, 608)\n        self.pp_preprocess = PPPreProcess(img_size=self.img_size)\n        self.pp_postprocess = PPPostProcess(labels, conf_thres, iou_thres)\n\n        self.session = session\n\n    def __call__(self, ori_img: np.ndarray) -> RapidLayoutOutput:\n        s1 = time.perf_counter()\n\n        ori_img_shape = ori_img.shape[:2]\n        img = self.preprocess(ori_img)\n        preds = self.session(img)\n        boxes, scores, class_names = self.postprocess(ori_img_shape, img, preds)\n\n        elapse = time.perf_counter() - s1\n        return RapidLayoutOutput(\n            img=ori_img,\n            boxes=boxes,\n            class_names=class_names,\n            scores=scores,\n            elapse=elapse,\n        )\n\n    def preprocess(self, image: np.ndarray) -> np.ndarray:\n        return self.pp_preprocess(image)\n\n    def postprocess(self, ori_img_shape, img, preds):\n        return self.pp_postprocess(ori_img_shape, img, preds)\n"
  },
  {
    "path": "rapid_layout/model_handler/pp/post_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import List, Tuple\n\nimport numpy as np\n\n\nclass PPPostProcess:\n    def __init__(self, labels, conf_thres=0.4, iou_thres=0.5):\n        self.labels = labels\n        self.strides = [8, 16, 32, 64]\n        self.conf_thres = conf_thres\n        self.iou_thres = iou_thres\n        self.nms_top_k = 1000\n        self.keep_top_k = 100\n\n    def __call__(\n        self, ori_shape, img: np.ndarray, preds: List[np.ndarray]\n    ) -> Tuple[List[List[float]], List[float], List[str]]:\n        scores, raw_boxes = [], []\n        num_outs = int(len(preds) / 2)\n        for out_idx in range(num_outs):\n            scores.append(preds[out_idx])\n            raw_boxes.append(preds[out_idx + num_outs])\n\n        batch_size = raw_boxes[0].shape[0]\n        reg_max = int(raw_boxes[0].shape[-1] / 4 - 1)\n\n        out_boxes_num, out_boxes_list = [], []\n        ori_shape, input_shape, scale_factor = self.img_info(ori_shape, img)\n\n        for batch_id in range(batch_size):\n            # generate centers\n            decode_boxes, select_scores = [], []\n            for stride, box_distribute, score in zip(self.strides, raw_boxes, scores):\n                box_distribute = box_distribute[batch_id]\n                score = score[batch_id]\n                # centers\n                fm_h = input_shape[0] / stride\n                fm_w = input_shape[1] / stride\n                h_range = np.arange(fm_h)\n                w_range = np.arange(fm_w)\n                ww, hh = np.meshgrid(w_range, h_range)\n                ct_row = (hh.flatten() + 0.5) * stride\n                ct_col = (ww.flatten() + 0.5) * stride\n                center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)\n\n                # box distribution to distance\n                reg_range = np.arange(reg_max + 1)\n                box_distance = box_distribute.reshape((-1, reg_max + 1))\n                box_distance = self.softmax(box_distance, axis=1)\n                box_distance = box_distance * np.expand_dims(reg_range, axis=0)\n                box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))\n                box_distance = box_distance * stride\n\n                # top K candidate\n                topk_idx = np.argsort(score.max(axis=1))[::-1]\n                topk_idx = topk_idx[: self.nms_top_k]\n                center = center[topk_idx]\n                score = score[topk_idx]\n                box_distance = box_distance[topk_idx]\n\n                # decode box\n                decode_box = center + [-1, -1, 1, 1] * box_distance\n\n                select_scores.append(score)\n                decode_boxes.append(decode_box)\n\n            # nms\n            bboxes = np.concatenate(decode_boxes, axis=0)\n            confidences = np.concatenate(select_scores, axis=0)\n            picked_box_probs, picked_labels = [], []\n            for class_index in range(0, confidences.shape[1]):\n                probs = confidences[:, class_index]\n                mask = probs > self.conf_thres\n                probs = probs[mask]\n                if probs.shape[0] == 0:\n                    continue\n\n                subset_boxes = bboxes[mask, :]\n                box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)\n                box_probs = self.hard_nms(\n                    box_probs,\n                    iou_thres=self.iou_thres,\n                    top_k=self.keep_top_k,\n                )\n  
              picked_box_probs.append(box_probs)\n                picked_labels.extend([class_index] * box_probs.shape[0])\n\n            if len(picked_box_probs) == 0:\n                out_boxes_list.append(np.empty((0, 4)))\n                out_boxes_num.append(0)\n            else:\n                picked_box_probs = np.concatenate(picked_box_probs)\n\n                # resize output boxes\n                picked_box_probs[:, :4] = self.warp_boxes(\n                    picked_box_probs[:, :4], ori_shape[batch_id]\n                )\n                im_scale = np.concatenate(\n                    [scale_factor[batch_id][::-1], scale_factor[batch_id][::-1]]\n                )\n                picked_box_probs[:, :4] /= im_scale\n                # clas score box\n                out_boxes_list.append(\n                    np.concatenate(\n                        [\n                            np.expand_dims(np.array(picked_labels), axis=-1),\n                            np.expand_dims(picked_box_probs[:, 4], axis=-1),\n                            picked_box_probs[:, :4],\n                        ],\n                        axis=1,\n                    )\n                )\n                out_boxes_num.append(len(picked_labels))\n\n        out_boxes_list = np.concatenate(out_boxes_list, axis=0)\n        out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)\n\n        boxes, scores, class_names = [], [], []\n        for dt in out_boxes_list:\n            clsid, bbox, score = int(dt[0]), dt[2:], dt[1]\n            label = self.labels[clsid]\n            boxes.append(bbox.tolist())\n            scores.append(float(score))\n            class_names.append(label)\n        return boxes, scores, class_names\n\n    def warp_boxes(self, boxes, ori_shape):\n        \"\"\"Apply transform to boxes\"\"\"\n        width, height = ori_shape[1], ori_shape[0]\n        n = len(boxes)\n        if n:\n            # warp points\n            xy = np.ones((n * 4, 3))\n            xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(\n                n * 4, 2\n            )  # x1y1, x2y2, x1y2, x2y1\n            # xy = xy @ M.T  # transform\n            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale\n            # create new boxes\n            x = xy[:, [0, 2, 4, 6]]\n            y = xy[:, [1, 3, 5, 7]]\n            xy = (\n                np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T\n            )\n            # clip boxes\n            xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)\n            xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)\n            return xy.astype(np.float32)\n        return boxes\n\n    def img_info(self, origin_shape, img):\n        resize_shape = img.shape\n        im_scale_y = resize_shape[2] / float(origin_shape[0])\n        im_scale_x = resize_shape[3] / float(origin_shape[1])\n        scale_factor = np.array([im_scale_y, im_scale_x], dtype=np.float32)\n        img_shape = np.array(img.shape[2:], dtype=np.float32)\n\n        input_shape = np.array(img).astype(\"float32\").shape[2:]\n        ori_shape = np.array((img_shape,)).astype(\"float32\")\n        scale_factor = np.array((scale_factor,)).astype(\"float32\")\n        return ori_shape, input_shape, scale_factor\n\n    @staticmethod\n    def softmax(x, axis=None):\n        def logsumexp(a, axis=None, b=None, keepdims=False):\n            a_max = np.amax(a, axis=axis, keepdims=True)\n\n            if a_max.ndim > 0:\n                a_max[~np.isfinite(a_max)] = 0\n            elif not 
np.isfinite(a_max):\n                a_max = 0\n\n            tmp = np.exp(a - a_max)\n\n            # suppress warnings about log of zero\n            with np.errstate(divide=\"ignore\"):\n                s = np.sum(tmp, axis=axis, keepdims=keepdims)\n                out = np.log(s)\n\n            if not keepdims:\n                a_max = np.squeeze(a_max, axis=axis)\n            out += a_max\n            return out\n\n        return np.exp(x - logsumexp(x, axis=axis, keepdims=True))\n\n    def hard_nms(self, box_scores, iou_thres, top_k=-1, candidate_size=200):\n        \"\"\"\n        Args:\n            box_scores (N, 5): boxes in corner-form and probabilities.\n            iou_thres: intersection over union threshold.\n            top_k: keep top_k results. If k <= 0, keep all the results.\n            candidate_size: only consider the candidates with the highest scores.\n        Returns:\n            box_scores (K, 5): the rows of box_scores for the kept boxes.\n        \"\"\"\n        scores = box_scores[:, -1]\n        boxes = box_scores[:, :-1]\n        picked = []\n        indexes = np.argsort(scores)\n        indexes = indexes[-candidate_size:]\n        while len(indexes) > 0:\n            current = indexes[-1]\n            picked.append(current)\n            if 0 < top_k == len(picked) or len(indexes) == 1:\n                break\n            current_box = boxes[current, :]\n            indexes = indexes[:-1]\n            rest_boxes = boxes[indexes, :]\n            iou = self.iou_of(\n                rest_boxes,\n                np.expand_dims(current_box, axis=0),\n            )\n            indexes = indexes[iou <= iou_thres]\n\n        return box_scores[picked, :]\n\n    def iou_of(self, boxes0, boxes1, eps=1e-5):\n        \"\"\"Return intersection-over-union (Jaccard index) of boxes.\n        Args:\n            boxes0 (N, 4): ground truth boxes.\n            boxes1 (N or 1, 4): predicted boxes.\n            eps: a small number to avoid 0 as denominator.\n        Returns:\n            iou (N): IoU values.\n        \"\"\"\n        overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])\n        overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])\n\n        overlap_area = self.area_of(overlap_left_top, overlap_right_bottom)\n        area0 = self.area_of(boxes0[..., :2], boxes0[..., 2:])\n        area1 = self.area_of(boxes1[..., :2], boxes1[..., 2:])\n        return overlap_area / (area0 + area1 - overlap_area + eps)\n\n    @staticmethod\n    def area_of(left_top, right_bottom):\n        \"\"\"Compute the areas of rectangles given two corners.\n        Args:\n            left_top (N, 2): left top corner.\n            right_bottom (N, 2): right bottom corner.\n        Returns:\n            area (N): return the area.\n        \"\"\"\n        hw = np.clip(right_bottom - left_top, 0.0, None)\n        return hw[..., 0] * hw[..., 1]\n"
  },
  {
    "path": "rapid_layout/model_handler/pp/pre_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom pathlib import Path\nfrom typing import Optional, Tuple, Union\n\nimport cv2\nimport numpy as np\n\nInputType = Union[str, np.ndarray, bytes, Path]\n\n\nclass PPPreProcess:\n    def __init__(self, img_size: Tuple[int, int]):\n        self.size = img_size\n        self.mean = np.array([0.485, 0.456, 0.406])\n        self.std = np.array([0.229, 0.224, 0.225])\n        self.scale = 1 / 255.0\n\n    def __call__(self, img: Optional[np.ndarray] = None) -> np.ndarray:\n        if img is None:\n            raise ValueError(\"img is None.\")\n\n        img = self.resize(img)\n        img = self.normalize(img)\n        img = self.permute(img)\n        img = np.expand_dims(img, axis=0)\n        return img.astype(np.float32)\n\n    def resize(self, img: np.ndarray) -> np.ndarray:\n        resize_h, resize_w = self.size\n        img = cv2.resize(img, (int(resize_w), int(resize_h)))\n        return img\n\n    def normalize(self, img: np.ndarray) -> np.ndarray:\n        return (img.astype(\"float32\") * self.scale - self.mean) / self.std\n\n    def permute(self, img: np.ndarray) -> np.ndarray:\n        return img.transpose((2, 0, 1))\n"
  },
  {
    "path": "rapid_layout/model_handler/pp_doc_layout/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import PPDocLayoutModelHandler\n"
  },
  {
    "path": "rapid_layout/model_handler/pp_doc_layout/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport time\nfrom typing import Any, List\n\nimport numpy as np\n\nfrom ...inference_engine.base import InferSession\nfrom ...utils.typings import RapidLayoutOutput\nfrom ..base import BaseModelHandler\nfrom .post_process import PPDocLayoutPostProcess\nfrom .pre_process import PPDocLayoutPreProcess\n\n\nclass PPDocLayoutModelHandler(BaseModelHandler):\n    def __init__(\n        self,\n        labels: List[str],\n        conf_thres: float,\n        iou_thres: float,\n        session: InferSession,\n    ):\n        self.img_size = (800, 800)\n        self.pp_preprocess = PPDocLayoutPreProcess(img_size=self.img_size)\n        self.pp_postprocess = PPDocLayoutPostProcess(labels=labels)\n\n        self.session = session\n\n        self.conf_thres = conf_thres\n        self.iou_thres = iou_thres\n\n    def __call__(self, ori_img: np.ndarray) -> RapidLayoutOutput:\n        s1 = time.perf_counter()\n\n        ori_data, ort_inputs = self.preprocess(ori_img)\n        ort_outputs = self.session(ort_inputs)\n        preds_list = self.format_output(ort_outputs)\n        boxes, scores, class_names = self.postprocess(\n            batch_outputs=preds_list,\n            datas=[ori_data],\n            threshold=self.conf_thres,\n            layout_nms=True,\n            layout_shape_mode=\"auto\",\n            filter_overlap_boxes=True,\n            skip_order_labels=None,\n        )\n\n        elapse = time.perf_counter() - s1\n        return RapidLayoutOutput(\n            img=ori_img,\n            boxes=boxes,\n            class_names=class_names,\n            scores=scores,\n            elapse=elapse,\n        )\n\n    def preprocess(self, image: np.ndarray) -> np.ndarray:\n        return self.pp_preprocess(image)\n\n    def postprocess(self, **kwargs: Any):\n        return self.pp_postprocess(**kwargs)\n\n    @staticmethod\n    def format_output(pred):\n        box_idx_start = 0\n        np_boxes_num = pred[1][0]\n        box_idx_end = box_idx_start + np_boxes_num\n        np_boxes = pred[0][box_idx_start:box_idx_end]\n        return [{\"boxes\": np.array(np_boxes)}]\n"
  },
  {
    "path": "rapid_layout/model_handler/pp_doc_layout/post_process.py",
    "content": "# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#    http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\nfrom typing import Dict, List, Optional, Tuple, Union\n\nimport cv2\nimport numpy as np\nfrom numpy import ndarray\n\nBoxes = List[dict]\nNumber = Union[int, float]\n\n\nclass PPDocLayoutPostProcess:\n    def __init__(\n        self, labels: Optional[List[str]] = None, scale_size: Optional[List[int]] = None\n    ) -> None:\n        self.labels = labels\n        self.scale_size = scale_size\n\n    def apply(\n        self,\n        boxes: ndarray,\n        img_size: Tuple[int, int],\n        threshold: Union[float, dict],\n        layout_nms: Optional[bool],\n        layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]],\n        layout_merge_bboxes_mode: Optional[Union[str, dict]],\n        masks: Optional[ndarray] = None,\n        layout_shape_mode: Optional[str] = \"auto\",\n    ) -> Boxes:\n        \"\"\"Apply post-processing to the detection boxes.\n\n        Args:\n            boxes (ndarray): The input detection boxes with scores.\n            img_size (tuple): The original image size.\n\n        Returns:\n            Boxes: The post-processed detection boxes.\n        \"\"\"\n        if layout_shape_mode == \"rect\":\n            masks = None\n        boxes[:, 2:6] = np.round(boxes[:, 2:6]).astype(int)\n        if isinstance(threshold, float):\n            expect_boxes = (boxes[:, 1] > threshold) & (boxes[:, 0] > -1)\n            boxes = boxes[expect_boxes, :]\n            if masks is not None:\n                masks = masks[expect_boxes, ...]\n        elif isinstance(threshold, dict):\n            category_filtered_boxes = []\n            if masks is not None:\n                category_filtered_masks = []\n            for cat_id in np.unique(boxes[:, 0]):\n                category_boxes = boxes[boxes[:, 0] == cat_id]\n                if masks is not None:\n                    category_masks = masks[boxes[:, 0] == cat_id]\n                category_threshold = threshold.get(int(cat_id), 0.5)\n                selected_indices = (category_boxes[:, 1] > category_threshold) & (\n                    category_boxes[:, 0] > -1\n                )\n                if masks is not None:\n                    category_masks = category_masks[selected_indices]\n                    category_filtered_masks.append(category_masks)\n                category_filtered_boxes.append(category_boxes[selected_indices])\n            boxes = (\n                np.vstack(category_filtered_boxes)\n                if category_filtered_boxes\n                else np.array([])\n            )\n            if masks is not None:\n                masks = (\n                    np.concatenate(category_filtered_masks)\n                    if category_filtered_masks\n                    else np.array([])\n                )\n\n        if layout_nms:\n            selected_indices = nms(boxes[:, :6], iou_same=0.6, iou_diff=0.98)\n            boxes = 
np.array(boxes[selected_indices])\n            if masks is not None:\n                masks = [masks[i] for i in selected_indices]\n\n        filter_large_image = True\n        # boxes.shape[1] == 6 is object detection, 7 is new ordered object detection, 8 is ordered object detection\n        if filter_large_image and len(boxes) > 1 and boxes.shape[1] in [6, 7, 8]:\n            if img_size[0] > img_size[1]:\n                area_thres = 0.82\n            else:\n                area_thres = 0.93\n            image_index = self.labels.index(\"image\") if \"image\" in self.labels else None\n            img_area = img_size[0] * img_size[1]\n            filtered_boxes = []\n            filtered_masks = []\n            for idx, box in enumerate(boxes):\n                (\n                    label_index,\n                    score,\n                    xmin,\n                    ymin,\n                    xmax,\n                    ymax,\n                ) = box[:6]\n                if label_index == image_index:\n                    xmin = max(0, xmin)\n                    ymin = max(0, ymin)\n                    xmax = min(img_size[0], xmax)\n                    ymax = min(img_size[1], ymax)\n                    box_area = (xmax - xmin) * (ymax - ymin)\n                    if box_area <= area_thres * img_area:\n                        filtered_boxes.append(box)\n                        if masks is not None:\n                            filtered_masks.append(masks[idx])\n                else:\n                    filtered_boxes.append(box)\n                    if masks is not None:\n                        filtered_masks.append(masks[idx])\n            if len(filtered_boxes) == 0:\n                filtered_boxes = boxes\n                if masks is not None:\n                    filtered_masks = masks\n            boxes = np.array(filtered_boxes)\n            if masks is not None:\n                masks = filtered_masks\n\n        if layout_merge_bboxes_mode:\n            formula_index = (\n                self.labels.index(\"formula\") if \"formula\" in self.labels else None\n            )\n            if isinstance(layout_merge_bboxes_mode, str):\n                assert layout_merge_bboxes_mode in [\n                    \"union\",\n                    \"large\",\n                    \"small\",\n                ], f\"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}\"\n\n                if layout_merge_bboxes_mode == \"union\":\n                    pass\n                else:\n                    contains_other, contained_by_other = check_containment(\n                        boxes[:, :6], formula_index\n                    )\n                    if layout_merge_bboxes_mode == \"large\":\n                        boxes = boxes[contained_by_other == 0]\n                        if masks is not None:\n                            masks = [\n                                mask\n                                for i, mask in enumerate(masks)\n                                if contained_by_other[i] == 0\n                            ]\n                    elif layout_merge_bboxes_mode == \"small\":\n                        boxes = boxes[(contains_other == 0) | (contained_by_other == 1)]\n                        if masks is not None:\n                            masks = [\n                                mask\n                                for i, mask in enumerate(masks)\n                                if (contains_other[i] == 
0)\n                                | (contained_by_other[i] == 1)\n                            ]\n            elif isinstance(layout_merge_bboxes_mode, dict):\n                keep_mask = np.ones(len(boxes), dtype=bool)\n                for category_index, layout_mode in layout_merge_bboxes_mode.items():\n                    assert layout_mode in [\n                        \"union\",\n                        \"large\",\n                        \"small\",\n                    ], f\"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_mode}\"\n                    if layout_mode == \"union\":\n                        pass\n                    else:\n                        if layout_mode == \"large\":\n                            contains_other, contained_by_other = check_containment(\n                                boxes[:, :6],\n                                formula_index,\n                                category_index,\n                                mode=layout_mode,\n                            )\n                            # Remove boxes that are contained by other boxes\n                            keep_mask &= contained_by_other == 0\n                        elif layout_mode == \"small\":\n                            contains_other, contained_by_other = check_containment(\n                                boxes[:, :6],\n                                formula_index,\n                                category_index,\n                                mode=layout_mode,\n                            )\n                            # Keep boxes that do not contain others or are contained by others\n                            keep_mask &= (contains_other == 0) | (\n                                contained_by_other == 1\n                            )\n                boxes = boxes[keep_mask]\n                if masks is not None:\n                    masks = [mask for i, mask in enumerate(masks) if keep_mask[i]]\n\n        if boxes.size == 0:\n            return np.array([])\n\n        if boxes.shape[1] == 8:\n            # Sort boxes by their order\n            sorted_idx = np.lexsort((-boxes[:, 7], boxes[:, 6]))\n            sorted_boxes = boxes[sorted_idx]\n            boxes = sorted_boxes[:, :6]\n            if masks is not None:\n                sorted_masks = [masks[i] for i in sorted_idx]\n                masks = sorted_masks\n\n        if boxes.shape[1] == 7:\n            # Sort boxes by their order\n            sorted_idx = np.argsort(boxes[:, 6])\n            sorted_boxes = boxes[sorted_idx]\n            boxes = sorted_boxes[:, :6]\n            if masks is not None:\n                sorted_masks = [masks[i] for i in sorted_idx]\n                masks = sorted_masks\n\n        polygon_points = None\n        if masks is not None:\n            scale_ratio = [h / s for h, s in zip(self.scale_size, img_size)]\n            polygon_points = extract_polygon_points_by_masks(\n                boxes, np.array(masks), scale_ratio, layout_shape_mode\n            )\n\n        if layout_unclip_ratio:\n            if isinstance(layout_unclip_ratio, float):\n                layout_unclip_ratio = (layout_unclip_ratio, layout_unclip_ratio)\n            elif isinstance(layout_unclip_ratio, (tuple, list)):\n                assert (\n                    len(layout_unclip_ratio) == 2\n                ), f\"The length of `layout_unclip_ratio` should be 2.\"\n            elif isinstance(layout_unclip_ratio, dict):\n                pass\n            else:\n 
               raise ValueError(\n                    f\"The type of `layout_unclip_ratio` must be float, Tuple[float, float] or Dict[int, Tuple[float, float]], but got {type(layout_unclip_ratio)}.\"\n                )\n            boxes = unclip_boxes(boxes, layout_unclip_ratio)\n\n        if boxes.shape[1] == 6:\n            \"\"\"For Normal Object Detection\"\"\"\n            boxes = restructured_boxes(boxes, self.labels, img_size, polygon_points)\n        else:\n            \"\"\"Unexpected Input Box Shape\"\"\"\n            raise ValueError(\n                f\"The shape of boxes should be 6, instead of {boxes.shape[1]}\"\n            )\n        return boxes\n\n    def __call__(\n        self,\n        batch_outputs: List[dict],\n        datas: List[dict],\n        threshold: Optional[Union[float, dict]] = None,\n        layout_nms: Optional[bool] = None,\n        layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,\n        layout_merge_bboxes_mode: Optional[str] = None,\n        layout_shape_mode: Optional[str] = None,\n        filter_overlap_boxes: Optional[bool] = None,\n        skip_order_labels: Optional[List[str]] = None,\n    ) -> Tuple[List[float], List[float], List[str]]:\n        outputs = []\n        for idx, (data, output) in enumerate(zip(datas, batch_outputs)):\n            if \"masks\" in output:\n                masks = output[\"masks\"]\n            else:\n                if idx == 0 and layout_shape_mode not in (None, \"rect\", \"auto\"):\n                    print(\n                        f\"The model you are using does not support polygon output, but `layout_shape_mode` is specified as {layout_shape_mode}; it will be set to 'rect'.\"\n                    )\n                layout_shape_mode = \"rect\"\n                masks = None\n            boxes = self.apply(\n                output[\"boxes\"],\n                data[\"ori_img_size\"],\n                threshold,\n                layout_nms,\n                layout_unclip_ratio,\n                layout_merge_bboxes_mode,\n                masks,\n                layout_shape_mode,\n            )\n            if filter_overlap_boxes:\n                boxes = filter_boxes(boxes, layout_shape_mode)\n            skip_order_labels = (\n                skip_order_labels\n                if skip_order_labels is not None\n                else SKIP_ORDER_LABELS\n            )\n            boxes = update_order_index(boxes, skip_order_labels)\n            outputs.append(boxes)\n\n        if len(outputs) != 1:\n            raise ValueError(\n                f\"The length of outputs should be 1, but got {len(outputs)}\"\n            )\n\n        output = outputs[0]\n        boxes, scores, class_names = [], [], []\n        for data in output:\n            boxes.append(data[\"coordinate\"])\n            scores.append(float(data[\"score\"]))\n            class_names.append(data[\"label\"])\n        return boxes, scores, class_names\n\n\ndef is_contained(box1, box2):\n    \"\"\"Check if box1 is contained within box2.\"\"\"\n    _, _, x1, y1, x2, y2 = box1\n    _, _, x1_p, y1_p, x2_p, y2_p = box2\n    box1_area = (x2 - x1) * (y2 - y1)\n    xi1 = max(x1, x1_p)\n    yi1 = max(y1, y1_p)\n    xi2 = min(x2, x2_p)\n    yi2 = min(y2, y2_p)\n    inter_width = max(0, xi2 - xi1)\n    inter_height = max(0, yi2 - yi1)\n    intersect_area = inter_width * inter_height\n    iou = intersect_area / box1_area if box1_area > 0 else 0\n    return iou >= 0.9\n\n\ndef check_containment(boxes, formula_index=None, 
category_index=None, mode=None):\n    \"\"\"Check containment relationships among boxes.\"\"\"\n    n = len(boxes)\n    contains_other = np.zeros(n, dtype=int)\n    contained_by_other = np.zeros(n, dtype=int)\n\n    for i in range(n):\n        for j in range(n):\n            if i == j:\n                continue\n            if formula_index is not None:\n                if boxes[i][0] == formula_index and boxes[j][0] != formula_index:\n                    continue\n            if category_index is not None and mode is not None:\n                if mode == \"large\" and boxes[j][0] == category_index:\n                    if is_contained(boxes[i], boxes[j]):\n                        contained_by_other[i] = 1\n                        contains_other[j] = 1\n                if mode == \"small\" and boxes[i][0] == category_index:\n                    if is_contained(boxes[i], boxes[j]):\n                        contained_by_other[i] = 1\n                        contains_other[j] = 1\n            else:\n                if is_contained(boxes[i], boxes[j]):\n                    contained_by_other[i] = 1\n                    contains_other[j] = 1\n    return contains_other, contained_by_other\n\n\ndef nms(boxes, iou_same=0.6, iou_diff=0.95):\n    \"\"\"Perform Non-Maximum Suppression (NMS) with different IoU thresholds for same and different classes.\"\"\"\n    # Extract class scores\n    scores = boxes[:, 1]\n\n    # Sort indices by scores in descending order\n    indices = np.argsort(scores)[::-1]\n    selected_boxes = []\n\n    while len(indices) > 0:\n        current = indices[0]\n        current_box = boxes[current]\n        current_class = current_box[0]\n        current_coords = current_box[2:]\n\n        selected_boxes.append(current)\n        indices = indices[1:]\n\n        filtered_indices = []\n        for i in indices:\n            box = boxes[i]\n            box_class = box[0]\n            box_coords = box[2:]\n            iou_value = iou(current_coords, box_coords)\n            threshold = iou_same if current_class == box_class else iou_diff\n\n            # If the IoU is below the threshold, keep the box\n            if iou_value < threshold:\n                filtered_indices.append(i)\n        indices = filtered_indices\n    return selected_boxes\n\n\ndef iou(box1, box2):\n    \"\"\"Compute the Intersection over Union (IoU) of two bounding boxes.\"\"\"\n    x1, y1, x2, y2 = box1\n    x1_p, y1_p, x2_p, y2_p = box2\n\n    # Compute the intersection coordinates\n    x1_i = max(x1, x1_p)\n    y1_i = max(y1, y1_p)\n    x2_i = min(x2, x2_p)\n    y2_i = min(y2, y2_p)\n\n    # Compute the area of intersection\n    inter_area = max(0, x2_i - x1_i + 1) * max(0, y2_i - y1_i + 1)\n\n    # Compute the area of both bounding boxes\n    box1_area = (x2 - x1 + 1) * (y2 - y1 + 1)\n    box2_area = (x2_p - x1_p + 1) * (y2_p - y1_p + 1)\n\n    # Compute the IoU\n    iou_value = inter_area / float(box1_area + box2_area - inter_area)\n\n    return iou_value\n\n\nSKIP_ORDER_LABELS = [\n    \"figure_title\",\n    \"vision_footnote\",\n    \"image\",\n    \"chart\",\n    \"table\",\n    \"header\",\n    \"header_image\",\n    \"footer\",\n    \"footer_image\",\n    \"footnote\",\n    \"aside_text\",\n]\n\n\ndef is_convex(p_prev, p_curr, p_next):\n    \"\"\"\n    Check whether the vertex p_curr is a convex corner of the polygon.\n    \"\"\"\n    v1 = p_curr - p_prev\n    v2 = p_next - p_curr\n    cross = v1[0] * v2[1] - v1[1] * v2[0]\n    return cross < 0\n\n\ndef angle_between_vectors(v1, v2):\n    \"\"\"\n    
Calculate the angle between two vectors.\n    \"\"\"\n\n    unit_v1 = v1 / np.linalg.norm(v1)\n    unit_v2 = v2 / np.linalg.norm(v2)\n    dot_prod = np.clip(np.dot(unit_v1, unit_v2), -1.0, 1.0)\n    angle_rad = np.arccos(dot_prod)\n    return np.degrees(angle_rad)\n\n\ndef calc_new_point(p_curr, v1, v2, distance=20):\n    \"\"\"\n    Calculate the new point based on the direction of two vectors.\n    \"\"\"\n    dir_vec = v1 / np.linalg.norm(v1) + v2 / np.linalg.norm(v2)\n    dir_vec = dir_vec / np.linalg.norm(dir_vec)\n    p_new = p_curr + dir_vec * distance\n    return p_new\n\n\ndef extract_custom_vertices(\n    polygon, max_allowed_dist, sharp_angle_thresh=45, max_dist_ratio=0.3\n):\n    poly = np.array(polygon)\n    n = len(poly)\n    max_allowed_dist *= max_dist_ratio\n\n    point_info = []\n    for i in range(n):\n        p_prev, p_curr, p_next = poly[(i - 1) % n], poly[i], poly[(i + 1) % n]\n        v1, v2 = p_prev - p_curr, p_next - p_curr\n        is_convex_point = is_convex(p_prev, p_curr, p_next)\n        angle = angle_between_vectors(v1, v2)\n        point_info.append(\n            {\n                \"index\": i,\n                \"is_convex\": is_convex_point,\n                \"angle\": angle,\n                \"v1\": v1,\n                \"v2\": v2,\n            }\n        )\n\n    concave_indices = [i for i, info in enumerate(point_info) if not info[\"is_convex\"]]\n    preserve_concave = set()\n\n    if concave_indices:\n        groups = []\n        current_group = [concave_indices[0]]\n\n        for i in range(1, len(concave_indices)):\n            if concave_indices[i] - concave_indices[i - 1] == 1 or (\n                concave_indices[i - 1] == n - 1 and concave_indices[i] == 0\n            ):\n                current_group.append(concave_indices[i])\n            else:\n                if len(current_group) >= 2:\n                    groups.extend(current_group)\n                current_group = [concave_indices[i]]\n\n        if len(current_group) >= 2:\n            groups.extend(current_group)\n\n        if (\n            len(concave_indices) >= 2\n            and concave_indices[0] == 0\n            and concave_indices[-1] == n - 1\n        ):\n            if 0 in groups and n - 1 in groups:\n                preserve_concave.update(groups)\n        else:\n            preserve_concave.update(groups)\n\n    kept_points = [\n        i\n        for i, info in enumerate(point_info)\n        if info[\"is_convex\"] or (i in preserve_concave and info[\"angle\"] >= 120)\n    ]\n\n    final_points = []\n    for idx in range(len(kept_points)):\n        current_idx = kept_points[idx]\n        next_idx = kept_points[(idx + 1) % len(kept_points)]\n        final_points.append(current_idx)\n\n        dist = np.linalg.norm(poly[current_idx] - poly[next_idx])\n        if dist > max_allowed_dist:\n            intermediate = (\n                list(range(current_idx + 1, next_idx))\n                if next_idx > current_idx\n                else list(range(current_idx + 1, n)) + list(range(0, next_idx))\n            )\n\n            if intermediate:\n                num_needed = int(np.ceil(dist / max_allowed_dist)) - 1\n                if len(intermediate) <= num_needed:\n                    final_points.extend(intermediate)\n                else:\n                    step = len(intermediate) / num_needed\n                    final_points.extend(\n                        [intermediate[int(i * step)] for i in range(num_needed)]\n                    )\n\n    final_points = 
sorted(set(final_points))\n    res = []\n\n    for i in final_points:\n        info = point_info[i]\n        p_curr = poly[i]\n\n        if info[\"is_convex\"] and abs(info[\"angle\"] - sharp_angle_thresh) < 1:\n            v1_norm = info[\"v1\"] / np.linalg.norm(info[\"v1\"])\n            v2_norm = info[\"v2\"] / np.linalg.norm(info[\"v2\"])\n            dir_vec = v1_norm + v2_norm\n            dir_vec /= np.linalg.norm(dir_vec)\n            d = (np.linalg.norm(info[\"v1\"]) + np.linalg.norm(info[\"v2\"])) / 2\n            res.append(tuple(p_curr + dir_vec * d))\n        else:\n            res.append(tuple(p_curr))\n\n    return res\n\n\ndef mask2polygon(mask, max_allowed_dist, epsilon_ratio=0.004, extract_custom=True):\n    \"\"\"\n    Approximate a binary mask with a simplified polygon, removing small noise.\n    Args:\n        mask (ndarray): The input mask of shape [H, W].\n        max_allowed_dist (float): The maximum allowed distance between kept vertices.\n        epsilon_ratio (float): The ratio used to derive the epsilon for cv2.approxPolyDP.\n        extract_custom (bool): Whether to refine the vertices with extract_custom_vertices.\n    Returns:\n        The polygon points of the largest contour, or None if no contour is found.\n    \"\"\"\n    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n\n    if not cnts:\n        return None\n\n    cnt = max(cnts, key=cv2.contourArea)\n    epsilon = epsilon_ratio * cv2.arcLength(cnt, True)\n    approx_cnt = cv2.approxPolyDP(cnt, epsilon, True)\n    polygon_points = approx_cnt.squeeze()\n    polygon_points = np.atleast_2d(polygon_points)\n    if extract_custom:\n        polygon_points = extract_custom_vertices(polygon_points, max_allowed_dist)\n\n    return polygon_points\n\n\ndef extract_polygon_points_by_masks(boxes, masks, scale_ratio, layout_shape_mode):\n    \"\"\"\n    Extract polygon points from instance masks; in auto mode, trust the geometry-based decision.\n    \"\"\"\n    scale_w, scale_h = scale_ratio[0] / 4, scale_ratio[1] / 4\n    h_m, w_m = masks.shape[1:]\n    polygon_points = []\n    iou_threshold = 0.95\n\n    # widest box width (xmax - xmin)\n    max_box_w = max(boxes[:, 4] - boxes[:, 2])\n\n    for i in range(len(boxes)):\n        x_min, y_min, x_max, y_max = boxes[i, 2:6].astype(np.int32)\n        box_w, box_h = x_max - x_min, y_max - y_min\n\n        # default rect\n        rect = np.array(\n            [[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]],\n            dtype=np.float32,\n        )\n\n        if box_w <= 0 or box_h <= 0:\n            polygon_points.append(rect)\n            continue\n\n        # crop mask\n        x_s = np.clip(\n            [int(round(x_min * scale_w)), int(round(x_max * scale_w))], 0, w_m\n        )\n        y_s = np.clip(\n            [int(round(y_min * scale_h)), int(round(y_max * scale_h))], 0, h_m\n        )\n\n        cropped = masks[i, y_s[0] : y_s[1], x_s[0] : x_s[1]]\n        if cropped.size == 0 or np.sum(cropped) == 0:\n            polygon_points.append(rect)\n            continue\n\n        if layout_shape_mode == \"rect\":\n            polygon_points.append(rect)\n            continue\n\n        # resize mask to match box size\n        resized_mask = cv2.resize(\n            cropped.astype(np.uint8), (box_w, box_h), interpolation=cv2.INTER_NEAREST\n        )\n\n        if box_w > max_box_w * 0.6:\n            max_allowed_dist = box_w\n        else:\n            max_allowed_dist = max_box_w\n\n        polygon = mask2polygon(resized_mask, max_allowed_dist)\n        if polygon is not None and len(polygon) < 4:\n            polygon_points.append(rect)\n            continue\n        if polygon is not None and len(polygon) > 0:\n            polygon = polygon + np.array([x_min, y_min])\n        if layout_shape_mode == \"poly\":\n            polygon_points.append(polygon)\n        elif layout_shape_mode == 
\"quad\":\n            # convert polygon to quadrilateral\n            quad = convert_polygon_to_quad(polygon)\n            polygon_points.append(quad if quad is not None else rect)\n        elif layout_shape_mode == \"auto\":\n            iou_threshold = 0.8\n\n            rect_list = rect.tolist()\n            quad = convert_polygon_to_quad(polygon)\n            if quad is not None:\n                quad_list = quad.tolist()\n\n                iou_quad = calculate_polygon_overlap_ratio(\n                    rect_list,\n                    quad_list,\n                    mode=\"union\",\n                )\n                if iou_quad >= 0.95:\n                    # if quad is very similar to rect, use rect instead\n                    quad = rect\n\n                poly_list = (\n                    polygon.tolist() if isinstance(polygon, np.ndarray) else polygon\n                )\n\n                iou_quad = calculate_polygon_overlap_ratio(\n                    poly_list, quad_list, mode=\"union\"\n                )\n\n                pre_poly = polygon_points[-1] if len(polygon_points) > 0 else None\n                iou_pre = 0\n                if pre_poly is not None:\n                    iou_pre = calculate_polygon_overlap_ratio(\n                        pre_poly.tolist(),\n                        rect_list,\n                        mode=\"small\",\n                    )\n\n                if iou_quad >= iou_threshold and iou_pre < 0.01:\n                    # if quad is similar to polygon, use quad\n                    polygon_points.append(quad)\n                    continue\n\n            # if all ious are less than threshold, use polygon\n            polygon_points.append(polygon)\n        else:\n            raise ValueError(\n                \"layout_shape_mode must be one of ['rect', 'poly', 'quad', 'auto']\"\n            )\n\n    return polygon_points\n\n\ndef convert_polygon_to_quad(polygon):\n    \"\"\"\n    Convert polygon to minimum bounding rectangle (quad).\n    Args:\n        polygon (ndarray): The polygon points of shape [N, 2].\n    Returns:\n        quad (ndarray): The 4-point quad, clockwise from top-left, or None if invalid.\n    \"\"\"\n    if polygon is None or len(polygon) < 3:\n        return None\n\n    points = np.array(polygon, dtype=np.float32)\n    if len(points.shape) == 1:\n        points = points.reshape(-1, 2)\n\n    min_rect = cv2.minAreaRect(points)\n    quad = cv2.boxPoints(min_rect)\n\n    center = quad.mean(axis=0)\n    angles = np.arctan2(quad[:, 1] - center[1], quad[:, 0] - center[0])\n    sorted_indices = np.argsort(angles)\n    quad = quad[sorted_indices]\n    sums = quad[:, 0] + quad[:, 1]\n    top_left_idx = np.argmin(sums)\n    quad = np.roll(quad, -top_left_idx, axis=0)\n\n    return quad\n\n\ndef restructured_boxes(\n    boxes: ndarray,\n    labels: List[str],\n    img_size: Tuple[int, int],\n    polygon_points: ndarray = None,\n) -> Boxes:\n    \"\"\"\n    Restructure the given bounding boxes and labels based on the image size.\n\n    Args:\n        boxes (ndarray): A 2D array of bounding boxes with each box represented as [cls_id, score, xmin, ymin, xmax, ymax].\n        labels (List[str]): A list of class labels corresponding to the class ids.\n        img_size (Tuple[int, int]): A tuple representing the width and height of the image.\n        polygon_points (ndarray): A 2D array of polygon points with each point represented as [x, y].\n    Returns:\n        Boxes: A list of dictionaries, each containing 'cls_id', 'label', 'score', 
and 'coordinate' keys.\n    \"\"\"\n    box_list = []\n    w, h = img_size\n\n    for idx, box in enumerate(boxes):\n        xmin, ymin, xmax, ymax = box[2:]\n        xmin = int(max(0, xmin))\n        ymin = int(max(0, ymin))\n        xmax = int(min(w, xmax))\n        ymax = int(min(h, ymax))\n        if xmax <= xmin or ymax <= ymin:\n            continue\n        res = {\n            \"cls_id\": int(box[0]),\n            \"label\": labels[int(box[0])],\n            \"score\": float(box[1]),\n            \"coordinate\": [xmin, ymin, xmax, ymax],\n            \"order\": idx + 1,\n        }\n        if polygon_points is not None:\n            polygon_point = polygon_points[idx]\n            if polygon_point is None:\n                continue\n            res[\"polygon_points\"] = polygon_point\n        box_list.append(res)\n\n    return box_list\n\n\ndef unclip_boxes(boxes, unclip_ratio=None):\n    \"\"\"\n    Expand bounding boxes from (x1, y1, x2, y2) format using an unclipping ratio.\n\n    Parameters:\n    - boxes: np.ndarray of shape (N, 4), where each row is (x1, y1, x2, y2).\n    - unclip_ratio: tuple of (width_ratio, height_ratio), optional.\n\n    Returns:\n    - expanded_boxes: np.ndarray of shape (N, 4), where each row is (x1, y1, x2, y2).\n    \"\"\"\n    if unclip_ratio is None:\n        return boxes\n\n    if isinstance(unclip_ratio, dict):\n        expanded_boxes = []\n        for box in boxes:\n            class_id, score, x1, y1, x2, y2 = box\n            if class_id in unclip_ratio:\n                width_ratio, height_ratio = unclip_ratio[class_id]\n\n                width = x2 - x1\n                height = y2 - y1\n\n                new_w = width * width_ratio\n                new_h = height * height_ratio\n                center_x = x1 + width / 2\n                center_y = y1 + height / 2\n\n                new_x1 = center_x - new_w / 2\n                new_y1 = center_y - new_h / 2\n                new_x2 = center_x + new_w / 2\n                new_y2 = center_y + new_h / 2\n\n                expanded_boxes.append([class_id, score, new_x1, new_y1, new_x2, new_y2])\n            else:\n                expanded_boxes.append(box)\n        return np.array(expanded_boxes)\n\n    else:\n        widths = boxes[:, 4] - boxes[:, 2]\n        heights = boxes[:, 5] - boxes[:, 3]\n\n        new_w = widths * unclip_ratio[0]\n        new_h = heights * unclip_ratio[1]\n        center_x = boxes[:, 2] + widths / 2\n        center_y = boxes[:, 3] + heights / 2\n\n        new_x1 = center_x - new_w / 2\n        new_y1 = center_y - new_h / 2\n        new_x2 = center_x + new_w / 2\n        new_y2 = center_y + new_h / 2\n        expanded_boxes = np.column_stack(\n            (boxes[:, 0], boxes[:, 1], new_x1, new_y1, new_x2, new_y2)\n        )\n        return expanded_boxes\n\n\ndef make_valid(poly):\n    if not poly.is_valid:\n        poly = poly.buffer(0)\n    return poly\n\n\ndef calculate_polygon_overlap_ratio(\n    polygon1: List[Tuple[int, int]],\n    polygon2: List[Tuple[int, int]],\n    mode: str = \"union\",\n) -> float:\n    \"\"\"\n    Calculate the overlap ratio between two polygons.\n\n    Args:\n        polygon1 (List[Tuple[int, int]]): First polygon represented as a list of points.\n        polygon2 (List[Tuple[int, int]]): Second polygon represented as a list of points.\n        mode (str, optional): Overlap calculation mode. 
Defaults to \"union\".\n\n    Returns:\n        float: Overlap ratio value between 0 and 1.\n    \"\"\"\n    try:\n        from shapely.geometry import Polygon\n    except ImportError:\n        raise ImportError(\"Please install Shapely library.\")\n    poly1 = Polygon(polygon1)\n    poly2 = Polygon(polygon2)\n    poly1 = make_valid(poly1)\n    poly2 = make_valid(poly2)\n    intersection = poly1.intersection(poly2).area\n    union = poly1.union(poly2).area\n    if mode == \"union\":\n        return intersection / union\n    elif mode == \"small\":\n        small_area = min(poly1.area, poly2.area)\n        return intersection / small_area\n    elif mode == \"large\":\n        large_area = max(poly1.area, poly2.area)\n        return intersection / large_area\n    else:\n        raise ValueError(f\"Unknown mode: {mode}\")\n\n\ndef calculate_bbox_area(bbox):\n    \"\"\"Calculate bounding box area\"\"\"\n    x1, y1, x2, y2 = map(float, bbox)\n    area = abs((x2 - x1) * (y2 - y1))\n    return area\n\n\ndef calculate_overlap_ratio(\n    bbox1: Union[np.ndarray, list, tuple],\n    bbox2: Union[np.ndarray, list, tuple],\n    mode=\"union\",\n) -> float:\n    \"\"\"\n    Calculate the overlap ratio between two bounding boxes using NumPy.\n\n    Args:\n        bbox1 (np.ndarray, list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]\n        bbox2 (np.ndarray, list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]\n        mode (str): The mode of calculation, either 'union', 'small', or 'large'.\n\n    Returns:\n        float: The overlap ratio value between the two bounding boxes\n    \"\"\"\n    bbox1 = np.array(bbox1)\n    bbox2 = np.array(bbox2)\n\n    x_min_inter = np.maximum(bbox1[0], bbox2[0])\n    y_min_inter = np.maximum(bbox1[1], bbox2[1])\n    x_max_inter = np.minimum(bbox1[2], bbox2[2])\n    y_max_inter = np.minimum(bbox1[3], bbox2[3])\n\n    inter_width = np.maximum(0, x_max_inter - x_min_inter)\n    inter_height = np.maximum(0, y_max_inter - y_min_inter)\n\n    inter_area = inter_width * inter_height\n\n    bbox1_area = calculate_bbox_area(bbox1)\n    bbox2_area = calculate_bbox_area(bbox2)\n\n    if mode == \"union\":\n        ref_area = bbox1_area + bbox2_area - inter_area\n    elif mode == \"small\":\n        ref_area = np.minimum(bbox1_area, bbox2_area)\n    elif mode == \"large\":\n        ref_area = np.maximum(bbox1_area, bbox2_area)\n    else:\n        raise ValueError(\n            f\"Invalid mode {mode}, must be one of ['union', 'small', 'large'].\"\n        )\n\n    if ref_area == 0:\n        return 0.0\n\n    return inter_area / ref_area\n\n\ndef filter_boxes(\n    src_boxes: Dict[str, List[Dict]], layout_shape_mode: str\n) -> Dict[str, List[Dict]]:\n    \"\"\"\n    Remove overlapping boxes from layout detection results based on a given overlap ratio.\n\n    Args:\n        boxes (Dict[str, List[Dict]]): Layout detection result dict containing a 'boxes' list.\n\n    Returns:\n        Dict[str, List[Dict]]: Filtered dict with overlapping boxes removed.\n    \"\"\"\n    boxes = [box for box in src_boxes if box[\"label\"] != \"reference\"]\n    dropped_indexes = set()\n\n    for i in range(len(boxes)):\n        x1, y1, x2, y2 = boxes[i][\"coordinate\"]\n        w, h = x2 - x1, y2 - y1\n        if w < 6 or h < 6:\n            dropped_indexes.add(i)\n        for j in range(i + 1, len(boxes)):\n            if i in dropped_indexes or j in dropped_indexes:\n                continue\n            overlap_ratio = calculate_overlap_ratio(\n       
         boxes[i][\"coordinate\"], boxes[j][\"coordinate\"], \"small\"\n            )\n            if (\n                boxes[i][\"label\"] == \"inline_formula\"\n                or boxes[j][\"label\"] == \"inline_formula\"\n            ):\n                if overlap_ratio > 0.5:\n                    if boxes[i][\"label\"] == \"inline_formula\":\n                        dropped_indexes.add(i)\n                    if boxes[j][\"label\"] == \"inline_formula\":\n                        dropped_indexes.add(j)\n                    continue\n            if overlap_ratio > 0.7:\n                if layout_shape_mode != \"rect\" and \"polygon_points\" in boxes[i]:\n                    poly_overlap_ratio = calculate_polygon_overlap_ratio(\n                        boxes[i][\"polygon_points\"], boxes[j][\"polygon_points\"], \"small\"\n                    )\n                    if poly_overlap_ratio < 0.7:\n                        continue\n                box_area_i = calculate_bbox_area(boxes[i][\"coordinate\"])\n                box_area_j = calculate_bbox_area(boxes[j][\"coordinate\"])\n                if (\n                    boxes[i][\"label\"] == \"image\" or boxes[j][\"label\"] == \"image\"\n                ) and boxes[i][\"label\"] != boxes[j][\"label\"]:\n                    continue\n                if box_area_i >= box_area_j:\n                    dropped_indexes.add(j)\n                else:\n                    dropped_indexes.add(i)\n    out_boxes = [box for idx, box in enumerate(boxes) if idx not in dropped_indexes]\n    return out_boxes\n\n\ndef update_order_index(boxes: List[Dict], skip_order_labels: List[str]):\n    \"\"\"\n    Update the 'order_index' field of each box in the provided list of boxes.\n\n    Args:\n        boxes (List[Dict]): A list of boxes, where each box is represented as a dictionary with an 'order_index' field.\n\n    Returns:\n        None. The  function updates the 'order_index' field of each box in the input list.\n    \"\"\"\n    order_index = 1\n    for box in boxes:\n        label = box[\"label\"]\n        if label not in skip_order_labels:\n            box[\"order\"] = order_index\n            order_index += 1\n        else:\n            box[\"order\"] = None\n    return boxes\n\n\ndef find_label_position(box, polygon_points, text_w, text_h, max_shift=50):\n    try:\n        from shapely.geometry import Polygon\n    except ImportError:\n        raise ImportError(\"Please install Shapely library.\")\n    poly = Polygon(polygon_points)\n    min_x = min([p[0] for p in polygon_points])\n    min_y = min([p[1] for p in polygon_points])\n    for dy in range(max_shift):\n        x1, y1 = min_x, min_y + dy\n        x2, y2 = x1 + text_w, y1 + text_h\n        label_rect = box(x1, y1, x2, y2)\n        if poly.intersects(label_rect):\n            return int(x1), int(y1)\n\n    return int(min_x), int(min_y)\n"
  },
  {
    "path": "rapid_layout/model_handler/pp_doc_layout/pre_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport copy\nfrom pathlib import Path\nfrom typing import Any, Dict, Optional, Tuple, Union\n\nimport cv2\nimport numpy as np\n\nInputType = Union[str, np.ndarray, bytes, Path]\n\n\nclass PPDocLayoutPreProcess:\n    def __init__(self, img_size: Tuple[int, int]):\n        self.size = [800, 800]\n\n        self.mean = [0.0, 0.0, 0.0]\n        self.std = [1.0, 1.0, 1.0]\n        self.scale = 1 / 255.0\n        self.alpha = [self.scale / self.std[i] for i in range(len(self.std))]\n        self.beta = [-self.mean[i] / self.std[i] for i in range(len(self.std))]\n\n    def __call__(self, img: Optional[np.ndarray] = None) -> Dict[str, Any]:\n        if img is None:\n            raise ValueError(\"img is None.\")\n\n        data = self.resize(img)\n        data = self.normalize(data)\n        data = self.permute(data)\n        ori_data = copy.deepcopy(data)\n        batch_inputs = self.to_batch(data)\n        return ori_data, batch_inputs\n\n    def resize(self, img: np.ndarray):\n        resize_h, resize_w = self.size\n        img_ori_h, img_ori_w = img.shape[:2]\n\n        img = cv2.resize(\n            img, (int(resize_w), int(resize_h)), interpolation=cv2.INTER_CUBIC\n        )\n        img_h, img_w = img.shape[:2]\n        data = {\n            \"img\": img,\n            \"img_size\": [img_w, img_h],\n            \"scale_factors\": [img_w / img_ori_w, img_h / img_ori_h],\n            \"ori_img_size\": [img_ori_w, img_ori_h],\n        }\n        return data\n\n    def normalize(self, data: Dict[str, Any]) -> np.ndarray:\n        img = data[\"img\"]\n        split_im = list(cv2.split(img))\n        for c in range(img.shape[2]):\n            split_im[c] = split_im[c].astype(np.float32)\n            split_im[c] *= self.alpha[c]\n            split_im[c] += self.beta[c]\n\n        res = cv2.merge(split_im)\n        data[\"img\"] = res\n        return data\n\n    def permute(self, data: Dict[str, Any]) -> np.ndarray:\n        img = data[\"img\"]\n        data[\"img\"] = img.transpose((2, 0, 1))\n        return data\n\n    def to_batch(self, data, dtype: np.dtype = np.float32) -> list[np.ndarray]:\n        result = []\n        for key in [\"img_size\", \"img\", \"scale_factors\"]:\n            if key == \"img_size\":\n                val = [data[key][::-1]]\n            elif key == \"scale_factors\":\n                val = [data.get(key, [1.0, 1.0])[::-1]]\n            else:\n                val = [data[key]]\n            result.append(np.array(val, dtype=dtype))\n        return result\n"
  },
  {
    "path": "rapid_layout/model_handler/utils.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom pathlib import Path\nfrom typing import Dict\n\nimport cv2\nimport numpy as np\n\nfrom ..utils.download_file import DownloadFile, DownloadFileInput\nfrom ..utils.logger import Logger\nfrom ..utils.typings import ModelType\nfrom ..utils.utils import mkdir, read_yaml\n\n\nclass ModelProcessor:\n    logger = Logger(logger_name=__name__).get_log()\n\n    cur_dir = Path(__file__).resolve().parent\n    root_dir = cur_dir.parent\n    DEFAULT_MODEL_PATH = root_dir / \"configs\" / \"default_models.yaml\"\n\n    DEFAULT_MODEL_DIR = root_dir / \"models\"\n    mkdir(DEFAULT_MODEL_DIR)\n\n    model_map = read_yaml(DEFAULT_MODEL_PATH)\n\n    @classmethod\n    def get_model_path(cls, model_type: ModelType) -> str:\n        return cls.get_single_model_path(model_type)\n\n    @classmethod\n    def get_single_model_path(cls, model_type: ModelType) -> str:\n        model_info = cls.model_map[model_type.value]\n        save_model_path = (\n            cls.DEFAULT_MODEL_DIR / Path(model_info[\"model_dir_or_path\"]).name\n        )\n        download_params = DownloadFileInput(\n            file_url=model_info[\"model_dir_or_path\"],\n            sha256=model_info[\"SHA256\"],\n            save_path=save_model_path,\n            logger=cls.logger,\n        )\n        DownloadFile.run(download_params)\n\n        return str(save_model_path)\n\n    @classmethod\n    def get_multi_models_dict(cls, model_type: ModelType) -> Dict[str, str]:\n        model_info = cls.model_map[model_type.value]\n\n        results = {}\n\n        model_root_dir = model_info[\"model_dir_or_path\"]\n        save_model_dir = cls.DEFAULT_MODEL_DIR / Path(model_root_dir).name\n        for file_name, sha256 in model_info[\"SHA256\"].items():\n            save_path = save_model_dir / file_name\n\n            download_params = DownloadFileInput(\n                file_url=f\"{model_root_dir}/{file_name}\",\n                sha256=sha256,\n                save_path=save_path,\n                logger=cls.logger,\n            )\n            DownloadFile.run(download_params)\n            results[Path(file_name).stem] = str(save_path)\n\n        return results\n\n\nclass LetterBox:\n    \"\"\"Resize image and padding for detection, instance segmentation, pose.\"\"\"\n\n    def __init__(\n        self,\n        new_shape=(640, 640),\n        auto=False,\n        scaleFill=False,\n        scaleup=True,\n        center=True,\n        stride=32,\n    ):\n        \"\"\"Initialize LetterBox object with specific parameters.\"\"\"\n        self.new_shape = new_shape\n        self.auto = auto\n        self.scaleFill = scaleFill\n        self.scaleup = scaleup\n        self.stride = stride\n        self.center = center  # Put the image in the middle or top-left\n\n    def __call__(self, labels=None, image=None):\n        \"\"\"Return updated labels and image with added border.\"\"\"\n        if labels is None:\n            labels = {}\n        img = labels.get(\"img\") if image is None else image\n        shape = img.shape[:2]  # current shape [height, width]\n        new_shape = labels.pop(\"rect_shape\", self.new_shape)\n        if isinstance(new_shape, int):\n            new_shape = (new_shape, new_shape)\n\n        # Scale ratio (new / old)\n        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)\n            r = min(r, 1.0)\n\n        # Compute padding\n        
ratio = r, r  # width, height ratios\n        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding\n        if self.auto:  # minimum rectangle\n            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding\n        elif self.scaleFill:  # stretch\n            dw, dh = 0.0, 0.0\n            new_unpad = (new_shape[1], new_shape[0])\n            ratio = (\n                new_shape[1] / shape[1],\n                new_shape[0] / shape[0],\n            )  # width, height ratios\n\n        if self.center:\n            dw /= 2  # divide padding into 2 sides\n            dh /= 2\n\n        if shape[::-1] != new_unpad:  # resize\n            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)\n        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))\n        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))\n        img = cv2.copyMakeBorder(\n            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)\n        )  # add border\n        if labels.get(\"ratio_pad\"):\n            labels[\"ratio_pad\"] = (labels[\"ratio_pad\"], (left, top))  # for evaluation\n\n        if len(labels):\n            labels = self._update_labels(labels, ratio, dw, dh)\n            labels[\"img\"] = img\n            labels[\"resized_shape\"] = new_shape\n            return labels\n        return img\n\n    def _update_labels(self, labels, ratio, padw, padh):\n        \"\"\"Update labels.\"\"\"\n        labels[\"instances\"].convert_bbox(format=\"xyxy\")\n        labels[\"instances\"].denormalize(*labels[\"img\"].shape[:2][::-1])\n        labels[\"instances\"].scale(*ratio)\n        labels[\"instances\"].add_padding(padw, padh)\n        return labels\n\n\ndef rescale_boxes(boxes, input_width, input_height, img_width, img_height):\n    # Rescale boxes to original image dimensions\n    input_shape = np.array([input_width, input_height, input_width, input_height])\n    boxes = np.divide(boxes, input_shape, dtype=np.float32)\n    boxes *= np.array([img_width, img_height, img_width, img_height])\n    return boxes\n\n\ndef scale_boxes(\n    img1_shape, boxes, img0_shape, ratio_pad=None, padding=True, xywh=False\n):\n    \"\"\"\n    Rescales bounding boxes (in the format of xyxy by default) from the shape of the image they were originally\n    specified in (img1_shape) to the shape of a different image (img0_shape).\n\n    Args:\n        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).\n        boxes (np.ndarray): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)\n        img0_shape (tuple): the shape of the target image, in the format of (height, width).\n        ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be\n            calculated based on the size difference between the two images.\n        padding (bool): If True, assumes the boxes are based on an image letterboxed in YOLO style. If False, do\n            regular rescaling.\n        xywh (bool): The box format is xywh or not, default=False.\n\n    Returns:\n        boxes (np.ndarray): The scaled bounding boxes, in the format of (x1, y1, x2, y2)\n    \"\"\"\n    if ratio_pad is None:  # calculate from img0_shape\n        gain = min(\n            img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]\n        )  # gain = old / new\n        pad = (\n            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),\n            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),\n        )  # wh padding\n    else:\n        gain = ratio_pad[0][0]\n        pad = ratio_pad[1]\n\n    if padding:\n        boxes[..., 0] -= pad[0]  # x padding\n        boxes[..., 1] -= pad[1]  # y padding\n        if not xywh:\n            boxes[..., 2] -= pad[0]  # x padding\n            boxes[..., 3] -= pad[1]  # y padding\n    boxes[..., :4] /= gain\n    return clip_boxes(boxes, img0_shape)\n\n\ndef clip_boxes(boxes, shape):\n    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2\n    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2\n    return boxes\n\n\ndef nms(boxes, scores, iou_threshold):\n    # Sort by score\n    sorted_indices = np.argsort(scores)[::-1]\n\n    keep_boxes = []\n    while sorted_indices.size > 0:\n        # Pick the box with the highest remaining score\n        box_id = sorted_indices[0]\n        keep_boxes.append(box_id)\n\n        # Compute IoU of the picked box with the rest\n        ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])\n\n        # Remove boxes with IoU over the threshold\n        keep_indices = np.where(ious < iou_threshold)[0]\n        sorted_indices = sorted_indices[keep_indices + 1]\n\n    return keep_boxes\n\n\ndef multiclass_nms(boxes, scores, class_ids, iou_threshold):\n    unique_class_ids = np.unique(class_ids)\n\n    keep_boxes = []\n    for class_id in unique_class_ids:\n        class_indices = np.where(class_ids == class_id)[0]\n        class_boxes = boxes[class_indices, :]\n        class_scores = scores[class_indices]\n\n        class_keep_boxes = nms(class_boxes, class_scores, iou_threshold)\n        keep_boxes.extend(class_indices[class_keep_boxes])\n\n    return keep_boxes\n\n\ndef compute_iou(box, boxes):\n    # Compute xmin, ymin, xmax, ymax for both boxes\n    xmin = np.maximum(box[0], boxes[:, 0])\n    ymin = np.maximum(box[1], boxes[:, 1])\n    xmax = np.minimum(box[2], boxes[:, 2])\n    ymax = np.minimum(box[3], boxes[:, 3])\n\n    # Compute intersection area\n    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)\n\n    # Compute union area\n    box_area = (box[2] - box[0]) * (box[3] - box[1])\n    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])\n    union_area = box_area + boxes_area - intersection_area\n\n    # Compute IoU\n    iou = intersection_area / union_area\n\n    return iou\n\n\ndef xywh2xyxy(x):\n    # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)\n    y = np.copy(x)\n    y[..., 0] = x[..., 0] - x[..., 2] / 2\n    y[..., 1] = x[..., 1] - x[..., 3] / 2\n    y[..., 2] = x[..., 0] + x[..., 2] / 2\n    y[..., 3] = x[..., 1] + x[..., 3] / 2\n    return y\n"
  },
  {
    "path": "rapid_layout/model_handler/yolov8/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom .main import YOLOv8ModelHandler\n"
  },
  {
    "path": "rapid_layout/model_handler/yolov8/main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport time\nfrom typing import List, Tuple\n\nimport numpy as np\n\nfrom ...inference_engine.base import InferSession\nfrom ...utils.typings import RapidLayoutOutput\nfrom ..base import BaseModelHandler\nfrom .post_process import YOLOv8PostProcess\nfrom .pre_process import YOLOv8PreProcess\n\n\nclass YOLOv8ModelHandler(BaseModelHandler):\n    def __init__(self, labels, conf_thres, iou_thres, session: InferSession):\n        self.img_size = (640, 640)\n        self.preprocess = YOLOv8PreProcess(img_size=self.img_size)\n        self.postprocess = YOLOv8PostProcess(labels, conf_thres, iou_thres)\n\n        self.session = session\n\n    def __call__(self, ori_img: np.ndarray) -> RapidLayoutOutput:\n        s1 = time.perf_counter()\n\n        ori_img_shape = ori_img.shape[:2]\n\n        img = self.preprocess(ori_img)\n        preds = self.session(img)\n        boxes, scores, class_names = self.postprocess(\n            preds, ori_img_shape, self.img_size\n        )\n\n        elapse = time.perf_counter() - s1\n        return RapidLayoutOutput(\n            img=ori_img,\n            boxes=boxes,\n            class_names=class_names,\n            scores=scores,\n            elapse=elapse,\n        )\n\n    def preprocess(self, image: np.ndarray) -> np.ndarray:\n        return self.preprocess(image)\n\n    def postprocess(self, model_output) -> Tuple[np.ndarray, np.ndarray, List[str]]:\n        return self.postprocess(model_output)\n"
  },
  {
    "path": "rapid_layout/model_handler/yolov8/post_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import List, Tuple\n\nimport numpy as np\n\nfrom ..utils import multiclass_nms, rescale_boxes, xywh2xyxy\n\n\nclass YOLOv8PostProcess:\n    def __init__(self, labels: List[str], conf_thres=0.7, iou_thres=0.5):\n        self.labels = labels\n        self.conf_threshold = conf_thres\n        self.iou_threshold = iou_thres\n        self.input_width, self.input_height = None, None\n        self.img_width, self.img_height = None, None\n\n    def __call__(\n        self,\n        output: List[np.ndarray],\n        ori_img_shape: Tuple[int, int],\n        img_shape: Tuple[int, int],\n    ) -> Tuple[np.ndarray, np.ndarray, List[str]]:\n        self.img_height, self.img_width = ori_img_shape\n        self.input_height, self.input_width = img_shape\n\n        predictions = np.squeeze(output[0]).T\n\n        # Filter out object confidence scores below threshold\n        scores = np.max(predictions[:, 4:], axis=1)\n        predictions = predictions[scores > self.conf_threshold, :]\n        scores = scores[scores > self.conf_threshold]\n\n        if len(scores) == 0:\n            return [], [], []\n\n        # Get the class with the highest confidence\n        class_ids = np.argmax(predictions[:, 4:], axis=1)\n\n        # Get bounding boxes for each object\n        boxes = self.extract_boxes(predictions)\n\n        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes\n        # indices = nms(boxes, scores, self.iou_threshold)\n        indices = multiclass_nms(boxes, scores, class_ids, self.iou_threshold)\n\n        labels = [self.labels[i] for i in class_ids[indices]]\n        return boxes[indices], scores[indices], labels\n\n    def extract_boxes(self, predictions: np.ndarray) -> np.ndarray:\n        # Extract boxes from predictions\n        boxes = predictions[:, :4]\n\n        # Scale boxes to original image dimensions\n        boxes = rescale_boxes(\n            boxes, self.input_width, self.input_height, self.img_width, self.img_height\n        )\n\n        # Convert boxes to xyxy format\n        boxes = xywh2xyxy(boxes)\n\n        return boxes\n"
  },
  {
    "path": "rapid_layout/model_handler/yolov8/pre_process.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import Tuple\n\nimport cv2\nimport numpy as np\n\n\nclass YOLOv8PreProcess:\n    def __init__(self, img_size: Tuple[int, int]):\n        self.img_size = img_size\n\n    def __call__(self, image: np.ndarray) -> np.ndarray:\n        input_img = cv2.resize(image, self.img_size)\n        input_img = input_img / 255.0\n        input_img = input_img.transpose(2, 0, 1)\n        return input_img[np.newaxis, :, :, :].astype(np.float32)\n"
  },
  {
    "path": "rapid_layout/models/.gitkeep",
    "content": ""
  },
  {
    "path": "rapid_layout/models/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\n"
  },
  {
    "path": "rapid_layout/utils/__init__.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\n\n"
  },
  {
    "path": "rapid_layout/utils/download_file.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport logging\nimport sys\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Optional, Union\n\nimport requests\nfrom tqdm import tqdm\n\nfrom .utils import get_file_sha256\n\n\n@dataclass\nclass DownloadFileInput:\n    file_url: str\n    save_path: Union[str, Path]\n    logger: logging.Logger\n    sha256: Optional[str] = None\n\n\nclass DownloadFile:\n    BLOCK_SIZE = 1024  # 1 KiB\n    REQUEST_TIMEOUT = 60\n\n    @classmethod\n    def run(cls, input_params: DownloadFileInput):\n        save_path = Path(input_params.save_path)\n\n        logger = input_params.logger\n        cls._ensure_parent_dir_exists(save_path)\n        if cls._should_skip_download(save_path, input_params.sha256, logger):\n            return\n\n        response = cls._make_http_request(input_params.file_url, logger)\n        cls._save_response_with_progress(response, save_path, logger)\n\n    @staticmethod\n    def _ensure_parent_dir_exists(path: Path):\n        path.parent.mkdir(parents=True, exist_ok=True)\n\n    @classmethod\n    def _should_skip_download(\n        cls, path: Path, expected_sha256: Optional[str], logger: logging.Logger\n    ) -> bool:\n        if not path.exists():\n            return False\n\n        if expected_sha256 is None:\n            logger.info(\"File exists (no checksum verification): %s\", path)\n            return True\n\n        if cls.check_file_sha256(path, expected_sha256):\n            logger.info(\"File exists and is valid: %s\", path)\n            return True\n\n        logger.warning(\"File exists but is invalid, redownloading: %s\", path)\n        return False\n\n    @classmethod\n    def _make_http_request(cls, url: str, logger: logging.Logger) -> requests.Response:\n        logger.info(\"Initiating download: %s\", url)\n        try:\n            response = requests.get(url, stream=True, timeout=cls.REQUEST_TIMEOUT)\n            response.raise_for_status()  # Raises HTTPError for 4XX/5XX\n            return response\n        except requests.RequestException as e:\n            logger.error(\"Download failed: %s\", url)\n            raise DownloadFileException(f\"Failed to download {url}\") from e\n\n    @classmethod\n    def _save_response_with_progress(\n        cls, response: requests.Response, save_path: Path, logger: logging.Logger\n    ) -> None:\n        total_size = int(response.headers.get(\"content-length\", 0))\n        logger.info(\"Download size: %.2fMB\", total_size / 1024 / 1024)\n\n        with tqdm(\n            total=total_size,\n            unit=\"iB\",\n            unit_scale=True,\n            disable=not cls.check_is_atty(),\n        ) as progress_bar:\n            with open(save_path, \"wb\") as output_file:\n                for chunk in response.iter_content(chunk_size=cls.BLOCK_SIZE):\n                    progress_bar.update(len(chunk))\n                    output_file.write(chunk)\n\n        logger.info(\"Successfully saved to: %s\", save_path)\n\n    @staticmethod\n    def check_file_sha256(file_path: Union[str, Path], gt_sha256: str) -> bool:\n        return get_file_sha256(file_path) == gt_sha256\n\n    @staticmethod\n    def check_is_atty() -> bool:\n        try:\n            is_interactive = sys.stderr.isatty()\n        except AttributeError:\n            return False\n        return is_interactive\n\n\nclass DownloadFileException(Exception):\n    pass\n"
  },
  {
    "path": "rapid_layout/utils/load_image.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom io import BytesIO\nfrom pathlib import Path\nfrom typing import Any, Union\n\nimport cv2\nimport numpy as np\nimport requests\nfrom PIL import Image, ImageOps, UnidentifiedImageError\n\nfrom .utils import is_url\n\nroot_dir = Path(__file__).resolve().parent\nInputType = Union[str, np.ndarray, bytes, Path, Image.Image]\n\n\nclass LoadImage:\n    def __init__(self):\n        pass\n\n    def __call__(self, img: InputType) -> np.ndarray:\n        if not isinstance(img, InputType.__args__):\n            raise LoadImageError(\n                f\"The img type {type(img)} does not in {InputType.__args__}\"\n            )\n\n        origin_img_type = type(img)\n        img = self.load_img(img)\n        img = self.convert_img(img, origin_img_type)\n        return img\n\n    def load_img(self, img: InputType) -> np.ndarray:\n        if isinstance(img, (str, Path)):\n            if is_url(str(img)):\n                img = Image.open(requests.get(img, stream=True, timeout=60).raw)\n            else:\n                self.verify_exist(img)\n                img = Image.open(img)\n\n            img = self.exif_transpose(img)\n\n            try:\n                img = self.img_to_ndarray(img)\n            except UnidentifiedImageError as e:\n                raise LoadImageError(f\"cannot identify image file {img}\") from e\n            return img\n\n        if isinstance(img, bytes):\n            img = self.img_to_ndarray(Image.open(BytesIO(img)))\n            return img\n\n        if isinstance(img, np.ndarray):\n            return img\n\n        if isinstance(img, Image.Image):\n            return self.img_to_ndarray(img)\n\n        raise LoadImageError(f\"{type(img)} is not supported!\")\n\n    @staticmethod\n    def verify_exist(file_path: Union[str, Path]):\n        if not Path(file_path).exists():\n            raise LoadImageError(f\"{file_path} does not exist.\")\n\n    @staticmethod\n    def exif_transpose(img: Image.Image) -> Image.Image:\n        try:\n            img_corrected = ImageOps.exif_transpose(img)\n            if img_corrected is None:\n                return img\n            return img_corrected\n        except Exception:\n            return img\n\n    def img_to_ndarray(self, img: Image.Image) -> np.ndarray:\n        if img.mode == \"1\":\n            img = img.convert(\"L\")\n            return np.array(img)\n        return np.array(img)\n\n    def convert_img(self, img: np.ndarray, origin_img_type: Any) -> np.ndarray:\n        if img.ndim == 2:\n            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n\n        if img.ndim == 3:\n            channel = img.shape[2]\n            if channel == 1:\n                return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)\n\n            if channel == 2:\n                return self.cvt_two_to_three(img)\n\n            if channel == 3:\n                if issubclass(origin_img_type, (str, Path, bytes, Image.Image)):\n                    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)\n                return img\n\n            if channel == 4:\n                return self.cvt_four_to_three(img)\n\n            raise LoadImageError(\n                f\"The channel({channel}) of the img is not in [1, 2, 3, 4]\"\n            )\n\n        raise LoadImageError(f\"The ndim({img.ndim}) of the img is not in [2, 3]\")\n\n    @staticmethod\n    def cvt_two_to_three(img: np.ndarray) -> np.ndarray:\n        \"\"\"gray + alpha → BGR\"\"\"\n        img_gray = img[..., 0]\n 
       img_bgr = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)\n\n        img_alpha = img[..., 1]\n        not_a = cv2.bitwise_not(img_alpha)\n        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)\n\n        new_img = cv2.bitwise_and(img_bgr, img_bgr, mask=img_alpha)\n        new_img = cv2.add(new_img, not_a)\n        return new_img\n\n    @staticmethod\n    def cvt_four_to_three(img: np.ndarray) -> np.ndarray:\n        \"\"\"自动调整背景颜色，以增强文字对比度\"\"\"\n\n        rgb = img[:, :, :3]  # shape (H, W, 3)\n        alpha = img[:, :, 3]  # shape (H, W)\n\n        # 获取非透明区域的 RGB 像素\n        mask = alpha > 0\n        non_transparent_rgb = rgb[mask]  # shape (N, 3)\n        if non_transparent_rgb.size == 0:\n            # 全透明图像：默认用白色背景\n            bg_color = (255, 255, 255)\n        else:\n            # 使用加权灰度公式计算亮度均值\n            # luminance = 0.299*R + 0.587*G + 0.114*B\n            r, g, b = (\n                non_transparent_rgb[:, 0],\n                non_transparent_rgb[:, 1],\n                non_transparent_rgb[:, 2],\n            )\n            luminance = 0.299 * r + 0.587 * g + 0.114 * b\n            avg_luminance = np.mean(luminance)\n\n            # 根据平均亮度选择高对比度背景\n            bg_color = (255, 255, 255) if avg_luminance < 128 else (0, 0, 0)\n\n        # 构建背景图像\n        background = np.full_like(rgb, bg_color, dtype=np.uint8)\n\n        # 合成：前景 = rgb * (alpha/255), 背景 = bg * (1 - alpha/255)\n        alpha_norm = alpha.astype(np.float32) / 255.0\n        foreground_blend = rgb.astype(np.float32) * alpha_norm[..., None]\n        background_blend = background.astype(np.float32) * (1.0 - alpha_norm)[..., None]\n\n        blended = (foreground_blend + background_blend).astype(np.uint8)\n\n        return cv2.cvtColor(blended, cv2.COLOR_RGB2BGR)\n\n\nclass LoadImageError(Exception):\n    pass\n"
  },
  {
    "path": "rapid_layout/utils/logger.py",
    "content": "# -*- encoding: utf-8 -*-\nimport logging\n\nimport colorlog\n\n\nclass Logger:\n    def __init__(self, log_level=logging.INFO, logger_name=None):\n        self.logger = logging.getLogger(logger_name)\n        self.logger.setLevel(log_level)\n        self.logger.propagate = False\n\n        formatter = colorlog.ColoredFormatter(\n            f\"%(log_color)s[%(levelname)s] %(asctime)s [{logger_name}] %(filename)s:%(lineno)d: %(message)s\",\n            log_colors={\n                \"DEBUG\": \"cyan\",\n                \"INFO\": \"green\",\n                \"WARNING\": \"yellow\",\n                \"ERROR\": \"red\",\n                \"CRITICAL\": \"red,bg_white\",\n            },\n        )\n\n        if not self.logger.handlers:\n            console_handler = logging.StreamHandler()\n            console_handler.setFormatter(formatter)\n\n            for handler in self.logger.handlers:\n                self.logger.removeHandler(handler)\n\n            console_handler.setLevel(log_level)\n            self.logger.addHandler(console_handler)\n\n    def get_log(self):\n        return self.logger\n\n\nlogger = Logger(log_level=logging.INFO, logger_name=\"RapidLayout\").get_log()\n"
  },
  {
    "path": "rapid_layout/utils/typings.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport dataclasses\nfrom dataclasses import dataclass, field\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import List, Optional, Union\n\nimport numpy as np\n\nfrom .logger import Logger\nfrom .utils import save_img\n\nlogger = Logger(logger_name=__name__).get_log()\n\n\nclass ModelType(Enum):\n    PP_LAYOUT_CDLA = \"pp_layout_cdla\"\n    PP_LAYOUT_PUBLAYNET = \"pp_layout_publaynet\"\n    PP_LAYOUT_TABLE = \"pp_layout_table\"\n    YOLOV8N_LAYOUT_PAPER = \"yolov8n_layout_paper\"\n    YOLOV8N_LAYOUT_REPORT = \"yolov8n_layout_report\"\n    YOLOV8N_LAYOUT_PUBLAYNET = \"yolov8n_layout_publaynet\"\n    YOLOV8N_LAYOUT_GENERAL6 = \"yolov8n_layout_general6\"\n    DOCLAYOUT_DOCSTRUCTBENCH = \"doclayout_docstructbench\"\n    DOCLAYOUT_D4LA = \"doclayout_d4la\"\n    DOCLAYOUT_DOCSYNTH = \"doclayout_docsynth\"\n    PP_DOC_LAYOUTV2 = \"pp_doc_layoutv2\"\n    PP_DOC_LAYOUTV3 = \"pp_doc_layoutv3\"\n\n\nclass EngineType(Enum):\n    ONNXRUNTIME = \"onnxruntime\"\n    OPENVINO = \"openvino\"\n\n\n@dataclass\nclass RapidLayoutInput:\n    model_type: ModelType = ModelType.PP_LAYOUT_CDLA\n    model_dir_or_path: Union[str, Path, None] = None\n\n    engine_type: EngineType = EngineType.ONNXRUNTIME\n    engine_cfg: dict = field(default_factory=dict)\n\n    conf_thresh: float = 0.5\n    iou_thresh: float = 0.5\n\n    @classmethod\n    def normalize_kwargs(cls, kwargs: dict) -> dict:\n        \"\"\"只保留本 dataclass 的字段，并将 model_type/engine_type 从 str 转为枚举。\"\"\"\n        valid = {f.name for f in dataclasses.fields(cls)}\n        filtered = {k: v for k, v in kwargs.items() if k in valid}\n        if \"model_type\" in filtered and isinstance(filtered[\"model_type\"], str):\n            filtered[\"model_type\"] = ModelType(filtered[\"model_type\"])\n        if \"engine_type\" in filtered and isinstance(filtered[\"engine_type\"], str):\n            filtered[\"engine_type\"] = EngineType(filtered[\"engine_type\"])\n        return filtered\n\n\n@dataclass\nclass RapidLayoutOutput:\n    img: Optional[np.ndarray] = None\n    boxes: Optional[List[List[float]]] = None\n    class_names: Optional[List[str]] = None\n    scores: Optional[List[float]] = None\n    elapse: Optional[float] = None\n\n    def vis(self, save_path: Union[str, Path, None] = None) -> Optional[np.ndarray]:\n        if self.img is None or self.boxes is None:\n            logger.warning(\"No image or boxes to visualize.\")\n            return None\n\n        from .vis_res import VisLayout\n\n        vis_img = VisLayout.draw_detections(\n            self.img,\n            np.array(self.boxes),\n            np.array(self.scores),\n            np.array(self.class_names),\n        )\n        if save_path is not None and vis_img is not None:\n            save_img(save_path, vis_img)\n            logger.info(f\"Visualization saved as {save_path}\")\n\n        return vis_img\n"
  },
  {
    "path": "rapid_layout/utils/utils.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport hashlib\nimport importlib\nfrom pathlib import Path\nfrom typing import Tuple, Union\nfrom urllib.parse import urlparse\n\nimport cv2\nimport numpy as np\nfrom omegaconf import DictConfig, OmegaConf\n\n\ndef mkdir(dir_path):\n    Path(dir_path).mkdir(parents=True, exist_ok=True)\n\n\ndef read_yaml(file_path: Union[str, Path]) -> DictConfig:\n    return OmegaConf.load(file_path)\n\n\ndef quads_to_rect_bbox(bbox: np.ndarray) -> Tuple[float, float, float, float]:\n    if bbox.ndim != 3:\n        raise ValueError(\"bbox shape must be 3\")\n\n    if bbox.shape[1] != 4 and bbox.shape[2] != 2:\n        raise ValueError(\"bbox shape must be (N, 4, 2)\")\n\n    all_x, all_y = (bbox[:, :, 0].flatten(), bbox[:, :, 1].flatten())\n    x_min, y_min = np.min(all_x), np.min(all_y)\n    x_max, y_max = np.max(all_x), np.max(all_y)\n    return float(x_min), float(y_min), float(x_max), float(y_max)\n\n\ndef has_chinese_char(text: str) -> bool:\n    return any(\"\\u4e00\" <= ch <= \"\\u9fff\" for ch in text)\n\n\ndef get_file_sha256(file_path: Union[str, Path], chunk_size: int = 65536) -> str:\n    with open(file_path, \"rb\") as file:\n        sha_signature = hashlib.sha256()\n        while True:\n            chunk = file.read(chunk_size)\n            if not chunk:\n                break\n            sha_signature.update(chunk)\n\n    return sha_signature.hexdigest()\n\n\ndef save_img(save_path: Union[str, Path], img: np.ndarray):\n    if not Path(save_path).parent.exists():\n        Path(save_path).parent.mkdir(parents=True, exist_ok=True)\n\n    cv2.imwrite(str(save_path), img)\n\n\ndef is_url(url: str) -> bool:\n    try:\n        result = urlparse(url)\n        return all([result.scheme, result.netloc])\n    except Exception:\n        return False\n\n\ndef import_package(name, package=None):\n    try:\n        module = importlib.import_module(name, package=package)\n        return module\n    except ModuleNotFoundError:\n        return None\n"
  },
  {
    "path": "rapid_layout/utils/vis_res.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nfrom typing import Optional, Tuple\n\nimport cv2\nimport numpy as np\n\n\nclass VisLayout:\n    @classmethod\n    def draw_detections(\n        cls,\n        image: np.ndarray,\n        boxes: Optional[np.ndarray],\n        scores: Optional[np.ndarray],\n        class_names: Optional[np.ndarray],\n        mask_alpha=0.3,\n    ) -> Optional[np.ndarray]:\n        \"\"\"_summary_\n\n        Args:\n            image (np.ndarray): H x W x C\n            boxes (np.ndarray): (N, 4)\n            scores (np.ndarray): (N, )\n            class_ids (np.ndarray): (N, )\n            mask_alpha (float, optional): _description_. Defaults to 0.3.\n\n        Returns:\n            np.ndarray: _description_\n        \"\"\"\n        if boxes is None or scores is None or class_names is None:\n            return None\n\n        det_img = image.copy()\n\n        img_height, img_width = image.shape[:2]\n        font_size = min([img_height, img_width]) * 0.0006\n        text_thickness = int(min([img_height, img_width]) * 0.001)\n\n        det_img = cls.draw_masks(det_img, boxes, mask_alpha)\n\n        for label, box, score in zip(class_names, boxes, scores):\n            color = cls.get_color()\n\n            cls.draw_box(det_img, box, color)\n            caption = f\"{label} {int(score * 100)}%\"\n            cls.draw_text(det_img, caption, box, color, font_size, text_thickness)\n\n        return det_img\n\n    @staticmethod\n    def draw_box(\n        image: np.ndarray,\n        box: np.ndarray,\n        color: Tuple[int, int, int] = (0, 0, 255),\n        thickness: int = 2,\n    ) -> np.ndarray:\n        x1, y1, x2, y2 = box.astype(int)\n        return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)\n\n    @staticmethod\n    def draw_text(\n        image: np.ndarray,\n        text: str,\n        box: np.ndarray,\n        color: Tuple[int, int, int] = (0, 0, 255),\n        font_size: float = 0.001,\n        text_thickness: int = 2,\n    ) -> np.ndarray:\n        x1, y1, x2, y2 = box.astype(int)\n        (tw, th), _ = cv2.getTextSize(\n            text=text,\n            fontFace=cv2.FONT_HERSHEY_SIMPLEX,\n            fontScale=font_size,\n            thickness=text_thickness,\n        )\n        th = int(th * 1.2)\n\n        cv2.rectangle(image, (x1, y1), (x1 + tw, y1 - th), color, -1)\n\n        return cv2.putText(\n            image,\n            text,\n            (x1, y1),\n            cv2.FONT_HERSHEY_SIMPLEX,\n            font_size,\n            (255, 255, 255),\n            text_thickness,\n            cv2.LINE_AA,\n        )\n\n    @classmethod\n    def draw_masks(\n        cls,\n        image: np.ndarray,\n        boxes: np.ndarray,\n        mask_alpha: float = 0.3,\n    ) -> np.ndarray:\n        mask_img = image.copy()\n        for box in boxes:\n            color = cls.get_color()\n            x1, y1, x2, y2 = box.astype(int)\n            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)\n\n        return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)\n\n    @staticmethod\n    def get_color():\n        colors = (\n            np.random.randint(0, 255),\n            np.random.randint(0, 255),\n            np.random.randint(0, 255),\n        )\n        return colors\n"
  },
  {
    "path": "requirements.txt",
    "content": "opencv_python>=4.5.1.48\nnumpy>=2.0.0\nPillow\ntqdm\nrequests\ncolorlog\nomegaconf\n"
  },
  {
    "path": "setup.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport sys\nfrom pathlib import Path\nfrom typing import List, Union\n\nfrom get_pypi_latest_version import GetPyPiLatestVersion\nfrom setuptools import find_packages, setup\n\n\ndef read_txt(txt_path: Union[Path, str]) -> List[str]:\n    with open(txt_path, \"r\", encoding=\"utf-8\") as f:\n        data = [v.rstrip(\"\\n\") for v in f]\n    return data\n\n\ndef get_readme():\n    root_dir = Path(__file__).resolve().parent\n    readme_path = str(root_dir / \"docs\" / \"doc_whl_rapid_layout.md\")\n    with open(readme_path, \"r\", encoding=\"utf-8\") as f:\n        readme = f.read()\n    return readme\n\n\nMODULE_NAME = \"rapid_layout\"\nobtainer = GetPyPiLatestVersion()\nlatest_version = obtainer(MODULE_NAME)\nVERSION_NUM = obtainer.version_add_one(latest_version, add_patch=True)\n\nif len(sys.argv) > 2:\n    match_str = \" \".join(sys.argv[2:])\n    matched_versions = obtainer.extract_version(match_str)\n    if matched_versions:\n        VERSION_NUM = matched_versions\nsys.argv = sys.argv[:2]\n\nsetup(\n    name=MODULE_NAME,\n    version=VERSION_NUM,\n    platforms=\"Any\",\n    long_description=get_readme(),\n    long_description_content_type=\"text/markdown\",\n    description=\"Tools for document layout analysis based ONNXRuntime.\",\n    author=\"SWHL\",\n    author_email=\"liekkaskono@163.com\",\n    url=\"https://github.com/RapidAI/RapidLayout\",\n    license=\"Apache-2.0\",\n    include_package_data=True,\n    install_requires=read_txt(\"requirements.txt\"),\n    packages=find_packages(),\n    package_data={\"\": [\"*.onnx\", \"*.yaml\"]},\n    keywords=[\"ppstructure,layout,rapidocr,rapid_layout\"],\n    classifiers=[\n        \"Programming Language :: Python :: 3.6\",\n        \"Programming Language :: Python :: 3.7\",\n        \"Programming Language :: Python :: 3.8\",\n        \"Programming Language :: Python :: 3.9\",\n        \"Programming Language :: Python :: 3.10\",\n        \"Programming Language :: Python :: 3.11\",\n        \"Programming Language :: Python :: 3.12\",\n        \"Programming Language :: Python :: 3.13\",\n    ],\n    python_requires=\">=3.6\",\n    entry_points={\"console_scripts\": [f\"{MODULE_NAME}={MODULE_NAME}.main:main\"]},\n)\n"
  },
  {
    "path": "tests/test_engine.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport sys\nfrom pathlib import Path\n\nimport pytest\n\ncur_dir = Path(__file__).resolve().parent\nroot_dir = cur_dir.parent\n\nsys.path.insert(0, str(root_dir))\n\nfrom rapid_layout import EngineType, ModelType, RapidLayout, RapidLayoutInput\n\ntest_dir = cur_dir / \"test_files\"\n\n# 与 test_main.py 保持一致：(图片名, 模型类型, 期望检测框数量)\nENGINE_TEST_CASES = [\n    (\"layout.jpg\", \"pp_layout_cdla\", 14),\n    (\"PMC3576793_00004.jpg\", \"yolov8n_layout_publaynet\", 12),\n    (\"PMC3576793_00004.jpg\", \"yolov8n_layout_general6\", 13),\n    (\"PMC3576793_00004.jpg\", \"doclayout_docstructbench\", 14),\n]\n\n\ndef get_engine(params: RapidLayoutInput):\n    return RapidLayout(cfg=params)\n\n\n@pytest.mark.parametrize(\"img_name, model_type, gt\", ENGINE_TEST_CASES)\ndef test_engine_onnxruntime(img_name, model_type, gt):\n    \"\"\"使用 onnxruntime 引擎推理，结果与 test_main 预期一致。\"\"\"\n    params = RapidLayoutInput(\n        model_type=ModelType(model_type),\n        engine_type=EngineType.ONNXRUNTIME,\n    )\n    engine = get_engine(params)\n    img_path = test_dir / img_name\n    results = engine(img_path)\n\n    assert results.boxes is not None\n    assert len(results.boxes) == gt\n\n\n@pytest.mark.parametrize(\"img_name, model_type, gt\", ENGINE_TEST_CASES)\ndef test_engine_openvino(img_name, model_type, gt):\n    \"\"\"使用 openvino 引擎推理，结果与 test_main 预期一致。\"\"\"\n    pytest.importorskip(\n        \"openvino\", reason=\"openvino not installed, skip openvino tests\"\n    )\n    params = RapidLayoutInput(\n        model_type=ModelType(model_type),\n        engine_type=EngineType.OPENVINO,\n    )\n    engine = get_engine(params)\n    img_path = test_dir / img_name\n    results = engine(img_path)\n\n    assert results.boxes is not None\n    assert len(results.boxes) == gt\n"
  },
  {
    "path": "tests/test_main.py",
    "content": "# -*- encoding: utf-8 -*-\n# @Author: SWHL\n# @Contact: liekkaskono@163.com\nimport shlex\nimport sys\nfrom pathlib import Path\nfrom typing import Optional\n\nimport pytest\n\ncur_dir = Path(__file__).resolve().parent\nroot_dir = cur_dir.parent\n\nsys.path.append(str(root_dir))\n\nfrom rapid_layout import ModelType, RapidLayout, RapidLayoutInput\nfrom rapid_layout.main import main\n\ntest_dir = cur_dir / \"test_files\"\n\n\ndef get_engine(params: Optional[RapidLayoutInput] = None):\n    if params:\n        engine = RapidLayout(cfg=params)\n        return engine\n\n    engine = RapidLayout()\n    return engine\n\n\n@pytest.mark.parametrize(\n    \"img_name,model_type,gt\",\n    [\n        (\"layout.jpg\", \"pp_layout_cdla\", 14),\n        (\"PMC3576793_00004.jpg\", \"yolov8n_layout_publaynet\", 12),\n        (\"PMC3576793_00004.jpg\", \"yolov8n_layout_general6\", 13),\n        (\"PMC3576793_00004.jpg\", \"doclayout_docstructbench\", 14),\n        (\"pp_doc_layoutv2_layout.jpg\", \"pp_doc_layoutv2\", 13),\n        (\"pp_doc_layoutv2_layout.jpg\", \"pp_doc_layoutv3\", 13),\n    ],\n)\ndef test_normal(img_name, model_type, gt):\n    img_path = test_dir / img_name\n    engine = get_engine(params=RapidLayoutInput(model_type=ModelType(model_type)))\n    results = engine(img_path)\n    assert results.boxes is not None\n    assert len(results.boxes) == gt\n\n\n@pytest.mark.parametrize(\n    \"command, expected_output\",\n    [\n        (f\"{test_dir / 'layout.jpg'} --model_type pp_layout_cdla\", 0),\n    ],\n)\ndef test_main_cli(capsys, command, expected_output):\n    main(shlex.split(command))\n    output = capsys.readouterr().out.rstrip()\n    assert len(output) > expected_output\n\n\ndef test_init_with_kwargs():\n    \"\"\"仅用关键字参数构造，不传 cfg。\"\"\"\n    engine = RapidLayout(model_type=ModelType.PP_LAYOUT_CDLA, conf_thresh=0.5)\n    img_path = test_dir / \"layout.jpg\"\n    results = engine(img_path)\n    assert results.boxes is not None\n    assert len(results.boxes) == 14\n\n\ndef test_init_with_kwargs_model_type_string():\n    \"\"\"kwargs 中 model_type 传字符串，应被正确转为枚举。\"\"\"\n    engine = RapidLayout(model_type=\"pp_layout_cdla\", conf_thresh=0.5)\n    img_path = test_dir / \"layout.jpg\"\n    results = engine(img_path)\n    assert results.boxes is not None\n    assert len(results.boxes) == 14\n\n\ndef test_init_with_cfg():\n    \"\"\"仅用配置对象构造。\"\"\"\n    cfg = RapidLayoutInput(model_type=ModelType.PP_LAYOUT_CDLA, conf_thresh=0.5)\n    engine = RapidLayout(cfg=cfg)\n    img_path = test_dir / \"layout.jpg\"\n    results = engine(img_path)\n    assert results.boxes is not None\n    assert len(results.boxes) == 14\n\n\ndef test_init_with_cfg_and_kwargs_override():\n    \"\"\"传入 cfg 的同时用 kwargs 覆盖部分字段。\"\"\"\n    cfg = RapidLayoutInput(model_type=ModelType.PP_LAYOUT_CDLA, conf_thresh=0.5)\n    engine = RapidLayout(cfg=cfg, conf_thresh=0.4)\n    img_path = test_dir / \"layout.jpg\"\n    results = engine(img_path)\n    assert results.boxes is not None\n    assert len(results.boxes) == 15\n"
  }
]