[
  {
    "path": ".github/workflows/ci.yaml",
    "content": "# This is a basic workflow to help you get started with Actions\n\nname: CI\n\n# Controls when the action will run. Triggers the workflow on push or pull request\n# events but only for the master branch\non:\n  push:\n  pull_request:\n\n# A workflow run is made up of one or more jobs that can run sequentially or in parallel\njobs:\n  # This workflow contains a single job called \"build\"\n  build:\n    # The type of runner that the job will run on\n    runs-on: ubuntu-22.04\n    strategy:\n      matrix:\n        python-version: [\"3.8\", \"3.9\", \"3.10\"]\n\n    # Steps represent a sequence of tasks that will be executed as part of the job\n    steps:\n    # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it\n    - uses: actions/checkout@v2\n\n    - name: Set up Python ${{ matrix.python-version }}\n      uses: actions/setup-python@v1\n      with:\n        python-version: ${{ matrix.python-version }}\n\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install -r requirements.txt\n        pip install isort==4.3.21\n        pip install flake8==3.8.3\n        pip install \"importlib-metadata<5.0\"\n    # Runs a set of commands using the runners shell\n    - name: Format check\n      run: ./.github/workflows/format_check.sh\n"
  },
  {
    "path": ".github/workflows/format_check.sh",
    "content": "#!/bin/bash -e\n\nset -e\n\nexport PYTHONPATH=$PWD:$PYTHONPATH\n\nflake8 yolox exps tools || flake8_ret=$?\nif [ \"$flake8_ret\" ]; then\n    exit $flake8_ret\nfi\necho \"All flake check passed!\"\nisort --check-only -rc yolox exps || isort_ret=$?\nif [ \"$isort_ret\" ]; then\n    exit $isort_ret\nfi\necho \"All isort check passed!\"\n"
  },
  {
    "path": ".gitignore",
    "content": "### Linux ###\n*~\n\n# user experiments directory\nYOLOX_outputs/\ndatasets/\n# do not ignore datasets under yolox/data\n!*yolox/data/datasets/\n\n# temporary files which can be created if a process still has a handle open of a deleted file\n.fuse_hidden*\n\n# KDE directory preferences\n.directory\n\n# Linux trash folder which might appear on any partition or disk\n.Trash-*\n\n# .nfs files are created when an open file is removed but is still being accessed\n.nfs*\n\n### PyCharm ###\n# User-specific stuff\n.idea\n\n# CMake\ncmake-build-*/\n\n# Mongo Explorer plugin\n.idea/**/mongoSettings.xml\n\n# File-based project format\n*.iws\n\n# IntelliJ\nout/\n\n# mpeltonen/sbt-idea plugin\n.idea_modules/\n\n# JIRA plugin\natlassian-ide-plugin.xml\n\n# Cursive Clojure plugin\n.idea/replstate.xml\n\n# Crashlytics plugin (for Android Studio and IntelliJ)\ncom_crashlytics_export_strings.xml\ncrashlytics.properties\ncrashlytics-build.properties\nfabric.properties\n\n# Editor-based Rest Client\n.idea/httpRequests\n\n# Android studio 3.1+ serialized cache file\n.idea/caches/build_file_checksums.ser\n\n# JetBrains templates\n**___jb_tmp___\n\n### Python ###\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\npip-wheel-metadata/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django 
stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\ndocs/build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n.python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don’t work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n### Vim ###\n# Swap\n[._]*.s[a-v][a-z]\n[._]*.sw[a-p]\n[._]s[a-rt-v][a-z]\n[._]ss[a-gi-z]\n[._]sw[a-p]\n\n# Session\nSession.vim\n\n# Temporary\n.netrwhist\n# Auto-generated tag files\ntags\n# Persistent undo\n[._]*.un~\n\n# output\ndocs/api\n.code-workspace.code-workspace\n*.pkl\n*.npy\n*.pth\n*.onnx\n*.engine\nevents.out.tfevents*\n\n# vscode\n*.code-workspace\n.vscode\n\n# vim\n.vim\n\n# OS generated files\n.DS_Store\n.DS_Store?\n.Trashes\nehthumbs.db\nThumbs.db\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/pycqa/flake8\n    rev: 3.8.3\n    hooks:\n      - id: flake8\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v3.1.0\n    hooks:\n      - id: check-added-large-files\n      - id: check-docstring-first\n      - id: check-executables-have-shebangs\n      - id: check-json\n      - id: check-yaml\n        args: [\"--unsafe\"]\n      - id: debug-statements\n      - id: end-of-file-fixer\n      - id: requirements-txt-fixer\n      - id: trailing-whitespace\n  - repo: https://github.com/jorisroovers/gitlint\n    rev: v0.15.1\n    hooks:\n      - id: gitlint\n  - repo: https://github.com/pycqa/isort\n    rev: 4.3.21\n    hooks:\n      - id: isort\n\n  - repo: https://github.com/PyCQA/autoflake\n    rev: v1.4\n    hooks:\n      - id: autoflake\n        name: Remove unused variables and imports\n        entry: autoflake\n        language: python\n        args:\n          [\n            \"--in-place\",\n            \"--remove-all-unused-imports\",\n            \"--remove-unused-variables\",\n            \"--expand-star-imports\",\n            \"--ignore-init-module-imports\",\n          ]\n        files: \\.py$\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Build documentation in the docs/ directory with Sphinx\nsphinx:\n   configuration: docs/conf.py\n\n# Optionally build your docs in additional formats such as PDF\nformats:\n   - pdf\n\n# Optionally set the version of Python and requirements required to build your docs\npython:\n   version: \"3.7\"\n   install:\n   - requirements: docs/requirements-doc.txt\n   - requirements: requirements.txt\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright (c) 2021-2022 Megvii Inc. All rights reserved.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include requirements.txt\nrecursive-include yolox *.cpp *.h *.cu *.cuh *.cc\n"
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\"><img src=\"assets/logo.png\" width=\"350\"></div>\n<img src=\"assets/demo.png\" >\n\n## Introduction\nYOLOX is an anchor-free version of YOLO, with a simpler design but better performance! It aims to bridge the gap between research and industrial communities.\nFor more details, please refer to our [report on Arxiv](https://arxiv.org/abs/2107.08430).\n\nThis repo is an implementation of PyTorch version YOLOX, there is also a [MegEngine implementation](https://github.com/MegEngine/YOLOX).\n\n<img src=\"assets/git_fig.png\" width=\"1000\" >\n\n## Updates!!\n* 【2023/02/28】 We support assignment visualization tool, see doc [here](./docs/assignment_visualization.md).\n* 【2022/04/14】 We support jit compile op.\n* 【2021/08/19】 We optimize the training process with **2x** faster training and **~1%** higher performance! See [notes](docs/updates_note.md) for more details.\n* 【2021/08/05】 We release [MegEngine version YOLOX](https://github.com/MegEngine/YOLOX).\n* 【2021/07/28】 We fix the fatal error of [memory leak](https://github.com/Megvii-BaseDetection/YOLOX/issues/103)\n* 【2021/07/26】 We now support [MegEngine](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/MegEngine) deployment.\n* 【2021/07/20】 We have released our technical report on [Arxiv](https://arxiv.org/abs/2107.08430).\n\n## Benchmark\n\n#### Standard Models.\n\n|Model |size |mAP<sup>val<br>0.5:0.95 |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---: | :---:    | :---:       |:---:     |:---:  | :---: | :----: |\n|[YOLOX-s](./exps/default/yolox_s.py)    |640  |40.5 |40.5      |9.8      |9.0 | 26.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth) |\n|[YOLOX-m](./exps/default/yolox_m.py)    |640  |46.9 |47.2      |12.3     |25.3 |73.8| [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth) 
|\n|[YOLOX-l](./exps/default/yolox_l.py)    |640  |49.7 |50.1      |14.5     |54.2| 155.6 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.pth) |\n|[YOLOX-x](./exps/default/yolox_x.py)   |640   |51.1 |**51.5**  | 17.3    |99.1 |281.9 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth) |\n|[YOLOX-Darknet53](./exps/default/yolov3.py)   |640  | 47.7 | 48.0 | 11.1 |63.7 | 185.3 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.pth) |\n\n<details>\n<summary>Legacy models</summary>\n\n|Model |size |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---: | :---:       |:---:     |:---:  | :---: | :----: |\n|[YOLOX-s](./exps/default/yolox_s.py)    |640  |39.6      |9.8     |9.0 | 26.8 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) |\n|[YOLOX-m](./exps/default/yolox_m.py)    |640  |46.4      |12.3     |25.3 |73.8| [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERMTP7VFqrVBrXKMU7Vl4TcBQs0SUeCT7kvc-JdIbej4tQ?e=1MDo9y)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.pth) |\n|[YOLOX-l](./exps/default/yolox_l.py)    |640  |50.0  |14.5 |54.2| 155.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EWA8w_IEOzBKvuueBqfaZh0BeoG5sVzR-XYbOJO4YlOkRw?e=wHWOBE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.pth) |\n|[YOLOX-x](./exps/default/yolox_x.py)   |640  |**51.2**      | 17.3 |99.1 |281.9 | 
[onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) |\n|[YOLOX-Darknet53](./exps/default/yolov3.py)   |640  | 47.4      | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) |\n\n</details>\n\n#### Light Models.\n\n|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---:  |  :---:       |:---:     |:---:  | :---: |\n|[YOLOX-Nano](./exps/default/yolox_nano.py) |416  |25.8  | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.pth) |\n|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416  |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.pth) |\n\n\n<details>\n<summary>Legacy models</summary>\n\n|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---:  |  :---:       |:---:     |:---:  | :---: |\n|[YOLOX-Nano](./exps/default/yolox_nano.py) |416  |25.3  | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.pth) |\n|[YOLOX-Tiny](./exps/default/yolox_tiny.py) |416  |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.pth) |\n\n</details>\n\n## Quick Start\n\n<details>\n<summary>Installation</summary>\n\nStep1. Install YOLOX from source.\n```shell\ngit clone git@github.com:Megvii-BaseDetection/YOLOX.git\ncd YOLOX\npip3 install -v -e .  # or  python3 setup.py develop\n```\n\n</details>\n\n<details>\n<summary>Demo</summary>\n\nStep1. 
Download a pretrained model from the benchmark table.\n\nStep2. Use either -n or -f to specify your detector's config. For example:\n\n```shell\npython tools/demo.py image -n yolox-s -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\nor\n```shell\npython tools/demo.py image -f exps/default/yolox_s.py -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\nDemo for video:\n```shell\npython tools/demo.py video -n yolox-s -c /path/to/your/yolox_s.pth --path /path/to/your/video --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\n\n\n</details>\n\n<details>\n<summary>Reproduce our results on COCO</summary>\n\nStep1. Prepare COCO dataset\n```shell\ncd <YOLOX_HOME>\nln -s /path/to/your/COCO ./datasets/COCO\n```\n\nStep2. Reproduce our results on COCO by specifying -n:\n\n```shell\npython -m yolox.tools.train -n yolox-s -d 8 -b 64 --fp16 -o [--cache]\n                               yolox-m\n                               yolox-l\n                               yolox-x\n```\n* -d: number of gpu devices\n* -b: total batch size, the recommended number for -b is num-gpu * 8\n* --fp16: mixed precision training\n* --cache: cache images in RAM to accelerate training, which requires a large amount of system RAM.\n\n\n\nWhen using -f, the above commands are equivalent to:\n```shell\npython -m yolox.tools.train -f exps/default/yolox_s.py -d 8 -b 64 --fp16 -o [--cache]\n                               exps/default/yolox_m.py\n                               exps/default/yolox_l.py\n                               exps/default/yolox_x.py\n```\n\n**Multi Machine Training**\n\nWe also support multi-nodes training. 
Just add the following args:\n* --num\\_machines: num of your total training nodes\n* --machine\\_rank: specify the rank of each node\n\nSuppose you want to train YOLOX on 2 machines, and your master machine's IP is 123.123.123.123, using port 12312 and TCP.\n\nOn master machine, run\n```shell\npython tools/train.py -n yolox-s -b 128 --dist-url tcp://123.123.123.123:12312 --num_machines 2 --machine_rank 0\n```\nOn the second machine, run\n```shell\npython tools/train.py -n yolox-s -b 128 --dist-url tcp://123.123.123.123:12312 --num_machines 2 --machine_rank 1\n```\n\n**Logging to Weights & Biases**\n\nTo log metrics, predictions and model checkpoints to [W&B](https://docs.wandb.ai/guides/integrations/other/yolox) use the command line argument `--logger wandb` and use the prefix \"wandb-\" to specify arguments for initializing the wandb run.\n\n```shell\npython tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache] --logger wandb wandb-project <project name>\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n\nAn example wandb dashboard is available [here](https://wandb.ai/manan-goel/yolox-nano/runs/3pzfeom0)\n\n**Others**\n\nSee more information with the following command:\n```shell\npython -m yolox.tools.train --help\n```\n\n</details>\n\n\n<details>\n<summary>Evaluation</summary>\n\nWe support batch testing for fast evaluation:\n\n```shell\npython -m yolox.tools.eval -n  yolox-s -c yolox_s.pth -b 64 -d 8 --conf 0.001 [--fp16] [--fuse]\n                               yolox-m\n                               yolox-l\n                               yolox-x\n```\n* --fuse: fuse conv and bn\n* -d: number of GPUs used for evaluation. 
DEFAULT: All GPUs available will be used.\n* -b: total batch size across all GPUs\n\nTo reproduce the speed test, we use the following command:\n```shell\npython -m yolox.tools.eval -n  yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --fuse\n                               yolox-m\n                               yolox-l\n                               yolox-x\n```\n\n</details>\n\n\n<details>\n<summary>Tutorials</summary>\n\n*  [Training on custom data](docs/train_custom_data.md)\n*  [Caching for custom data](docs/cache.md)\n*  [Manipulating training image size](docs/manipulate_training_image_size.md)\n*  [Assignment visualization](docs/assignment_visualization.md)\n*  [Freezing model](docs/freeze_module.md)\n\n</details>\n\n## Deployment\n\n\n1. [MegEngine in C++ and Python](./demo/MegEngine)\n2. [ONNX export and an ONNXRuntime](./demo/ONNXRuntime)\n3. [TensorRT in C++ and Python](./demo/TensorRT)\n4. [ncnn in C++ and Java](./demo/ncnn)\n5. [OpenVINO in C++ and Python](./demo/OpenVINO)\n6. [Accelerate YOLOX inference with nebullvm in Python](./demo/nebullvm)\n\n## Third-party resources\n* YOLOX for streaming perception: [StreamYOLO (CVPR 2022 Oral)](https://github.com/yancie-yjr/StreamYOLO)\n* The YOLOX-s and YOLOX-nano are Integrated into [ModelScope](https://www.modelscope.cn/home). Try out the Online Demo at [YOLOX-s](https://www.modelscope.cn/models/damo/cv_cspnet_image-object-detection_yolox/summary) and [YOLOX-Nano](https://www.modelscope.cn/models/damo/cv_cspnet_image-object-detection_yolox_nano_coco/summary) respectively 🚀.\n* Integrated into [Huggingface Spaces 🤗](https://huggingface.co/spaces) using [Gradio](https://github.com/gradio-app/gradio). 
Try out the Web Demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Sultannn/YOLOX-Demo)\n* The ncnn android app with video support: [ncnn-android-yolox](https://github.com/FeiGeChuanShu/ncnn-android-yolox) from [FeiGeChuanShu](https://github.com/FeiGeChuanShu)\n* YOLOX with Tengine support: [Tengine](https://github.com/OAID/Tengine/blob/tengine-lite/examples/tm_yolox.cpp) from [BUG1989](https://github.com/BUG1989)\n* YOLOX + ROS2 Foxy: [YOLOX-ROS](https://github.com/Ar-Ray-code/YOLOX-ROS) from [Ar-Ray](https://github.com/Ar-Ray-code)\n* YOLOX Deploy DeepStream: [YOLOX-deepstream](https://github.com/nanmi/YOLOX-deepstream) from [nanmi](https://github.com/nanmi)\n* YOLOX MNN/TNN/ONNXRuntime: [YOLOX-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolox.cpp)、[YOLOX-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolox.cpp) and [YOLOX-ONNXRuntime C++](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolox.cpp) from [DefTruth](https://github.com/DefTruth)\n* Converting darknet or yolov5 datasets to COCO format for YOLOX: [YOLO2COCO](https://github.com/RapidAI/YOLO2COCO) from [Daniel](https://github.com/znsoftm)\n\n## Cite YOLOX\nIf you use YOLOX in your research, please cite our work by using the following BibTeX entry:\n\n```latex\n @article{yolox2021,\n  title={YOLOX: Exceeding YOLO Series in 2021},\n  author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},\n  journal={arXiv preprint arXiv:2107.08430},\n  year={2021}\n}\n```\n## In memory of Dr. Jian Sun\nWithout the guidance of [Dr. Jian Sun](https://scholar.google.com/citations?user=ALVSZAYAAAAJ), YOLOX would not have been released and open sourced to the community.\nThe passing away of Dr. Sun is a huge loss to the Computer Vision field. We add this section here to express our remembrance and condolences to our captain Dr. 
Sun.\nIt is hoped that every AI practitioner in the world will stick to the belief of \"continuous innovation to expand cognitive boundaries, and extraordinary technology to achieve product value\" and move forward all the way.\n\n<div align=\"center\"><img src=\"assets/sunjian.png\" width=\"200\"></div>\n没有孙剑博士的指导，YOLOX也不会问世并开源给社区使用。\n孙剑博士的离去是CV领域的一大损失，我们在此特别添加了这个部分来表达对我们的“船长”孙老师的纪念和哀思。\n希望世界上的每个AI从业者秉持着“持续创新拓展认知边界，非凡科技成就产品价值”的观念，一路向前。\n"
  },
  {
    "path": "SECURITY.md",
    "content": "# Security Policy\n\n## Reporting a Vulnerability\n\n### Types of Security Issues\nWe actively monitor:  \n- Code vulnerabilities (RCE, XSS, authentication bypass)  \n- Dependency risks (critical vulnerabilities in project dependencies, such as requirements.txt, pyproject.toml, or equivalent files)  \n- Configuration flaws (insecure defaults in deployment scripts)  \n\n### Disclosure Channels (Choose one):\n\n1. **Encrypted Email**  \n   Contact: `wangfeng19950315@163.com`  \n   *Subject format: `[SECURITY] ModuleName - Brief Description`*\n\n2. **GitHub Private Report**  \n   Use GitHub's [\"Report a vulnerability\"](https://github.com/Megvii-BaseDetection/YOLOX/security/advisories) feature  \n\n3. **Reporting Security Issues**  \n   Please report security issues using Create new issue: https://github.com/Megvii-BaseDetection/YOLOX/issues/new\n\n\n## Response Process  \n1. **Acknowledgement**  \n   - Initial response within **48 business hours**  \n2. **Assessment**  \n   - Triage using CVSS v3.1 scoring  \n3. **Remediation**  \n   - Critical (CVSS ≥9.0): Patch within **7 days**  \n   - High (CVSS 7-8.9): Patch within **30 days**  \n4. **Public Disclosure**  \n   - Published via [GitHub Advisories](https://github.com/Megvii-BaseDetection/YOLOX/security/advisories)  \n   - CVE assignment coordinated with [MITRE](https://cveform.mitre.org)\n  \n## Secure Development Practices  \n- Always verify hashes when downloading dependencies:  \n  ```bash\n  sha256sum -c <your-dependency-hash-file>\n  ```\n"
  },
  {
    "path": "demo/MegEngine/cpp/README.md",
    "content": "# YOLOX-CPP-MegEngine\n\nC++ demo of YOLOX object detection based on [MegEngine](https://github.com/MegEngine/MegEngine).\n\n## Tutorial\n\n### Step1: install toolchain\n\n\t* host: sudo apt install gcc/g++ (gcc/g++, which version >= 6) build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl zlib1g-dev gcc-multilib g++-multilib cmake\n * cross build android: download [NDK](https://developer.android.com/ndk/downloads)\n   \t* after unzipping the downloaded NDK, export NDK_ROOT=\"path of NDK\"\n\n### Step2: build MegEngine\n\n```shell\ngit clone https://github.com/MegEngine/MegEngine.git\n\n# then init third_party\n \nexport megengine_root=\"path of MegEngine\"\ncd $megengine_root && ./third_party/prepare.sh && ./third_party/install-mkl.sh\n\n# build example:\n# build host without cuda:   \n./scripts/cmake-build/host_build.sh\n# or build host with cuda:\n./scripts/cmake-build/host_build.sh -c\n# or cross build for android aarch64: \n./scripts/cmake-build/cross_build_android_arm_inference.sh\n# or cross build for android aarch64(with V8.2+fp16): \n./scripts/cmake-build/cross_build_android_arm_inference.sh -f\n\n# after building MegEngine, you need to export the `MGE_INSTALL_PATH`\n# host without cuda: \nexport MGE_INSTALL_PATH=${megengine_root}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_ON/Release/install\n# or host with cuda: \nexport MGE_INSTALL_PATH=${megengine_root}/build_dir/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_ON/Release/install\n# or cross build for android aarch64: \nexport MGE_INSTALL_PATH=${megengine_root}/build_dir/android/arm64-v8a/Release/install\n```\n* you can refer to the [build tutorial of MegEngine](https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md) to build for other platforms, e.g. Windows, macOS, etc.\n\n### Step3: build OpenCV\n\n```shell\ngit clone https://github.com/opencv/opencv.git\n\ngit checkout 3.4.15 (we test at 3.4.15, if test other version, may need 
modify some build)\n```\n\n- patch diff for android:\n\n```\n# ```\n#     diff --git a/CMakeLists.txt b/CMakeLists.txt\n#     index f6a2da5310..10354312c9 100644\n#     --- a/CMakeLists.txt\n#     +++ b/CMakeLists.txt\n#     @@ -643,7 +643,7 @@ if(UNIX)\n#        if(NOT APPLE)\n#          CHECK_INCLUDE_FILE(pthread.h HAVE_PTHREAD)\n#          if(ANDROID)\n#     -      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log)\n#     +      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log z)\n#          elseif(CMAKE_SYSTEM_NAME MATCHES \"FreeBSD|NetBSD|DragonFly|OpenBSD|Haiku\")\n#            set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread)\n#          elseif(EMSCRIPTEN)\n    \n# ```\n```\n\n- build for host\n\n```shell\ncd root_dir_of_opencv\nmkdir -p build/install\ncd build\ncmake -DBUILD_JAVA=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$PWD/install \nmake install -j32\n```\n\n* build for android-aarch64\n\n```shell\ncd root_dir_of_opencv\nmkdir -p build_android/install\ncd build_android\n\ncmake -DCMAKE_TOOLCHAIN_FILE=\"$NDK_ROOT/build/cmake/android.toolchain.cmake\" -DANDROID_NDK=\"$NDK_ROOT\"  -DANDROID_ABI=arm64-v8a -DANDROID_NATIVE_API_LEVEL=21 -DBUILD_JAVA=OFF -DBUILD_ANDROID_PROJECTS=OFF -DBUILD_ANDROID_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$PWD/install ..\n\nmake install -j32\n```\n\n* after build OpenCV, you need export  `OPENCV_INSTALL_INCLUDE_PATH ` and `OPENCV_INSTALL_LIB_PATH`\n\n```shell\n# host build: \nexport OPENCV_INSTALL_INCLUDE_PATH=${path of opencv}/build/install/include\nexport OPENCV_INSTALL_LIB_PATH=${path of opencv}/build/install/lib\n# or cross build for android aarch64:\nexport OPENCV_INSTALL_INCLUDE_PATH=${path of opencv}/build_android/install/sdk/native/jni/include\nexport OPENCV_INSTALL_LIB_PATH=${path of opencv}/build_android/install/sdk/native/libs/arm64-v8a\n```\n\n###  Step4: build test demo\n\n```shell\nrun build.sh\n\n# if host:\nexport CXX=g++\n./build.sh\n# or cross android aarch64\nexport 
CXX=aarch64-linux-android21-clang++\n./build.sh\n```\n\n### Step5: run demo\n\n> **Note**: two ways to get `yolox_s.mge` model file\n>\n> * refer to the python demo's `dump.py` script.\n> * For users with code before 0.1.0 version, wget yolox-s weights [here](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.mge).\n> * For users with code after 0.1.0 version, use [python code in megengine](../python) to generate mge file.\n\n```shell\n# if host:\nLD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess> <run_with_fp16>\n\n# or cross android\nadb push/scp $MGE_INSTALL_PATH/lib/libmegengine.so android_phone\nadb push/scp $OPENCV_INSTALL_LIB_PATH/*.so android_phone\nadb push/scp ./yolox yolox_s.mge android_phone\nadb push/scp ../../../assets/dog.jpg android_phone\n\n# log in to android_phone by adb or ssh\n# then run: \nLD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess>  <run_with_fp16>\n\n# * <warmup_count> means warmup count, valid number >=0\n# * <thread_number> means thread number, valid number >=1, only takes effect on the `multithread` device\n# * <use_fast_run> if >=1, will use fastrun to choose the best algo\n# * <use_weight_preprocess> if >=1, will handle weight preprocess before exe\n# * <run_with_fp16> if >=1, will run with fp16 mode\n```\n\n## Benchmark\n\n* model info: yolox-s @ input(1,3,640,640)\t\t\t\t\t\n\n* test devices\n\n```\n  * x86_64  -- Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz\t\t\t\t\t\n  * aarch64 -- Xiaomi Mi 9 phone\t\t\t\t\t\n  * cuda    -- 1080TI @ cuda-10.1-cudnn-v7.6.3-TensorRT-6.0.1.5.sh @ Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz\n```\n\n  | megengine @ tag1.4(fastrun + weight\\_preprocess)/sec | 1 thread |\n  | ---------------------------------------------------- | -------- |\n  | x86\\_64                                              | 0.516245 |\n  | 
aarch64(fp32+chw44)                                  | 0.587857 |\n\n  | CUDA @ 1080TI/sec   | 1 batch    | 2 batch   | 4 batch   | 8 batch   | 16 batch  | 32 batch | 64 batch |\n  | ------------------- | ---------- | --------- | --------- | --------- | --------- | -------- | -------- |\n  | megengine(fp32+chw) | 0.00813703 | 0.0132893 | 0.0236633 | 0.0444699 | 0.0864917 | 0.16895  | 0.334248 |\n\n## Acknowledgement\n\n* [MegEngine](https://github.com/MegEngine/MegEngine)\n* [OpenCV](https://github.com/opencv/opencv)\n* [NDK](https://developer.android.com/ndk)\n* [CMAKE](https://cmake.org/)\n"
  },
  {
    "path": "demo/MegEngine/cpp/build.sh",
    "content": "#!/usr/bin/env bash\nset -e\n\nif [ -z $CXX ];then\n    echo \"please export you c++ toolchain to CXX\"\n    echo \"for example:\"\n    echo \"build for host:                                        export CXX=g++\"\n    echo \"cross build for aarch64-android(always locate in NDK): export CXX=aarch64-linux-android21-clang++\"\n    echo \"cross build for aarch64-linux:                         export CXX=aarch64-linux-gnu-g++\"\n    exit -1\nfi\n\nif [ -z $MGE_INSTALL_PATH ];then\n    echo \"please refsi ./README.md to init MGE_INSTALL_PATH env\"\n    exit -1\nfi\n\nif [ -z $OPENCV_INSTALL_INCLUDE_PATH ];then\n    echo \"please refs ./README.md to init OPENCV_INSTALL_INCLUDE_PATH env\"\n    exit -1\nfi\n\nif [ -z $OPENCV_INSTALL_LIB_PATH ];then\n    echo \"please refs ./README.md to init OPENCV_INSTALL_LIB_PATH env\"\n    exit -1\nfi\n\nINCLUDE_FLAG=\"-I$MGE_INSTALL_PATH/include -I$OPENCV_INSTALL_INCLUDE_PATH\"\nLINK_FLAG=\"-L$MGE_INSTALL_PATH/lib/ -lmegengine -L$OPENCV_INSTALL_LIB_PATH -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs\"\nBUILD_FLAG=\"-static-libstdc++ -O3 -pie -fPIE -g\"\n\nif [[ $CXX =~ \"android\" ]]; then\n    LINK_FLAG=\"${LINK_FLAG} -llog -lz\"\nfi\n\necho \"CXX: $CXX\"\necho \"MGE_INSTALL_PATH: $MGE_INSTALL_PATH\"\necho \"INCLUDE_FLAG: $INCLUDE_FLAG\"\necho \"LINK_FLAG: $LINK_FLAG\"\necho \"BUILD_FLAG: $BUILD_FLAG\"\n\necho \"[\" > compile_commands.json\necho \"{\" >> compile_commands.json\necho \"\\\"directory\\\": \\\"$PWD\\\",\" >> compile_commands.json\necho \"\\\"command\\\": \\\"$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG}\\\",\" >> compile_commands.json\necho \"\\\"file\\\": \\\"$PWD/yolox.cpp\\\",\" >> compile_commands.json\necho \"},\" >> compile_commands.json\necho \"]\" >> compile_commands.json\n$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG} ${BUILD_FLAG}\n\necho \"build success, output file: yolox\"\nif [[ $CXX =~ \"android\" ]]; then\n    echo \"try command to run:\"\n    echo \"adb 
push/scp $MGE_INSTALL_PATH/lib/libmegengine.so android_phone\"\n    echo \"adb push/scp $OPENCV_INSTALL_LIB_PATH/*.so android_phone\"\n    echo \"adb push/scp ./yolox yolox_s.mge android_phone\"\n    echo \"adb push/scp ../../../assets/dog.jpg android_phone\"\n    echo \"adb/ssh to android_phone, then run: LD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess>\"\nelse\n    echo \"try command to run: LD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess>\"\nfi\n"
  },
  {
    "path": "demo/MegEngine/cpp/yolox.cpp",
    "content": "// Copyright (C) 2018-2021 Intel Corporation\n// SPDX-License-Identifier: Apache-2.0\n\n#include \"megbrain/gopt/inference.h\"\n#include \"megbrain/opr/search_policy/algo_chooser_helper.h\"\n#include \"megbrain/serialization/serializer.h\"\n#include <iostream>\n#include <iterator>\n#include <memory>\n#include <opencv2/opencv.hpp>\n#include <stdlib.h>\n#include <string>\n#include <vector>\n\n/**\n * @brief Define names based depends on Unicode path support\n */\n#define NMS_THRESH 0.45\n#define BBOX_CONF_THRESH 0.25\n\nconstexpr int INPUT_W = 640;\nconstexpr int INPUT_H = 640;\n\nusing namespace mgb;\n\ncv::Mat static_resize(cv::Mat &img) {\n  float r = std::min(INPUT_W / (img.cols * 1.0), INPUT_H / (img.rows * 1.0));\n  int unpad_w = r * img.cols;\n  int unpad_h = r * img.rows;\n  cv::Mat re(unpad_h, unpad_w, CV_8UC3);\n  cv::resize(img, re, re.size());\n  cv::Mat out(INPUT_W, INPUT_H, CV_8UC3, cv::Scalar(114, 114, 114));\n  re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));\n  return out;\n}\n\nvoid blobFromImage(cv::Mat &img, float *blob_data) {\n  int channels = 3;\n  int img_h = img.rows;\n  int img_w = img.cols;\n  for (size_t c = 0; c < channels; c++) {\n    for (size_t h = 0; h < img_h; h++) {\n      for (size_t w = 0; w < img_w; w++) {\n        blob_data[c * img_w * img_h + h * img_w + w] =\n            (float)img.at<cv::Vec3b>(h, w)[c];\n      }\n    }\n  }\n}\n\nstruct Object {\n  cv::Rect_<float> rect;\n  int label;\n  float prob;\n};\n\nstruct GridAndStride {\n  int grid0;\n  int grid1;\n  int stride;\n};\n\nstatic void\ngenerate_grids_and_stride(const int target_size, std::vector<int> &strides,\n                          std::vector<GridAndStride> &grid_strides) {\n  for (auto stride : strides) {\n    int num_grid = target_size / stride;\n    for (int g1 = 0; g1 < num_grid; g1++) {\n      for (int g0 = 0; g0 < num_grid; g0++) {\n        grid_strides.push_back((GridAndStride){g0, g1, stride});\n      }\n    }\n  }\n}\n\nstatic void 
generate_yolox_proposals(std::vector<GridAndStride> grid_strides,\n                                     const float *feat_ptr,\n                                     float prob_threshold,\n                                     std::vector<Object> &objects) {\n  const int num_class = 80;\n  const int num_anchors = grid_strides.size();\n\n  for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {\n    const int grid0 = grid_strides[anchor_idx].grid0;\n    const int grid1 = grid_strides[anchor_idx].grid1;\n    const int stride = grid_strides[anchor_idx].stride;\n\n    const int basic_pos = anchor_idx * 85;\n\n    float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride;\n    float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride;\n    float w = exp(feat_ptr[basic_pos + 2]) * stride;\n    float h = exp(feat_ptr[basic_pos + 3]) * stride;\n    float x0 = x_center - w * 0.5f;\n    float y0 = y_center - h * 0.5f;\n\n    float box_objectness = feat_ptr[basic_pos + 4];\n    for (int class_idx = 0; class_idx < num_class; class_idx++) {\n      float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];\n      float box_prob = box_objectness * box_cls_score;\n      if (box_prob > prob_threshold) {\n        Object obj;\n        obj.rect.x = x0;\n        obj.rect.y = y0;\n        obj.rect.width = w;\n        obj.rect.height = h;\n        obj.label = class_idx;\n        obj.prob = box_prob;\n\n        objects.push_back(obj);\n      }\n\n    } // class loop\n\n  } // point anchor loop\n}\n\nstatic inline float intersection_area(const Object &a, const Object &b) {\n  cv::Rect_<float> inter = a.rect & b.rect;\n  return inter.area();\n}\n\nstatic void qsort_descent_inplace(std::vector<Object> &faceobjects, int left,\n                                  int right) {\n  int i = left;\n  int j = right;\n  float p = faceobjects[(left + right) / 2].prob;\n\n  while (i <= j) {\n    while (faceobjects[i].prob > p)\n      i++;\n\n    while (faceobjects[j].prob < p)\n      
j--;\n\n    if (i <= j) {\n      // swap\n      std::swap(faceobjects[i], faceobjects[j]);\n\n      i++;\n      j--;\n    }\n  }\n\n#pragma omp parallel sections\n  {\n#pragma omp section\n    {\n      if (left < j)\n        qsort_descent_inplace(faceobjects, left, j);\n    }\n#pragma omp section\n    {\n      if (i < right)\n        qsort_descent_inplace(faceobjects, i, right);\n    }\n  }\n}\n\nstatic void qsort_descent_inplace(std::vector<Object> &objects) {\n  if (objects.empty())\n    return;\n\n  qsort_descent_inplace(objects, 0, objects.size() - 1);\n}\n\nstatic void nms_sorted_bboxes(const std::vector<Object> &faceobjects,\n                              std::vector<int> &picked, float nms_threshold) {\n  picked.clear();\n\n  const int n = faceobjects.size();\n\n  std::vector<float> areas(n);\n  for (int i = 0; i < n; i++) {\n    areas[i] = faceobjects[i].rect.area();\n  }\n\n  for (int i = 0; i < n; i++) {\n    const Object &a = faceobjects[i];\n\n    int keep = 1;\n    for (int j = 0; j < (int)picked.size(); j++) {\n      const Object &b = faceobjects[picked[j]];\n\n      // intersection over union\n      float inter_area = intersection_area(a, b);\n      float union_area = areas[i] + areas[picked[j]] - inter_area;\n      // float IoU = inter_area / union_area\n      if (inter_area / union_area > nms_threshold)\n        keep = 0;\n    }\n\n    if (keep)\n      picked.push_back(i);\n  }\n}\n\nstatic void decode_outputs(const float *prob, std::vector<Object> &objects,\n                           float scale, const int img_w, const int img_h) {\n  std::vector<Object> proposals;\n  std::vector<int> strides = {8, 16, 32};\n  std::vector<GridAndStride> grid_strides;\n\n  generate_grids_and_stride(INPUT_W, strides, grid_strides);\n  generate_yolox_proposals(grid_strides, prob, BBOX_CONF_THRESH, proposals);\n  qsort_descent_inplace(proposals);\n\n  std::vector<int> picked;\n  nms_sorted_bboxes(proposals, picked, NMS_THRESH);\n  int count = picked.size();\n  
objects.resize(count);\n\n  for (int i = 0; i < count; i++) {\n    objects[i] = proposals[picked[i]];\n\n    // adjust offset to original unpadded\n    float x0 = (objects[i].rect.x) / scale;\n    float y0 = (objects[i].rect.y) / scale;\n    float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;\n    float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;\n\n    // clip\n    x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);\n    y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);\n    x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);\n    y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);\n\n    objects[i].rect.x = x0;\n    objects[i].rect.y = y0;\n    objects[i].rect.width = x1 - x0;\n    objects[i].rect.height = y1 - y0;\n  }\n}\n\nconst float color_list[80][3] = {\n    {0.000, 0.447, 0.741}, {0.850, 0.325, 0.098}, {0.929, 0.694, 0.125},\n    {0.494, 0.184, 0.556}, {0.466, 0.674, 0.188}, {0.301, 0.745, 0.933},\n    {0.635, 0.078, 0.184}, {0.300, 0.300, 0.300}, {0.600, 0.600, 0.600},\n    {1.000, 0.000, 0.000}, {1.000, 0.500, 0.000}, {0.749, 0.749, 0.000},\n    {0.000, 1.000, 0.000}, {0.000, 0.000, 1.000}, {0.667, 0.000, 1.000},\n    {0.333, 0.333, 0.000}, {0.333, 0.667, 0.000}, {0.333, 1.000, 0.000},\n    {0.667, 0.333, 0.000}, {0.667, 0.667, 0.000}, {0.667, 1.000, 0.000},\n    {1.000, 0.333, 0.000}, {1.000, 0.667, 0.000}, {1.000, 1.000, 0.000},\n    {0.000, 0.333, 0.500}, {0.000, 0.667, 0.500}, {0.000, 1.000, 0.500},\n    {0.333, 0.000, 0.500}, {0.333, 0.333, 0.500}, {0.333, 0.667, 0.500},\n    {0.333, 1.000, 0.500}, {0.667, 0.000, 0.500}, {0.667, 0.333, 0.500},\n    {0.667, 0.667, 0.500}, {0.667, 1.000, 0.500}, {1.000, 0.000, 0.500},\n    {1.000, 0.333, 0.500}, {1.000, 0.667, 0.500}, {1.000, 1.000, 0.500},\n    {0.000, 0.333, 1.000}, {0.000, 0.667, 1.000}, {0.000, 1.000, 1.000},\n    {0.333, 0.000, 1.000}, {0.333, 0.333, 1.000}, {0.333, 0.667, 1.000},\n    {0.333, 1.000, 1.000}, {0.667, 0.000, 1.000}, {0.667, 0.333, 1.000},\n    
{0.667, 0.667, 1.000}, {0.667, 1.000, 1.000}, {1.000, 0.000, 1.000},\n    {1.000, 0.333, 1.000}, {1.000, 0.667, 1.000}, {0.333, 0.000, 0.000},\n    {0.500, 0.000, 0.000}, {0.667, 0.000, 0.000}, {0.833, 0.000, 0.000},\n    {1.000, 0.000, 0.000}, {0.000, 0.167, 0.000}, {0.000, 0.333, 0.000},\n    {0.000, 0.500, 0.000}, {0.000, 0.667, 0.000}, {0.000, 0.833, 0.000},\n    {0.000, 1.000, 0.000}, {0.000, 0.000, 0.167}, {0.000, 0.000, 0.333},\n    {0.000, 0.000, 0.500}, {0.000, 0.000, 0.667}, {0.000, 0.000, 0.833},\n    {0.000, 0.000, 1.000}, {0.000, 0.000, 0.000}, {0.143, 0.143, 0.143},\n    {0.286, 0.286, 0.286}, {0.429, 0.429, 0.429}, {0.571, 0.571, 0.571},\n    {0.714, 0.714, 0.714}, {0.857, 0.857, 0.857}, {0.000, 0.447, 0.741},\n    {0.314, 0.717, 0.741}, {0.50, 0.5, 0}};\n\nstatic void draw_objects(const cv::Mat &bgr,\n                         const std::vector<Object> &objects) {\n  static const char *class_names[] = {\n      \"person\",        \"bicycle\",      \"car\",\n      \"motorcycle\",    \"airplane\",     \"bus\",\n      \"train\",         \"truck\",        \"boat\",\n      \"traffic light\", \"fire hydrant\", \"stop sign\",\n      \"parking meter\", \"bench\",        \"bird\",\n      \"cat\",           \"dog\",          \"horse\",\n      \"sheep\",         \"cow\",          \"elephant\",\n      \"bear\",          \"zebra\",        \"giraffe\",\n      \"backpack\",      \"umbrella\",     \"handbag\",\n      \"tie\",           \"suitcase\",     \"frisbee\",\n      \"skis\",          \"snowboard\",    \"sports ball\",\n      \"kite\",          \"baseball bat\", \"baseball glove\",\n      \"skateboard\",    \"surfboard\",    \"tennis racket\",\n      \"bottle\",        \"wine glass\",   \"cup\",\n      \"fork\",          \"knife\",        \"spoon\",\n      \"bowl\",          \"banana\",       \"apple\",\n      \"sandwich\",      \"orange\",       \"broccoli\",\n      \"carrot\",        \"hot dog\",      \"pizza\",\n      \"donut\",         \"cake\",         
\"chair\",\n      \"couch\",         \"potted plant\", \"bed\",\n      \"dining table\",  \"toilet\",       \"tv\",\n      \"laptop\",        \"mouse\",        \"remote\",\n      \"keyboard\",      \"cell phone\",   \"microwave\",\n      \"oven\",          \"toaster\",      \"sink\",\n      \"refrigerator\",  \"book\",         \"clock\",\n      \"vase\",          \"scissors\",     \"teddy bear\",\n      \"hair drier\",    \"toothbrush\"};\n\n  cv::Mat image = bgr.clone();\n\n  for (size_t i = 0; i < objects.size(); i++) {\n    const Object &obj = objects[i];\n\n    fprintf(stderr, \"%d = %.5f at %.2f %.2f %.2f x %.2f\\n\", obj.label, obj.prob,\n            obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);\n\n    cv::Scalar color =\n        cv::Scalar(color_list[obj.label][0], color_list[obj.label][1],\n                   color_list[obj.label][2]);\n    float c_mean = cv::mean(color)[0];\n    cv::Scalar txt_color;\n    if (c_mean > 0.5) {\n      txt_color = cv::Scalar(0, 0, 0);\n    } else {\n      txt_color = cv::Scalar(255, 255, 255);\n    }\n\n    cv::rectangle(image, obj.rect, color * 255, 2);\n\n    char text[256];\n    sprintf(text, \"%s %.1f%%\", class_names[obj.label], obj.prob * 100);\n\n    int baseLine = 0;\n    cv::Size label_size =\n        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);\n\n    cv::Scalar txt_bk_color = color * 0.7 * 255;\n\n    int x = obj.rect.x;\n    int y = obj.rect.y + 1;\n    // int y = obj.rect.y - label_size.height - baseLine;\n    if (y > image.rows)\n      y = image.rows;\n    // if (x + label_size.width > image.cols)\n    // x = image.cols - label_size.width;\n\n    cv::rectangle(\n        image,\n        cv::Rect(cv::Point(x, y),\n                 cv::Size(label_size.width, label_size.height + baseLine)),\n        txt_bk_color, -1);\n\n    cv::putText(image, text, cv::Point(x, y + label_size.height),\n                cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);\n  }\n\n  cv::imwrite(\"out.jpg\", 
image);\n  std::cout << \"save output to out.jpg\" << std::endl;\n}\n\ncg::ComputingGraph::OutputSpecItem make_callback_copy(SymbolVar dev,\n                                                      HostTensorND &host) {\n  auto cb = [&host](DeviceTensorND &d) { host.copy_from(d); };\n  return {dev, cb};\n}\n\nint main(int argc, char *argv[]) {\n  serialization::GraphLoader::LoadConfig load_config;\n  load_config.comp_graph = ComputingGraph::make();\n  auto &&graph_opt = load_config.comp_graph->options();\n  graph_opt.graph_opt_level = 0;\n\n  if (argc != 9) {\n    std::cout << \"Usage : \" << argv[0]\n              << \" <path_to_model> <path_to_image> <device> <warmup_count> \"\n                 \"<thread_number> <use_fast_run> <use_weight_preprocess> \"\n                 \"<run_with_fp16>\"\n              << std::endl;\n    return EXIT_FAILURE;\n  }\n\n  const std::string input_model{argv[1]};\n  const std::string input_image_path{argv[2]};\n  const std::string device{argv[3]};\n  const size_t warmup_count = atoi(argv[4]);\n  const size_t thread_number = atoi(argv[5]);\n  const size_t use_fast_run = atoi(argv[6]);\n  const size_t use_weight_preprocess = atoi(argv[7]);\n  const size_t run_with_fp16 = atoi(argv[8]);\n\n  if (device == \"cuda\") {\n    load_config.comp_node_mapper = [](CompNode::Locator &loc) {\n      loc.type = CompNode::DeviceType::CUDA;\n    };\n  } else if (device == \"cpu\") {\n    load_config.comp_node_mapper = [](CompNode::Locator &loc) {\n      loc.type = CompNode::DeviceType::CPU;\n    };\n  } else if (device == \"multithread\") {\n    load_config.comp_node_mapper = [thread_number](CompNode::Locator &loc) {\n      loc.type = CompNode::DeviceType::MULTITHREAD;\n      loc.device = 0;\n      loc.stream = thread_number;\n    };\n    std::cout << \"use \" << thread_number << \" thread\" << std::endl;\n  } else {\n    std::cout << \"device only support cuda or cpu or multithread\" << std::endl;\n    return EXIT_FAILURE;\n  }\n\n  if 
(use_weight_preprocess) {\n    std::cout << \"use weight preprocess\" << std::endl;\n    graph_opt.graph_opt.enable_weight_preprocess();\n  }\n  if (run_with_fp16) {\n    std::cout << \"run with fp16\" << std::endl;\n    graph_opt.graph_opt.enable_f16_io_comp();\n  }\n\n  if (device == \"cuda\") {\n    std::cout << \"choose format for cuda\" << std::endl;\n  } else {\n    std::cout << \"choose format for non-cuda\" << std::endl;\n#if defined(__arm__) || defined(__aarch64__)\n    if (run_with_fp16) {\n      std::cout << \"use chw format when enable fp16\" << std::endl;\n    } else {\n      std::cout << \"choose format for nchw44 for aarch64\" << std::endl;\n      graph_opt.graph_opt.enable_nchw44();\n    }\n#endif\n#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)\n    // graph_opt.graph_opt.enable_nchw88();\n#endif\n  }\n\n  std::unique_ptr<serialization::InputFile> inp_file =\n      serialization::InputFile::make_fs(input_model.c_str());\n  auto loader = serialization::GraphLoader::make(std::move(inp_file));\n  serialization::GraphLoader::LoadResult network =\n      loader->load(load_config, false);\n\n  if (use_fast_run) {\n    std::cout << \"use fastrun\" << std::endl;\n    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;\n    S strategy = static_cast<S>(0);\n    strategy = S::PROFILE | S::OPTIMIZED | strategy;\n    mgb::gopt::modify_opr_algo_strategy_inplace(network.output_var_list,\n                                                strategy);\n  }\n\n  auto data = network.tensor_map[\"data\"];\n  cv::Mat image = cv::imread(input_image_path);\n  cv::Mat pr_img = static_resize(image);\n  float *data_ptr = data->resize({1, 3, 640, 640}).ptr<float>();\n  blobFromImage(pr_img, data_ptr);\n  HostTensorND predict;\n  std::unique_ptr<cg::AsyncExecutable> func = network.graph->compile(\n      {make_callback_copy(network.output_var_map.begin()->second, predict)});\n\n  for (auto i = 0; i < warmup_count; i++) {\n    std::cout << 
\"warmup: \" << i << std::endl;\n    func->execute();\n    func->wait();\n  }\n  auto start = std::chrono::system_clock::now();\n  func->execute();\n  func->wait();\n  auto end = std::chrono::system_clock::now();\n  std::chrono::duration<double> exec_seconds = end - start;\n  std::cout << \"elapsed time: \" << exec_seconds.count() << \"s\" << std::endl;\n\n  float *predict_ptr = predict.ptr<float>();\n  int img_w = image.cols;\n  int img_h = image.rows;\n  float scale =\n      std::min(INPUT_W / (image.cols * 1.0), INPUT_H / (image.rows * 1.0));\n  std::vector<Object> objects;\n\n  decode_outputs(predict_ptr, objects, scale, img_w, img_h);\n  draw_objects(image, objects);\n\n  return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "demo/MegEngine/python/README.md",
    "content": "# YOLOX-Python-MegEngine\n\nPython version of YOLOX object detection based on [MegEngine](https://github.com/MegEngine/MegEngine).\n\n## Tutorial\n\n### Step1: install requirements\n\n```\npython3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html\n```\n\n### Step2: convert checkpoint weights from torch's `.pth` file\n\n```\npython3 convert_weights.py -w yolox_s.pth -o yolox_s_mge.pkl\n```\n\n### Step3: run demo\n\nThis part is the same as torch's python demo, but there is no need to specify a device.\n\n```\npython3 demo.py image -n yolox-s -c yolox_s_mge.pkl --path ../../../assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result\n```\n\n###  [Optional]Step4: dump model for cpp inference\n\n> **Note**: the resulting model is dumped with `optimize_for_inference` and `enable_fuse_conv_bias_nonlinearity`.\n\n```\npython3 dump.py -n yolox-s -c yolox_s_mge.pkl --dump_path yolox_s.mge\n```\n"
  },
  {
    "path": "demo/MegEngine/python/build.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n\nimport megengine as mge\nimport megengine.module as M\n\nfrom models.yolo_fpn import YOLOFPN\nfrom models.yolo_head import YOLOXHead\nfrom models.yolo_pafpn import YOLOPAFPN\nfrom models.yolox import YOLOX\n\n\ndef build_yolox(name=\"yolox-s\"):\n    num_classes = 80\n\n    # value meaning: depth, width\n    param_dict = {\n        \"yolox-nano\": (0.33, 0.25),\n        \"yolox-tiny\": (0.33, 0.375),\n        \"yolox-s\": (0.33, 0.50),\n        \"yolox-m\": (0.67, 0.75),\n        \"yolox-l\": (1.0, 1.0),\n        \"yolox-x\": (1.33, 1.25),\n    }\n    if name == \"yolov3\":\n        depth = 1.0\n        width = 1.0\n        backbone = YOLOFPN()\n        head = YOLOXHead(num_classes, width, in_channels=[128, 256, 512], act=\"lrelu\")\n        model = YOLOX(backbone, head)\n    else:\n        assert name in param_dict\n        kwargs = {}\n        depth, width = param_dict[name]\n        if name == \"yolox-nano\":\n            kwargs[\"depthwise\"] = True\n        in_channels = [256, 512, 1024]\n        backbone = YOLOPAFPN(depth, width, in_channels=in_channels, **kwargs)\n        head = YOLOXHead(num_classes, width, in_channels=in_channels, **kwargs)\n        model = YOLOX(backbone, head)\n\n    for m in model.modules():\n        if isinstance(m, M.BatchNorm2d):\n            m.eps = 1e-3\n\n    return model\n\n\ndef build_and_load(weight_file, name=\"yolox-s\"):\n    model = build_yolox(name)\n    model_weights = mge.load(weight_file)\n    model.load_state_dict(model_weights, strict=False)\n    return model\n"
  },
  {
    "path": "demo/MegEngine/python/convert_weights.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\nimport argparse\nfrom collections import OrderedDict\n\nimport megengine as mge\nimport torch\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"-w\", \"--weights\", type=str, help=\"path of weight file\")\n    parser.add_argument(\n        \"-o\",\n        \"--output\",\n        default=\"weight_mge.pkl\",\n        type=str,\n        help=\"path of weight file\",\n    )\n    return parser\n\n\ndef numpy_weights(weight_file):\n    torch_weights = torch.load(weight_file, map_location=\"cpu\")\n    if \"model\" in torch_weights:\n        torch_weights = torch_weights[\"model\"]\n    new_dict = OrderedDict()\n    for k, v in torch_weights.items():\n        new_dict[k] = v.cpu().numpy()\n    return new_dict\n\n\ndef map_weights(weight_file, output_file):\n    torch_weights = numpy_weights(weight_file)\n\n    new_dict = OrderedDict()\n    for k, v in torch_weights.items():\n        if \"num_batches_tracked\" in k:\n            print(\"drop: {}\".format(k))\n            continue\n        if k.endswith(\"bias\"):\n            print(\"bias key: {}\".format(k))\n            v = v.reshape(1, -1, 1, 1)\n            new_dict[k] = v\n        elif \"dconv\" in k and \"conv.weight\" in k:\n            print(\"depthwise conv key: {}\".format(k))\n            cout, cin, k1, k2 = v.shape\n            v = v.reshape(cout, 1, cin, k1, k2)\n            new_dict[k] = v\n        else:\n            new_dict[k] = v\n\n    mge.save(new_dict, output_file)\n    print(\"save weights to {}\".format(output_file))\n\n\ndef main():\n    parser = make_parser()\n    args = parser.parse_args()\n    map_weights(args.weights, args.output)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "demo/MegEngine/python/demo.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nimport time\n\nimport cv2\nimport megengine as mge\nimport megengine.functional as F\nfrom loguru import logger\n\nfrom yolox.data.datasets import COCO_CLASSES\nfrom yolox.utils import vis\nfrom yolox.data.data_augment import preproc as preprocess\n\nfrom build import build_and_load\n\nIMAGE_EXT = [\".jpg\", \".jpeg\", \".webp\", \".bmp\", \".png\"]\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX Demo!\")\n    parser.add_argument(\n        \"demo\", default=\"image\", help=\"demo type, eg. image, video and webcam\"\n    )\n    parser.add_argument(\"-n\", \"--name\", type=str, default=\"yolox-s\", help=\"model name\")\n    parser.add_argument(\"--path\", default=\"./test.png\", help=\"path to images or video\")\n    parser.add_argument(\"--camid\", type=int, default=0, help=\"webcam demo camera id\")\n    parser.add_argument(\n        \"--save_result\",\n        action=\"store_true\",\n        help=\"whether to save the inference result of image/video\",\n    )\n\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt for eval\")\n    parser.add_argument(\"--conf\", default=None, type=float, help=\"test conf\")\n    parser.add_argument(\"--nms\", default=None, type=float, help=\"test nms threshold\")\n    parser.add_argument(\"--tsize\", default=None, type=int, help=\"test img size\")\n    return parser\n\n\ndef get_image_list(path):\n    image_names = []\n    for maindir, subdir, file_name_list in os.walk(path):\n        for filename in file_name_list:\n            apath = os.path.join(maindir, filename)\n            ext = os.path.splitext(apath)[1]\n            if ext in IMAGE_EXT:\n                image_names.append(apath)\n    return image_names\n\n\ndef postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):\n    box_corner = F.zeros_like(prediction)\n    
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2\n    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2\n    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2\n    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2\n    prediction[:, :, :4] = box_corner[:, :, :4]\n\n    output = [None for _ in range(len(prediction))]\n    for i, image_pred in enumerate(prediction):\n\n        # If none are remaining => process next image\n        if not image_pred.shape[0]:\n            continue\n        # Get score and class with highest confidence\n        class_conf = F.max(image_pred[:, 5: 5 + num_classes], 1, keepdims=True)\n        class_pred = F.argmax(image_pred[:, 5: 5 + num_classes], 1, keepdims=True)\n\n        class_conf_squeeze = F.squeeze(class_conf)\n        conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre\n        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)\n        detections = detections[conf_mask]\n        if not detections.shape[0]:\n            continue\n\n        nms_out_index = F.vision.nms(\n            detections[:, :4], detections[:, 4] * detections[:, 5], nms_thre,\n        )\n        detections = detections[nms_out_index]\n        if output[i] is None:\n            output[i] = detections\n        else:\n            output[i] = F.concat((output[i], detections))\n\n    return output\n\n\nclass Predictor(object):\n    def __init__(\n        self,\n        model,\n        confthre=0.01,\n        nmsthre=0.65,\n        test_size=(640, 640),\n        cls_names=COCO_CLASSES,\n        trt_file=None,\n        decoder=None,\n    ):\n        self.model = model\n        self.cls_names = cls_names\n        self.decoder = decoder\n        self.num_classes = 80\n        self.confthre = confthre\n        self.nmsthre = nmsthre\n        self.test_size = test_size\n\n    def inference(self, img):\n        img_info = {\"id\": 0}\n        if isinstance(img, 
str):\n            img_info[\"file_name\"] = os.path.basename(img)\n            img = cv2.imread(img)\n            if img is None:\n                raise ValueError(\"test image path is invalid!\")\n        else:\n            img_info[\"file_name\"] = None\n\n        height, width = img.shape[:2]\n        img_info[\"height\"] = height\n        img_info[\"width\"] = width\n        img_info[\"raw_img\"] = img\n\n        img, ratio = preprocess(img, self.test_size)\n        img_info[\"ratio\"] = ratio\n        img = F.expand_dims(mge.tensor(img), 0)\n\n        t0 = time.time()\n        outputs = self.model(img)\n        outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)\n        logger.info(\"Infer time: {:.4f}s\".format(time.time() - t0))\n        return outputs, img_info\n\n    def visual(self, output, img_info, cls_conf=0.35):\n        ratio = img_info[\"ratio\"]\n        img = img_info[\"raw_img\"]\n        if output is None:\n            return img\n        output = output.numpy()\n\n        # preprocessing: resize\n        bboxes = output[:, 0:4] / ratio\n\n        cls = output[:, 6]\n        scores = output[:, 4] * output[:, 5]\n\n        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)\n        return vis_res\n\n\ndef image_demo(predictor, vis_folder, path, current_time, save_result):\n    if os.path.isdir(path):\n        files = get_image_list(path)\n    else:\n        files = [path]\n    files.sort()\n    for image_name in files:\n        outputs, img_info = predictor.inference(image_name)\n        result_image = predictor.visual(outputs[0], img_info)\n        if save_result:\n            save_folder = os.path.join(\n                vis_folder, time.strftime(\"%Y_%m_%d_%H_%M_%S\", current_time)\n            )\n            os.makedirs(save_folder, exist_ok=True)\n            save_file_name = os.path.join(save_folder, os.path.basename(image_name))\n            logger.info(\"Saving detection result in 
{}\".format(save_file_name))\n            cv2.imwrite(save_file_name, result_image)\n        ch = cv2.waitKey(0)\n        if ch == 27 or ch == ord(\"q\") or ch == ord(\"Q\"):\n            break\n\n\ndef imageflow_demo(predictor, vis_folder, current_time, args):\n    cap = cv2.VideoCapture(args.path if args.demo == \"video\" else args.camid)\n    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float\n    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float\n    fps = cap.get(cv2.CAP_PROP_FPS)\n    save_folder = os.path.join(\n        vis_folder, time.strftime(\"%Y_%m_%d_%H_%M_%S\", current_time)\n    )\n    os.makedirs(save_folder, exist_ok=True)\n    if args.demo == \"video\":\n        save_path = os.path.join(save_folder, os.path.basename(args.path))\n    else:\n        save_path = os.path.join(save_folder, \"camera.mp4\")\n    logger.info(f\"video save_path is {save_path}\")\n    vid_writer = cv2.VideoWriter(\n        save_path, cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (int(width), int(height))\n    )\n    while True:\n        ret_val, frame = cap.read()\n        if ret_val:\n            outputs, img_info = predictor.inference(frame)\n            result_frame = predictor.visual(outputs[0], img_info)\n            if args.save_result:\n                vid_writer.write(result_frame)\n            ch = cv2.waitKey(1)\n            if ch == 27 or ch == ord(\"q\") or ch == ord(\"Q\"):\n                break\n        else:\n            break\n\n\ndef main(args):\n    file_name = os.path.join(\"./yolox_outputs\", args.name)\n    os.makedirs(file_name, exist_ok=True)\n\n    if args.save_result:\n        vis_folder = os.path.join(file_name, \"vis_res\")\n        os.makedirs(vis_folder, exist_ok=True)\n\n    confthre = 0.01\n    nmsthre = 0.65\n    test_size = (640, 640)\n    if args.conf is not None:\n        confthre = args.conf\n    if args.nms is not None:\n        nmsthre = args.nms\n    if args.tsize is not None:\n        test_size = (args.tsize, args.tsize)\n\n    model = 
build_and_load(args.ckpt, name=args.name)\n    model.eval()\n\n    predictor = Predictor(model, confthre, nmsthre, test_size, COCO_CLASSES, None, None)\n    current_time = time.localtime()\n    if args.demo == \"image\":\n        image_demo(predictor, vis_folder, args.path, current_time, args.save_result)\n    elif args.demo == \"video\" or args.demo == \"webcam\":\n        imageflow_demo(predictor, vis_folder, current_time, args)\n\n\nif __name__ == \"__main__\":\n    args = make_parser().parse_args()\n    main(args)\n"
  },
  {
    "path": "demo/MegEngine/python/dump.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\n\nimport megengine as mge\nimport numpy as np\nfrom megengine import jit\n\nfrom build import build_and_load\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX Demo Dump\")\n    parser.add_argument(\"-n\", \"--name\", type=str, default=\"yolox-s\", help=\"model name\")\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt for eval\")\n    parser.add_argument(\n        \"--dump_path\", default=\"model.mge\", help=\"path to save the dumped model\"\n    )\n    return parser\n\n\ndef dump_static_graph(model, graph_name=\"model.mge\"):\n    model.eval()\n    model.head.decode_in_inference = False\n\n    data = mge.Tensor(np.random.random((1, 3, 640, 640)))\n\n    @jit.trace(capture_as_const=True)\n    def pred_func(data):\n        outputs = model(data)\n        return outputs\n\n    pred_func(data)\n    pred_func.dump(\n        graph_name,\n        arg_names=[\"data\"],\n        optimize_for_inference=True,\n        enable_fuse_conv_bias_nonlinearity=True,\n    )\n\n\ndef main(args):\n    model = build_and_load(args.ckpt, name=args.name)\n    dump_static_graph(model, args.dump_path)\n\n\nif __name__ == \"__main__\":\n    args = make_parser().parse_args()\n    main(args)\n"
  },
  {
    "path": "demo/MegEngine/python/models/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nfrom .darknet import CSPDarknet, Darknet\nfrom .yolo_fpn import YOLOFPN\nfrom .yolo_head import YOLOXHead\nfrom .yolo_pafpn import YOLOPAFPN\nfrom .yolox import YOLOX\n"
  },
  {
    "path": "demo/MegEngine/python/models/darknet.py",
    "content": "#!/usr/bin/env python3\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.module as M\n\nfrom .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck\n\n\nclass Darknet(M.Module):\n    # number of blocks from dark2 to dark5.\n    depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]}\n\n    def __init__(\n        self, depth, in_channels=3, stem_out_channels=32, out_features=(\"dark3\", \"dark4\", \"dark5\"),\n    ):\n        \"\"\"\n        Args:\n            depth (int): depth of darknet used in model, usually use [21, 53] for this param.\n            in_channels (int): number of input channels, for example, use 3 for RGB image.\n            stem_out_channels (int): number of output channels of darknet stem.\n                It decides channels of darknet layer2 to layer5.\n            out_features (Tuple[str]): desired output layer name.\n        \"\"\"\n        super().__init__()\n        assert out_features, \"please provide output features of Darknet\"\n        self.out_features = out_features\n        self.stem = M.Sequential(\n            BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act=\"lrelu\"),\n            *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2),\n        )\n        in_channels = stem_out_channels * 2  # 64\n\n        num_blocks = Darknet.depth2blocks[depth]\n        # create darknet with `stem_out_channels` and `num_blocks` layers.\n        # to make model structure more clear, we don't use `for` statement in python.\n        self.dark2 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[0], stride=2))\n        in_channels *= 2  # 128\n        self.dark3 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[1], stride=2))\n        in_channels *= 2  # 256\n        self.dark4 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[2], stride=2))\n        in_channels *= 2  # 512\n\n        self.dark5 = 
M.Sequential(\n            *self.make_group_layer(in_channels, num_blocks[3], stride=2),\n            *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2),\n        )\n\n    def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1):\n        \"starts with conv layer then has `num_blocks` `ResLayer`\"\n        return [\n            BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act=\"lrelu\"),\n            *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)]\n        ]\n\n    def make_spp_block(self, filters_list, in_filters):\n        m = M.Sequential(\n            *[\n                BaseConv(in_filters, filters_list[0], 1, stride=1, act=\"lrelu\"),\n                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act=\"lrelu\"),\n                SPPBottleneck(\n                    in_channels=filters_list[1],\n                    out_channels=filters_list[0],\n                    activation=\"lrelu\"\n                ),\n                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act=\"lrelu\"),\n                BaseConv(filters_list[1], filters_list[0], 1, stride=1, act=\"lrelu\"),\n            ]\n        )\n        return m\n\n    def forward(self, x):\n        outputs = {}\n        x = self.stem(x)\n        outputs[\"stem\"] = x\n        x = self.dark2(x)\n        outputs[\"dark2\"] = x\n        x = self.dark3(x)\n        outputs[\"dark3\"] = x\n        x = self.dark4(x)\n        outputs[\"dark4\"] = x\n        x = self.dark5(x)\n        outputs[\"dark5\"] = x\n        return {k: v for k, v in outputs.items() if k in self.out_features}\n\n\nclass CSPDarknet(M.Module):\n\n    def __init__(\n        self, dep_mul, wid_mul,\n        out_features=(\"dark3\", \"dark4\", \"dark5\"),\n        depthwise=False, act=\"silu\",\n    ):\n        super().__init__()\n        assert out_features, \"please provide output features of Darknet\"\n        self.out_features = out_features\n        Conv = 
DWConv if depthwise else BaseConv\n\n        base_channels = int(wid_mul * 64)  # 64\n        base_depth = max(round(dep_mul * 3), 1)  # 3\n\n        # stem\n        self.stem = Focus(3, base_channels, ksize=3, act=act)\n\n        # dark2\n        self.dark2 = M.Sequential(\n            Conv(base_channels, base_channels * 2, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 2, base_channels * 2,\n                n=base_depth, depthwise=depthwise, act=act\n            ),\n        )\n\n        # dark3\n        self.dark3 = M.Sequential(\n            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 4, base_channels * 4,\n                n=base_depth * 3, depthwise=depthwise, act=act,\n            ),\n        )\n\n        # dark4\n        self.dark4 = M.Sequential(\n            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 8, base_channels * 8,\n                n=base_depth * 3, depthwise=depthwise, act=act,\n            ),\n        )\n\n        # dark5\n        self.dark5 = M.Sequential(\n            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),\n            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),\n            CSPLayer(\n                base_channels * 16, base_channels * 16, n=base_depth,\n                shortcut=False, depthwise=depthwise, act=act,\n            ),\n        )\n\n    def forward(self, x):\n        outputs = {}\n        x = self.stem(x)\n        outputs[\"stem\"] = x\n        x = self.dark2(x)\n        outputs[\"dark2\"] = x\n        x = self.dark3(x)\n        outputs[\"dark3\"] = x\n        x = self.dark4(x)\n        outputs[\"dark4\"] = x\n        x = self.dark5(x)\n        outputs[\"dark5\"] = x\n        return {k: v for k, v in outputs.items() if k in self.out_features}\n"
  },
  {
    "path": "demo/MegEngine/python/models/network_blocks.py",
    "content": "#!/usr/bin/env python3\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.functional as F\nimport megengine.module as M\n\n\nclass UpSample(M.Module):\n\n    def __init__(self, scale_factor=2, mode=\"bilinear\"):\n        super().__init__()\n        self.scale_factor = scale_factor\n        self.mode = mode\n\n    def forward(self, x):\n        return F.vision.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)\n\n\nclass SiLU(M.Module):\n    \"\"\"export-friendly version of M.SiLU()\"\"\"\n\n    @staticmethod\n    def forward(x):\n        return x * F.sigmoid(x)\n\n\ndef get_activation(name=\"silu\"):\n    if name == \"silu\":\n        module = SiLU()\n    elif name == \"relu\":\n        module = M.ReLU()\n    elif name == \"lrelu\":\n        module = M.LeakyReLU(0.1)\n    else:\n        raise AttributeError(\"Unsupported act type: {}\".format(name))\n    return module\n\n\nclass BaseConv(M.Module):\n    \"\"\"A Conv2d -> Batchnorm -> silu/leaky relu block\"\"\"\n\n    def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act=\"silu\"):\n        super().__init__()\n        # same padding\n        pad = (ksize - 1) // 2\n        self.conv = M.Conv2d(\n            in_channels,\n            out_channels,\n            kernel_size=ksize,\n            stride=stride,\n            padding=pad,\n            groups=groups,\n            bias=bias,\n        )\n        self.bn = M.BatchNorm2d(out_channels)\n        self.act = get_activation(act)\n\n    def forward(self, x):\n        return self.act(self.bn(self.conv(x)))\n\n    def fuseforward(self, x):\n        return self.act(self.conv(x))\n\n\nclass DWConv(M.Module):\n    \"\"\"Depthwise Conv + Conv\"\"\"\n    def __init__(self, in_channels, out_channels, ksize, stride=1, act=\"silu\"):\n        super().__init__()\n        self.dconv = BaseConv(\n            in_channels, in_channels, ksize=ksize,\n            stride=stride, 
groups=in_channels, act=act\n        )\n        self.pconv = BaseConv(\n            in_channels, out_channels, ksize=1,\n            stride=1, groups=1, act=act\n        )\n\n    def forward(self, x):\n        x = self.dconv(x)\n        return self.pconv(x)\n\n\nclass Bottleneck(M.Module):\n    # Standard bottleneck\n    def __init__(\n        self, in_channels, out_channels, shortcut=True,\n        expansion=0.5, depthwise=False, act=\"silu\"\n    ):\n        super().__init__()\n        hidden_channels = int(out_channels * expansion)\n        Conv = DWConv if depthwise else BaseConv\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)\n        self.use_add = shortcut and in_channels == out_channels\n\n    def forward(self, x):\n        y = self.conv2(self.conv1(x))\n        if self.use_add:\n            y = y + x\n        return y\n\n\nclass ResLayer(M.Module):\n    \"Residual layer with `in_channels` inputs.\"\n    def __init__(self, in_channels: int):\n        super().__init__()\n        mid_channels = in_channels // 2\n        self.layer1 = BaseConv(in_channels, mid_channels, ksize=1, stride=1, act=\"lrelu\")\n        self.layer2 = BaseConv(mid_channels, in_channels, ksize=3, stride=1, act=\"lrelu\")\n\n    def forward(self, x):\n        out = self.layer2(self.layer1(x))\n        return x + out\n\n\nclass SPPBottleneck(M.Module):\n    \"\"\"Spatial pyramid pooling layer used in YOLOv3-SPP\"\"\"\n    def __init__(self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation=\"silu\"):\n        super().__init__()\n        hidden_channels = in_channels // 2\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation)\n        self.m = [M.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes]\n        conv2_channels = hidden_channels * (len(kernel_sizes) + 1)\n        self.conv2 = 
BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = F.concat([x] + [m(x) for m in self.m], axis=1)\n        x = self.conv2(x)\n        return x\n\n\nclass CSPLayer(M.Module):\n    \"\"\"C3 in yolov5, CSP Bottleneck with 3 convolutions\"\"\"\n\n    def __init__(\n        self, in_channels, out_channels, n=1,\n        shortcut=True, expansion=0.5, depthwise=False, act=\"silu\"\n    ):\n        \"\"\"\n        Args:\n            in_channels (int): input channels.\n            out_channels (int): output channels.\n            n (int): number of Bottlenecks. Default value: 1.\n        \"\"\"\n        # ch_in, ch_out, number, shortcut, groups, expansion\n        super().__init__()\n        hidden_channels = int(out_channels * expansion)  # hidden channels\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)\n        module_list = [\n            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act)\n            for _ in range(n)\n        ]\n        self.m = M.Sequential(*module_list)\n\n    def forward(self, x):\n        x_1 = self.conv1(x)\n        x_2 = self.conv2(x)\n        x_1 = self.m(x_1)\n        x = F.concat((x_1, x_2), axis=1)\n        return self.conv3(x)\n\n\nclass Focus(M.Module):\n    \"\"\"Focus width and height information into channel space.\"\"\"\n\n    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act=\"silu\"):\n        super().__init__()\n        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)\n\n    def forward(self, x):\n        # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)\n        patch_top_left = x[..., ::2, ::2]\n        patch_top_right = x[..., ::2, 1::2]\n        patch_bot_left = x[..., 1::2, 
::2]\n        patch_bot_right = x[..., 1::2, 1::2]\n        x = F.concat(\n            (patch_top_left, patch_bot_left, patch_top_right, patch_bot_right,), axis=1,\n        )\n        return self.conv(x)\n"
  },
  {
    "path": "demo/MegEngine/python/models/yolo_fpn.py",
    "content": "#!/usr/bin/env python3\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.functional as F\nimport megengine.module as M\n\nfrom .darknet import Darknet\nfrom .network_blocks import BaseConv, UpSample\n\n\nclass YOLOFPN(M.Module):\n    \"\"\"\n    YOLOFPN module. Darknet 53 is the default backbone of this model.\n    \"\"\"\n\n    def __init__(\n        self, depth=53, in_features=[\"dark3\", \"dark4\", \"dark5\"],\n    ):\n        super().__init__()\n\n        self.backbone = Darknet(depth)\n        self.in_features = in_features\n\n        # out 1\n        self.out1_cbl = self._make_cbl(512, 256, 1)\n        self.out1 = self._make_embedding([256, 512], 512 + 256)\n\n        # out 2\n        self.out2_cbl = self._make_cbl(256, 128, 1)\n        self.out2 = self._make_embedding([128, 256], 256 + 128)\n\n        # upsample\n        self.upsample = UpSample(scale_factor=2, mode=\"bilinear\")\n\n    def _make_cbl(self, _in, _out, ks):\n        return BaseConv(_in, _out, ks, stride=1, act=\"lrelu\")\n\n    def _make_embedding(self, filters_list, in_filters):\n        m = M.Sequential(\n            *[\n                self._make_cbl(in_filters, filters_list[0], 1),\n                self._make_cbl(filters_list[0], filters_list[1], 3),\n\n                self._make_cbl(filters_list[1], filters_list[0], 1),\n\n                self._make_cbl(filters_list[0], filters_list[1], 3),\n                self._make_cbl(filters_list[1], filters_list[0], 1),\n            ]\n        )\n        return m\n\n    def forward(self, inputs):\n        \"\"\"\n        Args:\n            inputs (Tensor): input image.\n\n        Returns:\n            Tuple[Tensor]: FPN output features..\n        \"\"\"\n        #  backbone\n        out_features = self.backbone(inputs)\n        x2, x1, x0 = [out_features[f] for f in self.in_features]\n\n        #  yolo branch 1\n        x1_in = self.out1_cbl(x0)\n        x1_in = self.upsample(x1_in)\n    
    x1_in = F.concat([x1_in, x1], 1)\n        out_dark4 = self.out1(x1_in)\n\n        #  yolo branch 2\n        x2_in = self.out2_cbl(out_dark4)\n        x2_in = self.upsample(x2_in)\n        x2_in = F.concat([x2_in, x2], 1)\n        out_dark3 = self.out2(x2_in)\n\n        outputs = (out_dark3, out_dark4, x0)\n        return outputs\n"
  },
  {
    "path": "demo/MegEngine/python/models/yolo_head.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.functional as F\nimport megengine.module as M\n\nfrom .network_blocks import BaseConv, DWConv\n\n\ndef meshgrid(x, y):\n    \"\"\"meshgrid wrapper for megengine\"\"\"\n    assert len(x.shape) == 1\n    assert len(y.shape) == 1\n    mesh_shape = (y.shape[0], x.shape[0])\n    mesh_x = F.broadcast_to(x, mesh_shape)\n    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)\n    return mesh_x, mesh_y\n\n\nclass YOLOXHead(M.Module):\n    def __init__(\n        self, num_classes, width=1.0, strides=[8, 16, 32],\n        in_channels=[256, 512, 1024], act=\"silu\", depthwise=False\n    ):\n        \"\"\"\n        Args:\n            act (str): activation type of conv. Default value: \"silu\".\n            depthwise (bool): whether to apply depthwise conv in conv branch. Default value: False.\n        \"\"\"\n        super().__init__()\n\n        self.n_anchors = 1\n        self.num_classes = num_classes\n        self.decode_in_inference = True  # save for matching\n\n        self.cls_convs = []\n        self.reg_convs = []\n        self.cls_preds = []\n        self.reg_preds = []\n        self.obj_preds = []\n        self.stems = []\n        Conv = DWConv if depthwise else BaseConv\n\n        for i in range(len(in_channels)):\n            self.stems.append(\n                BaseConv(\n                    in_channels=int(in_channels[i] * width),\n                    out_channels=int(256 * width),\n                    ksize=1,\n                    stride=1,\n                    act=act,\n                )\n            )\n            self.cls_convs.append(\n                M.Sequential(\n                    *[\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                
            act=act,\n                        ),\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                    ]\n                )\n            )\n            self.reg_convs.append(\n                M.Sequential(\n                    *[\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                    ]\n                )\n            )\n            self.cls_preds.append(\n                M.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=self.n_anchors * self.num_classes,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                )\n            )\n            self.reg_preds.append(\n                M.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=4,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                )\n            )\n            self.obj_preds.append(\n                M.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=self.n_anchors * 1,\n                    kernel_size=1,\n                    stride=1,\n                    
padding=0,\n                )\n            )\n\n        self.use_l1 = False\n        self.strides = strides\n        self.grids = [F.zeros(1)] * len(in_channels)\n\n    def forward(self, xin, labels=None, imgs=None):\n        outputs = []\n        assert not self.training\n\n        for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(\n            zip(self.cls_convs, self.reg_convs, self.strides, xin)\n        ):\n            x = self.stems[k](x)\n            cls_x = x\n            reg_x = x\n\n            cls_feat = cls_conv(cls_x)\n            cls_output = self.cls_preds[k](cls_feat)\n\n            reg_feat = reg_conv(reg_x)\n            reg_output = self.reg_preds[k](reg_feat)\n            obj_output = self.obj_preds[k](reg_feat)\n            output = F.concat([reg_output, F.sigmoid(obj_output), F.sigmoid(cls_output)], 1)\n            outputs.append(output)\n\n        self.hw = [x.shape[-2:] for x in outputs]\n        # [batch, n_anchors_all, 85]\n        outputs = F.concat([F.flatten(x, start_axis=2) for x in outputs], axis=2)\n        outputs = F.transpose(outputs, (0, 2, 1))\n        if self.decode_in_inference:\n            return self.decode_outputs(outputs)\n        else:\n            return outputs\n\n    def get_output_and_grid(self, output, k, stride, dtype):\n        grid = self.grids[k]\n\n        batch_size = output.shape[0]\n        n_ch = 5 + self.num_classes\n        hsize, wsize = output.shape[-2:]\n        if grid.shape[2:4] != output.shape[2:4]:\n            yv, xv = meshgrid([F.arange(hsize), F.arange(wsize)])\n            grid = F.stack((xv, yv), 2).reshape(1, 1, hsize, wsize, 2).type(dtype)\n            self.grids[k] = grid\n\n        output = output.view(batch_size, self.n_anchors, n_ch, hsize, wsize)\n        output = (\n            output.permute(0, 1, 3, 4, 2)\n            .reshape(batch_size, self.n_anchors * hsize * wsize, -1)\n        )\n        grid = grid.view(1, -1, 2)\n        output[..., :2] = (output[..., :2] + grid) 
* stride\n        output[..., 2:4] = F.exp(output[..., 2:4]) * stride\n        return output, grid\n\n    def decode_outputs(self, outputs):\n        grids = []\n        strides = []\n        for (hsize, wsize), stride in zip(self.hw, self.strides):\n            xv, yv = meshgrid(F.arange(hsize), F.arange(wsize))\n            grid = F.stack((xv, yv), 2).reshape(1, -1, 2)\n            grids.append(grid)\n            shape = grid.shape[:2]\n            strides.append(F.full((*shape, 1), stride))\n\n        grids = F.concat(grids, axis=1)\n        strides = F.concat(strides, axis=1)\n\n        outputs[..., :2] = (outputs[..., :2] + grids) * strides\n        outputs[..., 2:4] = F.exp(outputs[..., 2:4]) * strides\n        return outputs\n"
  },
  {
    "path": "demo/MegEngine/python/models/yolo_pafpn.py",
    "content": "#!/usr/bin/env python3\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.module as M\nimport megengine.functional as F\n\nfrom .darknet import CSPDarknet\nfrom .network_blocks import BaseConv, CSPLayer, DWConv, UpSample\n\n\nclass YOLOPAFPN(M.Module):\n    \"\"\"\n    PAFPN neck used by YOLOX. CSPDarknet is the backbone of this module.\n    \"\"\"\n\n    def __init__(\n        self, depth=1.0, width=1.0, in_features=(\"dark3\", \"dark4\", \"dark5\"),\n        in_channels=[256, 512, 1024], depthwise=False, act=\"silu\",\n    ):\n        super().__init__()\n        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)\n        self.in_features = in_features\n        self.in_channels = in_channels\n        Conv = DWConv if depthwise else BaseConv\n\n        self.upsample = UpSample(scale_factor=2, mode=\"bilinear\")\n        self.lateral_conv0 = BaseConv(\n            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act\n        )\n        self.C3_p4 = CSPLayer(\n            int(2 * in_channels[1] * width),\n            int(in_channels[1] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )  # cat\n\n        self.reduce_conv1 = BaseConv(\n            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act\n        )\n        self.C3_p3 = CSPLayer(\n            int(2 * in_channels[0] * width),\n            int(in_channels[0] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )\n\n        # bottom-up conv\n        self.bu_conv2 = Conv(\n            int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act\n        )\n        self.C3_n3 = CSPLayer(\n            int(2 * in_channels[0] * width),\n            int(in_channels[1] * width),\n            round(3 * depth),\n            False,\n            
depthwise=depthwise,\n            act=act,\n        )\n\n        # bottom-up conv\n        self.bu_conv1 = Conv(\n            int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act\n        )\n        self.C3_n4 = CSPLayer(\n            int(2 * in_channels[1] * width),\n            int(in_channels[2] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )\n\n    def forward(self, input):\n        \"\"\"\n        Args:\n            inputs: input images.\n\n        Returns:\n            Tuple[Tensor]: FPN feature.\n        \"\"\"\n\n        #  backbone\n        out_features = self.backbone(input)\n        features = [out_features[f] for f in self.in_features]\n        [x2, x1, x0] = features\n\n        fpn_out0 = self.lateral_conv0(x0)  # 1024->512/32\n        f_out0 = self.upsample(fpn_out0)  # 512/16\n        f_out0 = F.concat([f_out0, x1], 1)  # 512->1024/16\n        f_out0 = self.C3_p4(f_out0)  # 1024->512/16\n\n        fpn_out1 = self.reduce_conv1(f_out0)  # 512->256/16\n        f_out1 = self.upsample(fpn_out1)  # 256/8\n        f_out1 = F.concat([f_out1, x2], 1)  # 256->512/8\n        pan_out2 = self.C3_p3(f_out1)  # 512->256/8\n\n        p_out1 = self.bu_conv2(pan_out2)  # 256->256/16\n        p_out1 = F.concat([p_out1, fpn_out1], 1)  # 256->512/16\n        pan_out1 = self.C3_n3(p_out1)  # 512->512/16\n\n        p_out0 = self.bu_conv1(pan_out1)  # 512->512/32\n        p_out0 = F.concat([p_out0, fpn_out0], 1)  # 512->1024/32\n        pan_out0 = self.C3_n4(p_out0)  # 1024->1024/32\n\n        outputs = (pan_out2, pan_out1, pan_out0)\n        return outputs\n"
  },
  {
    "path": "demo/MegEngine/python/models/yolox.py",
    "content": "#!/usr/bin/env python3\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport megengine.module as M\n\nfrom .yolo_head import YOLOXHead\nfrom .yolo_pafpn import YOLOPAFPN\n\n\nclass YOLOX(M.Module):\n    \"\"\"\n    YOLOX model module. The module list is defined by create_yolov3_modules function.\n    The network returns loss values from three YOLO layers during training\n    and detection results during test.\n    \"\"\"\n\n    def __init__(self, backbone=None, head=None):\n        super().__init__()\n        if backbone is None:\n            backbone = YOLOPAFPN()\n        if head is None:\n            head = YOLOXHead(80)\n\n        self.backbone = backbone\n        self.head = head\n\n    def forward(self, x):\n        # fpn output content features of [dark3, dark4, dark5]\n        fpn_outs = self.backbone(x)\n        assert not self.training\n        outputs = self.head(fpn_outs)\n\n        return outputs\n"
  },
  {
    "path": "demo/ONNXRuntime/README.md",
    "content": "## YOLOX-ONNXRuntime in Python\n\nThis doc introduces how to convert your pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion.\n\n### Step1: Install onnxruntime\n\nrun the following command to install onnxruntime:\n```shell\npip install onnxruntime\n```\n\n### Step2: Get ONNX models\n\nUsers might download our pre-generated ONNX models or convert their own models to ONNX.\n\n#### Download ONNX models.\n\n| Model | Parameters | GFLOPs | Test Size | mAP | Weights |\n|:------| :----: | :----: | :---: | :---: | :---: |\n|  YOLOX-Nano |  0.91M  | 1.08 | 416x416 | 25.8 |[github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.onnx) |\n|  YOLOX-Tiny | 5.06M     | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.onnx) |\n|  YOLOX-S | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx) |\n|  YOLOX-M | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.onnx) |\n|  YOLOX-L | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.onnx) |\n|  YOLOX-Darknet53| 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.onnx) |\n|  YOLOX-X | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.onnx) |\n\n#### Convert Your Model to ONNX\n\nFirst, you should move to <YOLOX_HOME> by:\n```shell\ncd <YOLOX_HOME>\n```\nThen, you can:\n\n1. Convert a standard YOLOX model by -n:\n```shell\npython3 tools/export_onnx.py --output-name yolox_s.onnx -n yolox-s -c yolox_s.pth\n```\nNotes:\n* -n: specify a model name. 
The model name must be one of the [yolox-s,m,l,x and yolox-nano, yolox-tiny, yolov3]\n* -c: the model you have trained\n* -o: opset version, default 11. **However, if you will further convert your onnx model to [OpenVINO](https://github.com/Megvii-BaseDetection/YOLOX/demo/OpenVINO/), please specify the opset version to 10.**\n* --no-onnxsim: disable onnxsim\n* To customize an input shape for onnx model, modify the following code in tools/export_onnx.py:\n\n    ```python\n    dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1])\n    ```\n\n2. Convert a standard YOLOX model by -f. When using -f, the above command is equivalent to:\n\n```shell\npython3 tools/export_onnx.py --output-name yolox_s.onnx -f exps/default/yolox_s.py -c yolox_s.pth\n```\n\n3. To convert your customized model, please use -f:\n\n```shell\npython3 tools/export_onnx.py --output-name your_yolox.onnx -f exps/your_dir/your_yolox.py -c your_yolox.pth\n```\n\n### Step3: ONNXRuntime Demo\n\nStep1.\n```shell\ncd <YOLOX_HOME>/demo/ONNXRuntime\n```\n\nStep2. \n```shell\npython3 onnx_inference.py -m <ONNX_MODEL_PATH> -i <IMAGE_PATH> -o <OUTPUT_DIR> -s 0.3 --input_shape 640,640\n```\nNotes:\n* -m: your converted onnx model\n* -i: input_image\n* -s: score threshold for visualization.\n* --input_shape: should be consistent with the shape you used for onnx conversion.\n"
  },
  {
    "path": "demo/ONNXRuntime/onnx_inference.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\n\nimport cv2\nimport numpy as np\n\nimport onnxruntime\n\nfrom yolox.data.data_augment import preproc as preprocess\nfrom yolox.data.datasets import COCO_CLASSES\nfrom yolox.utils import mkdir, multiclass_nms, demo_postprocess, vis\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"onnxruntime inference sample\")\n    parser.add_argument(\n        \"-m\",\n        \"--model\",\n        type=str,\n        default=\"yolox.onnx\",\n        help=\"Input your onnx model.\",\n    )\n    parser.add_argument(\n        \"-i\",\n        \"--image_path\",\n        type=str,\n        default='test_image.png',\n        help=\"Path to your input image.\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--output_dir\",\n        type=str,\n        default='demo_output',\n        help=\"Path to your output directory.\",\n    )\n    parser.add_argument(\n        \"-s\",\n        \"--score_thr\",\n        type=float,\n        default=0.3,\n        help=\"Score threshould to filter the result.\",\n    )\n    parser.add_argument(\n        \"--input_shape\",\n        type=str,\n        default=\"640,640\",\n        help=\"Specify an input shape for inference.\",\n    )\n    return parser\n\n\nif __name__ == '__main__':\n    args = make_parser().parse_args()\n\n    input_shape = tuple(map(int, args.input_shape.split(',')))\n    origin_img = cv2.imread(args.image_path)\n    img, ratio = preprocess(origin_img, input_shape)\n\n    session = onnxruntime.InferenceSession(args.model)\n\n    ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]}\n    output = session.run(None, ort_inputs)\n    predictions = demo_postprocess(output[0], input_shape)[0]\n\n    boxes = predictions[:, :4]\n    scores = predictions[:, 4:5] * predictions[:, 5:]\n\n    boxes_xyxy = np.ones_like(boxes)\n    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.\n    
boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2.\n    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2.\n    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2.\n    boxes_xyxy /= ratio\n    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)\n    if dets is not None:\n        final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]\n        origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,\n                         conf=args.score_thr, class_names=COCO_CLASSES)\n\n    mkdir(args.output_dir)\n    output_path = os.path.join(args.output_dir, os.path.basename(args.image_path))\n    cv2.imwrite(output_path, origin_img)\n"
  },
  {
    "path": "demo/OpenVINO/README.md",
    "content": "## YOLOX for OpenVINO\n\n* [C++ Demo](./cpp)\n* [Python Demo](./python)"
  },
  {
    "path": "demo/OpenVINO/cpp/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.4.1)\nset(CMAKE_CXX_STANDARD 14)\n\nproject(yolox_openvino_demo)\n\nfind_package(OpenCV REQUIRED)\nfind_package(InferenceEngine REQUIRED)\nfind_package(ngraph REQUIRED)\n\ninclude_directories(\n    ${OpenCV_INCLUDE_DIRS}\n    ${CMAKE_CURRENT_SOURCE_DIR}\n    ${CMAKE_CURRENT_BINARY_DIR}\n)\n\nadd_executable(yolox_openvino yolox_openvino.cpp)\n\ntarget_link_libraries(\n     yolox_openvino\n    ${InferenceEngine_LIBRARIES}\n    ${NGRAPH_LIBRARIES}\n    ${OpenCV_LIBS} \n)"
  },
  {
    "path": "demo/OpenVINO/cpp/README.md",
    "content": "# YOLOX-OpenVINO in C++\n\nThis tutorial includes a C++ demo for OpenVINO, as well as some converted models.\n\n### Download OpenVINO models.\n\n| Model | Parameters | GFLOPs | Test Size | mAP | Weights |\n|:------| :----: | :----: | :---: | :---: | :---: |\n|  [YOLOX-Nano](../../../exps/default/nano.py) |  0.91M  | 1.08 | 416x416 | 25.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano_openvino.tar.gz) |\n|  [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M     | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny_openvino.tar.gz) |\n|  [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_openvino.tar.gz) |\n|  [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m_openvino.tar.gz) |\n|  [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l_openvino.tar.gz) |\n|  [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_dark_openvino.tar.gz) | \n|  [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x_openvino.tar.gz) |\n\n## Install OpenVINO Toolkit\n\nPlease visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details.\n\n## Set up the Environment\n\n### For Linux\n\n**Option1. Set up the environment tempororally. 
You need to run this command every time you start a new shell window.**\n\n```shell\nsource /opt/intel/openvino_2021/bin/setupvars.sh\n```\n\n**Option2. Set up the environment permanently.**\n\n*Step1.* For Linux:\n```shell\nvim ~/.bashrc \n```\n\n*Step2.* Add the following line into your file:\n\n```shell\nsource /opt/intel/openvino_2021/bin/setupvars.sh\n```\n\n*Step3.* Save and exit the file, then run:\n\n```shell\nsource ~/.bashrc\n```\n\n\n## Convert model\n\n1. Export ONNX model\n   \n   Please refer to the [ONNX tutorial](../../ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.**\n\n2. Convert ONNX to OpenVINO \n\n   ``` shell\n   cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer\n   ```\n\n   Install the requirements for the conversion tool\n\n   ```shell\n   sudo ./install_prerequisites/install_prerequisites_onnx.sh\n   ```\n\n   Then convert model.\n   ```shell\n   python3 mo.py --input_model <ONNX_MODEL> --input_shape <INPUT_SHAPE> [--data_type FP16]\n   ```\n   For example:\n   ```shell\n   python3 mo.py --input_model yolox_tiny.onnx --input_shape [1,3,416,416] --data_type FP16\n   ```  \n\n   Make sure the input shape is consistent with [those](yolox_openvino.cpp#L24-L25) in the cpp file. \n\n## Build \n\n### Linux\n```shell\nsource /opt/intel/openvino_2021/bin/setupvars.sh\nmkdir build\ncd build\ncmake ..\nmake\n```\n\n## Demo\n\n### c++\n\n```shell\n./yolox_openvino <XML_MODEL_PATH> <IMAGE_PATH> <DEVICE>\n```\n"
  },
  {
    "path": "demo/OpenVINO/cpp/yolox_openvino.cpp",
    "content": "// Copyright (C) 2018-2021 Intel Corporation\n// SPDX-License-Identifier: Apache-2.0\n//\n\n#include <iterator>\n#include <memory>\n#include <string>\n#include <vector>\n#include <opencv2/opencv.hpp>\n#include <iostream>\n#include <inference_engine.hpp>\n\nusing namespace InferenceEngine;\n\n/**\n * @brief Define names based depends on Unicode path support\n */\n#define tcout                  std::cout\n#define file_name_t            std::string\n#define imread_t               cv::imread\n#define NMS_THRESH 0.45\n#define BBOX_CONF_THRESH 0.3\n\nstatic const int INPUT_W = 416;\nstatic const int INPUT_H = 416;\nstatic const int NUM_CLASSES = 80; // COCO has 80 classes. Modify this value on your own dataset.\n\ncv::Mat static_resize(cv::Mat& img) {\n    float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));\n    // r = std::min(r, 1.0f);\n    int unpad_w = r * img.cols;\n    int unpad_h = r * img.rows;\n    cv::Mat re(unpad_h, unpad_w, CV_8UC3);\n    cv::resize(img, re, re.size());\n    //cv::Mat out(INPUT_W, INPUT_H, CV_8UC3, cv::Scalar(114, 114, 114));\n    cv::Mat out(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(114, 114, 114));\n    re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));\n    return out;\n}\n\nvoid blobFromImage(cv::Mat& img, Blob::Ptr& blob){\n    int channels = 3;\n    int img_h = img.rows;\n    int img_w = img.cols;\n    InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);\n    if (!mblob) \n    {\n        THROW_IE_EXCEPTION << \"We expect blob to be inherited from MemoryBlob in matU8ToBlob, \"\n            << \"but by fact we were not able to cast inputBlob to MemoryBlob\";\n    }\n    // locked memory holder should be alive all time while access to its buffer happens\n    auto mblobHolder = mblob->wmap();\n\n    float *blob_data = mblobHolder.as<float *>();\n\n    for (size_t c = 0; c < channels; c++) \n    {\n        for (size_t  h = 0; h < img_h; h++) \n        {\n            
for (size_t w = 0; w < img_w; w++) \n            {\n                blob_data[c * img_w * img_h + h * img_w + w] =\n                    (float)img.at<cv::Vec3b>(h, w)[c];\n            }\n        }\n    }\n}\n\n\nstruct Object\n{\n    cv::Rect_<float> rect;\n    int label;\n    float prob;\n};\n\nstruct GridAndStride\n{\n    int grid0;\n    int grid1;\n    int stride;\n};\n\nstatic void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)\n{\n    for (auto stride : strides)\n    {\n        int num_grid_w = target_w / stride;\n        int num_grid_h = target_h / stride;\n        for (int g1 = 0; g1 < num_grid_h; g1++)\n        {\n            for (int g0 = 0; g0 < num_grid_w; g0++)\n            {\n                grid_strides.push_back((GridAndStride){g0, g1, stride});\n            }\n        }\n    }\n}\n\n\nstatic void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const float* feat_ptr, float prob_threshold, std::vector<Object>& objects)\n{\n\n    const int num_anchors = grid_strides.size();\n\n    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)\n    {\n        const int grid0 = grid_strides[anchor_idx].grid0;\n        const int grid1 = grid_strides[anchor_idx].grid1;\n        const int stride = grid_strides[anchor_idx].stride;\n\n\tconst int basic_pos = anchor_idx * (NUM_CLASSES + 5);\n\n        // yolox/models/yolo_head.py decode logic\n        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides\n        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides\n        float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride;\n        float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride;\n        float w = exp(feat_ptr[basic_pos + 2]) * stride;\n        float h = exp(feat_ptr[basic_pos + 3]) * stride;\n        float x0 = x_center - w * 0.5f;\n        float y0 = y_center - h * 0.5f;\n\n        float box_objectness = 
feat_ptr[basic_pos + 4];\n        for (int class_idx = 0; class_idx < NUM_CLASSES; class_idx++)\n        {\n            float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];\n            float box_prob = box_objectness * box_cls_score;\n            if (box_prob > prob_threshold)\n            {\n                Object obj;\n                obj.rect.x = x0;\n                obj.rect.y = y0;\n                obj.rect.width = w;\n                obj.rect.height = h;\n                obj.label = class_idx;\n                obj.prob = box_prob;\n\n                objects.push_back(obj);\n            }\n\n        } // class loop\n\n    } // point anchor loop\n}\n\nstatic inline float intersection_area(const Object& a, const Object& b)\n{\n    cv::Rect_<float> inter = a.rect & b.rect;\n    return inter.area();\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)\n{\n    int i = left;\n    int j = right;\n    float p = faceobjects[(left + right) / 2].prob;\n\n    while (i <= j)\n    {\n        while (faceobjects[i].prob > p)\n            i++;\n\n        while (faceobjects[j].prob < p)\n            j--;\n\n        if (i <= j)\n        {\n            // swap\n            std::swap(faceobjects[i], faceobjects[j]);\n\n            i++;\n            j--;\n        }\n    }\n\n    #pragma omp parallel sections\n    {\n        #pragma omp section\n        {\n            if (left < j) qsort_descent_inplace(faceobjects, left, j);\n        }\n        #pragma omp section\n        {\n            if (i < right) qsort_descent_inplace(faceobjects, i, right);\n        }\n    }\n}\n\n\nstatic void qsort_descent_inplace(std::vector<Object>& objects)\n{\n    if (objects.empty())\n        return;\n\n    qsort_descent_inplace(objects, 0, objects.size() - 1);\n}\n\nstatic void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)\n{\n    picked.clear();\n\n    const int n = faceobjects.size();\n\n    
std::vector<float> areas(n);\n    for (int i = 0; i < n; i++)\n    {\n        areas[i] = faceobjects[i].rect.area();\n    }\n\n    for (int i = 0; i < n; i++)\n    {\n        const Object& a = faceobjects[i];\n\n        int keep = 1;\n        for (int j = 0; j < (int)picked.size(); j++)\n        {\n            const Object& b = faceobjects[picked[j]];\n\n            // intersection over union\n            float inter_area = intersection_area(a, b);\n            float union_area = areas[i] + areas[picked[j]] - inter_area;\n            // float IoU = inter_area / union_area\n            if (inter_area / union_area > nms_threshold)\n                keep = 0;\n        }\n\n        if (keep)\n            picked.push_back(i);\n    }\n}\n\n\nstatic void decode_outputs(const float* prob, std::vector<Object>& objects, float scale, const int img_w, const int img_h) {\n        std::vector<Object> proposals;\n        std::vector<int> strides = {8, 16, 32};\n        std::vector<GridAndStride> grid_strides;\n\n        generate_grids_and_stride(INPUT_W, INPUT_H, strides, grid_strides);\n        generate_yolox_proposals(grid_strides, prob,  BBOX_CONF_THRESH, proposals);\n        qsort_descent_inplace(proposals);\n\n        std::vector<int> picked;\n        nms_sorted_bboxes(proposals, picked, NMS_THRESH);\n        int count = picked.size();\n        objects.resize(count);\n\n        for (int i = 0; i < count; i++)\n        {\n            objects[i] = proposals[picked[i]];\n\n            // adjust offset to original unpadded\n            float x0 = (objects[i].rect.x) / scale;\n            float y0 = (objects[i].rect.y) / scale;\n            float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;\n            float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;\n\n            // clip\n            x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);\n            y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);\n            x1 = std::max(std::min(x1, 
(float)(img_w - 1)), 0.f);\n            y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);\n\n            objects[i].rect.x = x0;\n            objects[i].rect.y = y0;\n            objects[i].rect.width = x1 - x0;\n            objects[i].rect.height = y1 - y0;\n        }\n}\n\nconst float color_list[80][3] =\n{\n    {0.000, 0.447, 0.741},\n    {0.850, 0.325, 0.098},\n    {0.929, 0.694, 0.125},\n    {0.494, 0.184, 0.556},\n    {0.466, 0.674, 0.188},\n    {0.301, 0.745, 0.933},\n    {0.635, 0.078, 0.184},\n    {0.300, 0.300, 0.300},\n    {0.600, 0.600, 0.600},\n    {1.000, 0.000, 0.000},\n    {1.000, 0.500, 0.000},\n    {0.749, 0.749, 0.000},\n    {0.000, 1.000, 0.000},\n    {0.000, 0.000, 1.000},\n    {0.667, 0.000, 1.000},\n    {0.333, 0.333, 0.000},\n    {0.333, 0.667, 0.000},\n    {0.333, 1.000, 0.000},\n    {0.667, 0.333, 0.000},\n    {0.667, 0.667, 0.000},\n    {0.667, 1.000, 0.000},\n    {1.000, 0.333, 0.000},\n    {1.000, 0.667, 0.000},\n    {1.000, 1.000, 0.000},\n    {0.000, 0.333, 0.500},\n    {0.000, 0.667, 0.500},\n    {0.000, 1.000, 0.500},\n    {0.333, 0.000, 0.500},\n    {0.333, 0.333, 0.500},\n    {0.333, 0.667, 0.500},\n    {0.333, 1.000, 0.500},\n    {0.667, 0.000, 0.500},\n    {0.667, 0.333, 0.500},\n    {0.667, 0.667, 0.500},\n    {0.667, 1.000, 0.500},\n    {1.000, 0.000, 0.500},\n    {1.000, 0.333, 0.500},\n    {1.000, 0.667, 0.500},\n    {1.000, 1.000, 0.500},\n    {0.000, 0.333, 1.000},\n    {0.000, 0.667, 1.000},\n    {0.000, 1.000, 1.000},\n    {0.333, 0.000, 1.000},\n    {0.333, 0.333, 1.000},\n    {0.333, 0.667, 1.000},\n    {0.333, 1.000, 1.000},\n    {0.667, 0.000, 1.000},\n    {0.667, 0.333, 1.000},\n    {0.667, 0.667, 1.000},\n    {0.667, 1.000, 1.000},\n    {1.000, 0.000, 1.000},\n    {1.000, 0.333, 1.000},\n    {1.000, 0.667, 1.000},\n    {0.333, 0.000, 0.000},\n    {0.500, 0.000, 0.000},\n    {0.667, 0.000, 0.000},\n    {0.833, 0.000, 0.000},\n    {1.000, 0.000, 0.000},\n    {0.000, 0.167, 0.000},\n    {0.000, 0.333, 0.000},\n    
{0.000, 0.500, 0.000},\n    {0.000, 0.667, 0.000},\n    {0.000, 0.833, 0.000},\n    {0.000, 1.000, 0.000},\n    {0.000, 0.000, 0.167},\n    {0.000, 0.000, 0.333},\n    {0.000, 0.000, 0.500},\n    {0.000, 0.000, 0.667},\n    {0.000, 0.000, 0.833},\n    {0.000, 0.000, 1.000},\n    {0.000, 0.000, 0.000},\n    {0.143, 0.143, 0.143},\n    {0.286, 0.286, 0.286},\n    {0.429, 0.429, 0.429},\n    {0.571, 0.571, 0.571},\n    {0.714, 0.714, 0.714},\n    {0.857, 0.857, 0.857},\n    {0.000, 0.447, 0.741},\n    {0.314, 0.717, 0.741},\n    {0.50, 0.5, 0}\n};\n\nstatic void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)\n{\n    static const char* class_names[] = {\n        \"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\",\n        \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\",\n        \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\",\n        \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\",\n        \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\",\n        \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\",\n        \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\",\n        \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\",\n        \"hair drier\", \"toothbrush\"\n    };\n\n    cv::Mat image = bgr.clone();\n\n    for (size_t i = 0; i < objects.size(); i++)\n    {\n        const Object& obj = objects[i];\n\n        fprintf(stderr, \"%d = %.5f at %.2f %.2f %.2f x %.2f\\n\", 
obj.label, obj.prob,\n                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);\n\n        cv::Scalar color = cv::Scalar(color_list[obj.label][0], color_list[obj.label][1], color_list[obj.label][2]);\n        float c_mean = cv::mean(color)[0];\n        cv::Scalar txt_color;\n        if (c_mean > 0.5){\n            txt_color = cv::Scalar(0, 0, 0);\n        }else{\n            txt_color = cv::Scalar(255, 255, 255);\n        }\n\n        cv::rectangle(image, obj.rect, color * 255, 2);\n\n        char text[256];\n        sprintf(text, \"%s %.1f%%\", class_names[obj.label], obj.prob * 100);\n\n        int baseLine = 0;\n        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);\n\n        cv::Scalar txt_bk_color = color * 0.7 * 255;\n\n        int x = obj.rect.x;\n        int y = obj.rect.y + 1;\n        //int y = obj.rect.y - label_size.height - baseLine;\n        if (y > image.rows)\n            y = image.rows;\n        //if (x + label_size.width > image.cols)\n            //x = image.cols - label_size.width;\n\n        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),\n                      txt_bk_color, -1);\n\n        cv::putText(image, text, cv::Point(x, y + label_size.height),\n                    cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);\n    }\n\n    cv::imwrite(\"_demo.jpg\" , image);\n    fprintf(stderr, \"save vis file\\n\");\n    /* cv::imshow(\"image\", image); */\n    /* cv::waitKey(0); */\n}\n\n\nint main(int argc, char* argv[]) {\n    try {\n        // ------------------------------ Parsing and validation of input arguments\n        // ---------------------------------\n        if (argc != 4) {\n            tcout << \"Usage : \" << argv[0] << \" <path_to_model> <path_to_image> <device_name>\" << std::endl;\n            return EXIT_FAILURE;\n        }\n\n        const file_name_t input_model {argv[1]};\n        const file_name_t 
input_image_path {argv[2]};\n        const std::string device_name {argv[3]};\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 1. Initialize inference engine core\n        // -------------------------------------\n        Core ie;\n        // -----------------------------------------------------------------------------------------------------\n\n        // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and\n        // .bin files) or ONNX (.onnx file) format\n        CNNNetwork network = ie.ReadNetwork(input_model);\n        if (network.getOutputsInfo().size() != 1)\n            throw std::logic_error(\"Sample supports topologies with 1 output only\");\n        if (network.getInputsInfo().size() != 1)\n            throw std::logic_error(\"Sample supports topologies with 1 input only\");\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 3. Configure input & output\n        // ---------------------------------------------\n        // --------------------------- Prepare input blobs\n        // -----------------------------------------------------\n        InputInfo::Ptr input_info = network.getInputsInfo().begin()->second;\n        std::string input_name = network.getInputsInfo().begin()->first;\n\n        /* Mark input as resizable by setting of a resize algorithm.\n         * In this case we will be able to set an input blob of any shape to an\n         * infer request. 
Resize and layout conversions are executed automatically\n         * during inference */\n        //input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);\n        //input_info->setLayout(Layout::NHWC);\n        //input_info->setPrecision(Precision::FP32);\n\n        // --------------------------- Prepare output blobs\n        // ----------------------------------------------------\n        if (network.getOutputsInfo().empty()) {\n            std::cerr << \"Network outputs info is empty\" << std::endl;\n            return EXIT_FAILURE;\n        }\n        DataPtr output_info = network.getOutputsInfo().begin()->second;\n        std::string output_name = network.getOutputsInfo().begin()->first;\n\n        output_info->setPrecision(Precision::FP32);\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 4. Loading a model to the device\n        // ------------------------------------------\n        ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name);\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 5. Create an infer request\n        // -------------------------------------------------\n        InferRequest infer_request = executable_network.CreateInferRequest();\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 6. Prepare input\n        // --------------------------------------------------------\n        /* Read input image to a blob and set it to an infer request without resize\n         * and layout conversions. 
*/\n        cv::Mat image = imread_t(input_image_path);\n\t    cv::Mat pr_img = static_resize(image);\n        Blob::Ptr imgBlob = infer_request.GetBlob(input_name);     // just wrap Mat data by Blob::Ptr\n\t    blobFromImage(pr_img, imgBlob);\n\n        // infer_request.SetBlob(input_name, imgBlob);  // infer_request accepts input blob of any size\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 7. Do inference\n        // --------------------------------------------------------\n        /* Running the request synchronously */\n        infer_request.Infer();\n        // -----------------------------------------------------------------------------------------------------\n\n        // --------------------------- Step 8. Process output\n        // ------------------------------------------------------\n        const Blob::Ptr output_blob = infer_request.GetBlob(output_name);\n        MemoryBlob::CPtr moutput = as<MemoryBlob>(output_blob);\n        if (!moutput) {\n            throw std::logic_error(\"We expect output to be inherited from MemoryBlob, \"\n                                   \"but by fact we were not able to cast output to MemoryBlob\");\n        }\n        // locked memory holder should be alive all time while access to its buffer\n        // happens\n        auto moutputHolder = moutput->rmap();\n        const float* net_pred = moutputHolder.as<const PrecisionTrait<Precision::FP32>::value_type*>();\n        \n\t    int img_w = image.cols;\n        int img_h = image.rows;\n\t    float scale = std::min(INPUT_W / (image.cols*1.0), INPUT_H / (image.rows*1.0));\n        std::vector<Object> objects;\n\n        decode_outputs(net_pred, objects, scale, img_w, img_h);\n        draw_objects(image, objects);\n\n            // -----------------------------------------------------------------------------------------------------\n        } catch (const 
std::exception& ex) {\n            std::cerr << ex.what() << std::endl;\n            return EXIT_FAILURE;\n    }\n    return EXIT_SUCCESS;\n}\n"
  },
  {
    "path": "demo/OpenVINO/python/README.md",
    "content": "# YOLOX-OpenVINO in Python\n\nThis tutorial includes a Python demo for OpenVINO, as well as some converted models.\n\n### Download OpenVINO models.\n\n| Model | Parameters | GFLOPs | Test Size | mAP | Weights |\n|:------| :----: | :----: | :---: | :---: | :---: |\n|  [YOLOX-Nano](../../../exps/default/nano.py) |  0.91M  | 1.08 | 416x416 | 25.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano_openvino.tar.gz) |\n|  [YOLOX-Tiny](../../../exps/default/yolox_tiny.py) | 5.06M     | 6.45 | 416x416 |32.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny_openvino.tar.gz) |\n|  [YOLOX-S](../../../exps/default/yolox_s.py) | 9.0M | 26.8 | 640x640 |40.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s_openvino.tar.gz) |\n|  [YOLOX-M](../../../exps/default/yolox_m.py) | 25.3M | 73.8 | 640x640 |47.2 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m_openvino.tar.gz) |\n|  [YOLOX-L](../../../exps/default/yolox_l.py) | 54.2M | 155.6 | 640x640 |50.1 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l_openvino.tar.gz) |\n|  [YOLOX-Darknet53](../../../exps/default/yolov3.py) | 63.72M | 185.3 | 640x640 |48.0 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_dark_openvino.tar.gz) | \n|  [YOLOX-X](../../../exps/default/yolox_x.py) | 99.1M | 281.9 | 640x640 |51.5 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x_openvino.tar.gz) |\n\n## Install OpenVINO Toolkit\n\nPlease visit [Openvino Homepage](https://docs.openvinotoolkit.org/latest/get_started_guides.html) for more details.\n\n## Set up the Environment\n\n### For Linux\n\n**Option1. Set up the environment tempororally. 
You need to run this command every time you start a new shell window.**\n\n```shell\nsource /opt/intel/openvino_2021/bin/setupvars.sh\n```\n\n**Option2. Set up the environment permanently.**\n\n*Step1.* For Linux:\n```shell\nvim ~/.bashrc\n```\n\n*Step2.* Add the following line into your file:\n\n```shell\nsource /opt/intel/openvino_2021/bin/setupvars.sh\n```\n\n*Step3.* Save and exit the file, then run:\n\n```shell\nsource ~/.bashrc\n```\n\n\n## Convert model\n\n1. Export ONNX model\n\n   Please refer to the [ONNX tutorial](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime). **Note that you should set --opset to 10, otherwise your next step will fail.**\n\n2. Convert ONNX to OpenVINO\n\n   ``` shell\n   cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer\n   ```\n\n   Install requirements for convert tool\n\n   ```shell\n   sudo ./install_prerequisites/install_prerequisites_onnx.sh\n   ```\n\n   Then convert model.\n   ```shell\n   python3 mo.py --input_model <ONNX_MODEL> --input_shape <INPUT_SHAPE> [--data_type FP16]\n   ```\n   For example:\n   ```shell\n   python3 mo.py --input_model yolox.onnx --input_shape [1,3,640,640] --data_type FP16 --output_dir converted_output\n   ```\n\n## Demo\n\n### python\n\n```shell\npython openvino_inference.py -m <XML_MODEL_PATH> -i <IMAGE_PATH> \n```\nor\n```shell\npython openvino_inference.py -m <XML_MODEL_PATH> -i <IMAGE_PATH> -o <OUTPUT_DIR> -s <SCORE_THR> -d <DEVICE>\n```\n\n"
  },
  {
    "path": "demo/OpenVINO/python/openvino_inference.py",
    "content": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n# Copyright (C) 2018-2021 Intel Corporation\n# SPDX-License-Identifier: Apache-2.0\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport logging as log\nimport os\nimport sys\n\nimport cv2\nimport numpy as np\n\nfrom openvino.inference_engine import IECore\n\nfrom yolox.data.data_augment import preproc as preprocess\nfrom yolox.data.datasets import COCO_CLASSES\nfrom yolox.utils import mkdir, multiclass_nms, demo_postprocess, vis\n\n\ndef parse_args() -> argparse.Namespace:\n    \"\"\"Parse and return command line arguments\"\"\"\n    parser = argparse.ArgumentParser(add_help=False)\n    args = parser.add_argument_group('Options')\n    args.add_argument(\n        '-h',\n        '--help',\n        action='help',\n        help='Show this help message and exit.')\n    args.add_argument(\n        '-m',\n        '--model',\n        required=True,\n        type=str,\n        help='Required. Path to an .xml or .onnx file with a trained model.')\n    args.add_argument(\n        '-i',\n        '--input',\n        required=True,\n        type=str,\n        help='Required. Path to an image file.')\n    args.add_argument(\n        '-o',\n        '--output_dir',\n        type=str,\n        default='demo_output',\n        help='Path to your output dir.')\n    args.add_argument(\n        '-s',\n        '--score_thr',\n        type=float,\n        default=0.3,\n        help=\"Score threshould to visualize the result.\")\n    args.add_argument(\n        '-d',\n        '--device',\n        default='CPU',\n        type=str,\n        help='Optional. Specify the target device to infer on; CPU, GPU, \\\n              MYRIAD, HDDL or HETERO: is acceptable. The sample will look \\\n              for a suitable plugin for device specified. Default value \\\n              is CPU.')\n    args.add_argument(\n        '--labels',\n        default=None,\n        type=str,\n        help='Option:al. 
Path to a labels mapping file.')\n    args.add_argument(\n        '-nt',\n        '--number_top',\n        default=10,\n        type=int,\n        help='Optional. Number of top results.')\n    return parser.parse_args()\n\n\ndef main():\n    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)\n    args = parse_args()\n\n    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------\n    log.info('Creating Inference Engine')\n    ie = IECore()\n\n    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------------\n    log.info(f'Reading the network: {args.model}')\n    # (.xml and .bin files) or (.onnx file)\n    net = ie.read_network(model=args.model)\n\n    if len(net.input_info) != 1:\n        log.error('Sample supports only single input topologies')\n        return -1\n    if len(net.outputs) != 1:\n        log.error('Sample supports only single output topologies')\n        return -1\n\n    # ---------------------------Step 3. Configure input & output----------------------------------------------------------\n    log.info('Configuring input and output blobs')\n    # Get names of input and output blobs\n    input_blob = next(iter(net.input_info))\n    out_blob = next(iter(net.outputs))\n\n    # Set input and output precision manually\n    net.input_info[input_blob].precision = 'FP32'\n    net.outputs[out_blob].precision = 'FP16'\n\n    # Get a number of classes recognized by a model\n    num_of_classes = max(net.outputs[out_blob].shape)\n\n    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------\n    log.info('Loading the model to the plugin')\n    exec_net = ie.load_network(network=net, device_name=args.device)\n\n    # ---------------------------Step 5. 
Create infer request--------------------------------------------------------------\n    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork\n    # instance which stores infer requests. So you already created Infer requests in the previous step.\n\n    # ---------------------------Step 6. Prepare input---------------------------------------------------------------------\n    origin_img = cv2.imread(args.input)\n    _, _, h, w = net.input_info[input_blob].input_data.shape\n    image, ratio = preprocess(origin_img, (h, w))\n\n    # ---------------------------Step 7. Do inference----------------------------------------------------------------------\n    log.info('Starting inference in synchronous mode')\n    res = exec_net.infer(inputs={input_blob: image})\n\n    # ---------------------------Step 8. Process output--------------------------------------------------------------------\n    res = res[out_blob]\n\n    predictions = demo_postprocess(res, (h, w))[0]\n\n    boxes = predictions[:, :4]\n    scores = predictions[:, 4, None] * predictions[:, 5:]\n\n    boxes_xyxy = np.ones_like(boxes)\n    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.\n    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2.\n    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2.\n    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2.\n    boxes_xyxy /= ratio\n    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)\n\n    if dets is not None:\n        final_boxes = dets[:, :4]\n        final_scores, final_cls_inds = dets[:, 4], dets[:, 5]\n        origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,\n                         conf=args.score_thr, class_names=COCO_CLASSES)\n\n    mkdir(args.output_dir)\n    output_path = os.path.join(args.output_dir, os.path.basename(args.input))\n    cv2.imwrite(output_path, origin_img)\n\n\nif __name__ == '__main__':\n    sys.exit(main())\n"
  },
  {
    "path": "demo/TensorRT/cpp/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 2.6)\n\nproject(yolox)\n\nadd_definitions(-std=c++11)\n\noption(CUDA_USE_STATIC_CUDA_RUNTIME OFF)\nset(CMAKE_CXX_STANDARD 11)\nset(CMAKE_BUILD_TYPE Debug)\n\nfind_package(CUDA REQUIRED)\n\ninclude_directories(${PROJECT_SOURCE_DIR}/include)\n# include and link dirs of cuda and tensorrt, you need adapt them if yours are different\n# cuda\ninclude_directories(/data/cuda/cuda-10.2/cuda/include)\nlink_directories(/data/cuda/cuda-10.2/cuda/lib64)\n# cudnn\ninclude_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include)\nlink_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64)\n# tensorrt\ninclude_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/include)\nlink_directories(/data/cuda/cuda-10.2/TensorRT/v7.2.1.6/lib)\n\nset(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED\")\n\nfind_package(OpenCV)\ninclude_directories(${OpenCV_INCLUDE_DIRS})\n\nadd_executable(yolox ${PROJECT_SOURCE_DIR}/yolox.cpp)\ntarget_link_libraries(yolox nvinfer)\ntarget_link_libraries(yolox cudart)\ntarget_link_libraries(yolox ${OpenCV_LIBS})\n\nadd_definitions(-O2 -pthread)\n\n"
  },
  {
    "path": "demo/TensorRT/cpp/README.md",
    "content": "# YOLOX-TensorRT in C++\n\nAs YOLOX models are easy to convert to tensorrt using [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt), \nour C++ demo does not include the model converting or constructing like other tenorrt demos.\n\n\n## Step 1: Prepare serialized engine file\n\nFollow the trt [python demo README](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/demo/TensorRT/python/README.md) to convert and save the serialized engine file.\n\nCheck the 'model_trt.engine' file generated from Step 1, which will be automatically saved at the current demo dir.\n\n\n## Step 2: build the demo\n\nPlease follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) to install TensorRT.\n\nAnd you should set the TensorRT path and CUDA path in CMakeLists.txt.\n\nIf you train your custom dataset, you may need to modify the value of `num_class`.\n\n```c++\nconst int num_class = 80;\n```\n\nInstall opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+). \n\nbuild the demo:\n\n```shell\nmkdir build\ncd build\ncmake ..\nmake\n```\n\nThen run the demo:\n\n```shell\n./yolox ../model_trt.engine -i ../../../../assets/dog.jpg\n```\n\nor\n\n```shell\n./yolox <path/to/your/engine_file> -i <path/to/image>\n```\n\nNOTE: for `trtexec` users, modify `INPUT_BLOB_NAME` and `OUTPUT_BLOB_NAME` as the following code.\n```\nconst char* INPUT_BLOB_NAME = \"images\";\nconst char* OUTPUT_BLOB_NAME = \"output\";\n```\n\nHere is the command to convert the small onnx model to tensorrt engine file:\n```\ntrtexec --onnx=yolox_s.onnx --saveEngine=yolox_s.trt\n```\n"
  },
  {
    "path": "demo/TensorRT/cpp/logging.h",
    "content": "/*\n * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#ifndef TENSORRT_LOGGING_H\n#define TENSORRT_LOGGING_H\n\n#include \"NvInferRuntimeCommon.h\"\n#include <cassert>\n#include <ctime>\n#include <iomanip>\n#include <iostream>\n#include <ostream>\n#include <sstream>\n#include <string>\n\nusing Severity = nvinfer1::ILogger::Severity;\n\nclass LogStreamConsumerBuffer : public std::stringbuf\n{\npublic:\n    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)\n        : mOutput(stream)\n        , mPrefix(prefix)\n        , mShouldLog(shouldLog)\n    {\n    }\n\n    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)\n        : mOutput(other.mOutput)\n    {\n    }\n\n    ~LogStreamConsumerBuffer()\n    {\n        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence\n        // std::streambuf::pptr() gives a pointer to the current position of the output sequence\n        // if the pointer to the beginning is not equal to the pointer to the current position,\n        // call putOutput() to log the output to the stream\n        if (pbase() != pptr())\n        {\n            putOutput();\n        }\n    }\n\n    // synchronizes the stream buffer and returns 0 on success\n    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,\n    // 
resetting the buffer and flushing the stream\n    virtual int sync()\n    {\n        putOutput();\n        return 0;\n    }\n\n    void putOutput()\n    {\n        if (mShouldLog)\n        {\n            // prepend timestamp\n            std::time_t timestamp = std::time(nullptr);\n            tm* tm_local = std::localtime(&timestamp);\n            std::cout << \"[\";\n            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << \"/\";\n            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << \"/\";\n            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << \"-\";\n            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << \":\";\n            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << \":\";\n            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << \"] \";\n            // std::stringbuf::str() gets the string contents of the buffer\n            // insert the buffer contents pre-appended by the appropriate prefix into the stream\n            mOutput << mPrefix << str();\n            // set the buffer to empty\n            str(\"\");\n            // flush the stream\n            mOutput.flush();\n        }\n    }\n\n    void setShouldLog(bool shouldLog)\n    {\n        mShouldLog = shouldLog;\n    }\n\nprivate:\n    std::ostream& mOutput;\n    std::string mPrefix;\n    bool mShouldLog;\n};\n\n//!\n//! \\class LogStreamConsumerBase\n//! \\brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer\n//!\nclass LogStreamConsumerBase\n{\npublic:\n    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)\n        : mBuffer(stream, prefix, shouldLog)\n    {\n    }\n\nprotected:\n    LogStreamConsumerBuffer mBuffer;\n};\n\n//!\n//! \\class LogStreamConsumer\n//! 
\\brief Convenience object used to facilitate use of C++ stream syntax when logging messages.\n//!  Order of base classes is LogStreamConsumerBase and then std::ostream.\n//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field\n//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.\n//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.\n//!  Please do not change the order of the parent classes.\n//!\nclass LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream\n{\npublic:\n    //! \\brief Creates a LogStreamConsumer which logs messages with level severity.\n    //!  Reportable severity determines if the messages are severe enough to be logged.\n    LogStreamConsumer(Severity reportableSeverity, Severity severity)\n        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)\n        , std::ostream(&mBuffer) // links the stream buffer with the stream\n        , mShouldLog(severity <= reportableSeverity)\n        , mSeverity(severity)\n    {\n    }\n\n    LogStreamConsumer(LogStreamConsumer&& other)\n        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)\n        , std::ostream(&mBuffer) // links the stream buffer with the stream\n        , mShouldLog(other.mShouldLog)\n        , mSeverity(other.mSeverity)\n    {\n    }\n\n    void setReportableSeverity(Severity reportableSeverity)\n    {\n        mShouldLog = mSeverity <= reportableSeverity;\n        mBuffer.setShouldLog(mShouldLog);\n    }\n\nprivate:\n    static std::ostream& severityOstream(Severity severity)\n    {\n        return severity >= Severity::kINFO ? 
std::cout : std::cerr;\n    }\n\n    static std::string severityPrefix(Severity severity)\n    {\n        switch (severity)\n        {\n        case Severity::kINTERNAL_ERROR: return \"[F] \";\n        case Severity::kERROR: return \"[E] \";\n        case Severity::kWARNING: return \"[W] \";\n        case Severity::kINFO: return \"[I] \";\n        case Severity::kVERBOSE: return \"[V] \";\n        default: assert(0); return \"\";\n        }\n    }\n\n    bool mShouldLog;\n    Severity mSeverity;\n};\n\n//! \\class Logger\n//!\n//! \\brief Class which manages logging of TensorRT tools and samples\n//!\n//! \\details This class provides a common interface for TensorRT tools and samples to log information to the console,\n//! and supports logging two types of messages:\n//!\n//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)\n//! - Test pass/fail messages\n//!\n//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is\n//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.\n//!\n//! In the future, this class could be extended to support dumping test results to a file in some standard format\n//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).\n//!\n//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger\n//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT\n//! library and messages coming from the sample.\n//!\n//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the\n//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger\n//! 
object.\n\nclass Logger : public nvinfer1::ILogger\n{\npublic:\n    Logger(Severity severity = Severity::kWARNING)\n        : mReportableSeverity(severity)\n    {\n    }\n\n    //!\n    //! \\enum TestResult\n    //! \\brief Represents the state of a given test\n    //!\n    enum class TestResult\n    {\n        kRUNNING, //!< The test is running\n        kPASSED,  //!< The test passed\n        kFAILED,  //!< The test failed\n        kWAIVED   //!< The test was waived\n    };\n\n    //!\n    //! \\brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger\n    //! \\return The nvinfer1::ILogger associated with this Logger\n    //!\n    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,\n    //! we can eliminate the inheritance of Logger from ILogger\n    //!\n    nvinfer1::ILogger& getTRTLogger()\n    {\n        return *this;\n    }\n\n    //!\n    //! \\brief Implementation of the nvinfer1::ILogger::log() virtual method\n    //!\n    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the\n    //! inheritance from nvinfer1::ILogger\n    //!\n    void log(Severity severity, const char* msg) noexcept override\n    {\n        LogStreamConsumer(mReportableSeverity, severity) << \"[TRT] \" << std::string(msg) << std::endl;\n    }\n\n    //!\n    //! \\brief Method for controlling the verbosity of logging output\n    //!\n    //! \\param severity The logger will only emit messages that have severity of this level or higher.\n    //!\n    void setReportableSeverity(Severity severity)\n    {\n        mReportableSeverity = severity;\n    }\n\n    //!\n    //! \\brief Opaque handle that holds logging information for a particular test\n    //!\n    //! This object is an opaque handle to information used by the Logger to print test results.\n    //! 
The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used\n    //! with Logger::reportTest{Start,End}().\n    //!\n    class TestAtom\n    {\n    public:\n        TestAtom(TestAtom&&) = default;\n\n    private:\n        friend class Logger;\n\n        TestAtom(bool started, const std::string& name, const std::string& cmdline)\n            : mStarted(started)\n            , mName(name)\n            , mCmdline(cmdline)\n        {\n        }\n\n        bool mStarted;\n        std::string mName;\n        std::string mCmdline;\n    };\n\n    //!\n    //! \\brief Define a test for logging\n    //!\n    //! \\param[in] name The name of the test.  This should be a string starting with\n    //!                  \"TensorRT\" and containing dot-separated strings containing\n    //!                  the characters [A-Za-z0-9_].\n    //!                  For example, \"TensorRT.sample_googlenet\"\n    //! \\param[in] cmdline The command line used to reproduce the test\n    //\n    //! \\return a TestAtom that can be used in Logger::reportTest{Start,End}().\n    //!\n    static TestAtom defineTest(const std::string& name, const std::string& cmdline)\n    {\n        return TestAtom(false, name, cmdline);\n    }\n\n    //!\n    //! \\brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments\n    //!        as input\n    //!\n    //! \\param[in] name The name of the test\n    //! \\param[in] argc The number of command-line arguments\n    //! \\param[in] argv The array of command-line arguments (given as C strings)\n    //!\n    //! \\return a TestAtom that can be used in Logger::reportTest{Start,End}().\n    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)\n    {\n        auto cmdline = genCmdlineString(argc, argv);\n        return defineTest(name, cmdline);\n    }\n\n    //!\n    //! \\brief Report that a test has started.\n    //!\n    //! 
\\pre reportTestStart() has not been called yet for the given testAtom\n    //!\n    //! \\param[in] testAtom The handle to the test that has started\n    //!\n    static void reportTestStart(TestAtom& testAtom)\n    {\n        reportTestResult(testAtom, TestResult::kRUNNING);\n        assert(!testAtom.mStarted);\n        testAtom.mStarted = true;\n    }\n\n    //!\n    //! \\brief Report that a test has ended.\n    //!\n    //! \\pre reportTestStart() has been called for the given testAtom\n    //!\n    //! \\param[in] testAtom The handle to the test that has ended\n    //! \\param[in] result The result of the test. Should be one of TestResult::kPASSED,\n    //!                   TestResult::kFAILED, TestResult::kWAIVED\n    //!\n    static void reportTestEnd(const TestAtom& testAtom, TestResult result)\n    {\n        assert(result != TestResult::kRUNNING);\n        assert(testAtom.mStarted);\n        reportTestResult(testAtom, result);\n    }\n\n    static int reportPass(const TestAtom& testAtom)\n    {\n        reportTestEnd(testAtom, TestResult::kPASSED);\n        return EXIT_SUCCESS;\n    }\n\n    static int reportFail(const TestAtom& testAtom)\n    {\n        reportTestEnd(testAtom, TestResult::kFAILED);\n        return EXIT_FAILURE;\n    }\n\n    static int reportWaive(const TestAtom& testAtom)\n    {\n        reportTestEnd(testAtom, TestResult::kWAIVED);\n        return EXIT_SUCCESS;\n    }\n\n    static int reportTest(const TestAtom& testAtom, bool pass)\n    {\n        return pass ? reportPass(testAtom) : reportFail(testAtom);\n    }\n\n    Severity getReportableSeverity() const\n    {\n        return mReportableSeverity;\n    }\n\nprivate:\n    //!\n    //! 
\\brief returns an appropriate string for prefixing a log message with the given severity\n    //!\n    static const char* severityPrefix(Severity severity)\n    {\n        switch (severity)\n        {\n        case Severity::kINTERNAL_ERROR: return \"[F] \";\n        case Severity::kERROR: return \"[E] \";\n        case Severity::kWARNING: return \"[W] \";\n        case Severity::kINFO: return \"[I] \";\n        case Severity::kVERBOSE: return \"[V] \";\n        default: assert(0); return \"\";\n        }\n    }\n\n    //!\n    //! \\brief returns an appropriate string for prefixing a test result message with the given result\n    //!\n    static const char* testResultString(TestResult result)\n    {\n        switch (result)\n        {\n        case TestResult::kRUNNING: return \"RUNNING\";\n        case TestResult::kPASSED: return \"PASSED\";\n        case TestResult::kFAILED: return \"FAILED\";\n        case TestResult::kWAIVED: return \"WAIVED\";\n        default: assert(0); return \"\";\n        }\n    }\n\n    //!\n    //! \\brief returns an appropriate output stream (cout or cerr) to use with the given severity\n    //!\n    static std::ostream& severityOstream(Severity severity)\n    {\n        return severity >= Severity::kINFO ? std::cout : std::cerr;\n    }\n\n    //!\n    //! \\brief method that implements logging test results\n    //!\n    static void reportTestResult(const TestAtom& testAtom, TestResult result)\n    {\n        severityOstream(Severity::kINFO) << \"&&&& \" << testResultString(result) << \" \" << testAtom.mName << \" # \"\n                                         << testAtom.mCmdline << std::endl;\n    }\n\n    //!\n    //! 
\\brief generate a command line string from the given (argc, argv) values\n    //!\n    static std::string genCmdlineString(int argc, char const* const* argv)\n    {\n        std::stringstream ss;\n        for (int i = 0; i < argc; i++)\n        {\n            if (i > 0)\n                ss << \" \";\n            ss << argv[i];\n        }\n        return ss.str();\n    }\n\n    Severity mReportableSeverity;\n};\n\nnamespace\n{\n\n//!\n//! \\brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE\n//!\n//! Example usage:\n//!\n//!     LOG_VERBOSE(logger) << \"hello world\" << std::endl;\n//!\ninline LogStreamConsumer LOG_VERBOSE(const Logger& logger)\n{\n    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);\n}\n\n//!\n//! \\brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO\n//!\n//! Example usage:\n//!\n//!     LOG_INFO(logger) << \"hello world\" << std::endl;\n//!\ninline LogStreamConsumer LOG_INFO(const Logger& logger)\n{\n    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);\n}\n\n//!\n//! \\brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING\n//!\n//! Example usage:\n//!\n//!     LOG_WARN(logger) << \"hello world\" << std::endl;\n//!\ninline LogStreamConsumer LOG_WARN(const Logger& logger)\n{\n    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);\n}\n\n//!\n//! \\brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR\n//!\n//! Example usage:\n//!\n//!     LOG_ERROR(logger) << \"hello world\" << std::endl;\n//!\ninline LogStreamConsumer LOG_ERROR(const Logger& logger)\n{\n    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);\n}\n\n//!\n//! \\brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR\n//         (\"fatal\" severity)\n//!\n//! 
Example usage:\n//!\n//!     LOG_FATAL(logger) << \"hello world\" << std::endl;\n//!\ninline LogStreamConsumer LOG_FATAL(const Logger& logger)\n{\n    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);\n}\n\n} // anonymous namespace\n\n#endif // TENSORRT_LOGGING_H\n"
  },
  {
    "path": "demo/TensorRT/cpp/yolox.cpp",
    "content": "#include <fstream>\n#include <iostream>\n#include <sstream>\n#include <numeric>\n#include <chrono>\n#include <vector>\n#include <opencv2/opencv.hpp>\n#include <dirent.h>\n#include \"NvInfer.h\"\n#include \"cuda_runtime_api.h\"\n#include \"logging.h\"\n\n#define CHECK(status) \\\n    do\\\n    {\\\n        auto ret = (status);\\\n        if (ret != 0)\\\n        {\\\n            std::cerr << \"Cuda failure: \" << ret << std::endl;\\\n            abort();\\\n        }\\\n    } while (0)\n\n#define DEVICE 0  // GPU id\n#define NMS_THRESH 0.45\n#define BBOX_CONF_THRESH 0.3\n\nusing namespace nvinfer1;\n\n// stuff we know about the network and the input/output blobs\nstatic const int INPUT_W = 640;\nstatic const int INPUT_H = 640;\nstatic const int NUM_CLASSES = 80;\nconst char* INPUT_BLOB_NAME = \"input_0\";\nconst char* OUTPUT_BLOB_NAME = \"output_0\";\nstatic Logger gLogger;\n\ncv::Mat static_resize(cv::Mat& img) {\n    float r = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));\n    // r = std::min(r, 1.0f);\n    int unpad_w = r * img.cols;\n    int unpad_h = r * img.rows;\n    cv::Mat re(unpad_h, unpad_w, CV_8UC3);\n    cv::resize(img, re, re.size());\n    cv::Mat out(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(114, 114, 114));\n    re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));\n    return out;\n}\n\nstruct Object\n{\n    cv::Rect_<float> rect;\n    int label;\n    float prob;\n};\n\nstruct GridAndStride\n{\n    int grid0;\n    int grid1;\n    int stride;\n};\n\nstatic void generate_grids_and_stride(std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)\n{\n    for (auto stride : strides)\n    {\n        int num_grid_y = INPUT_H / stride;\n        int num_grid_x = INPUT_W / stride;\n        for (int g1 = 0; g1 < num_grid_y; g1++)\n        {\n            for (int g0 = 0; g0 < num_grid_x; g0++)\n            {\n                grid_strides.push_back((GridAndStride){g0, g1, stride});\n            }\n        }\n    }\n}\n\nstatic 
inline float intersection_area(const Object& a, const Object& b)\n{\n    cv::Rect_<float> inter = a.rect & b.rect;\n    return inter.area();\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)\n{\n    int i = left;\n    int j = right;\n    float p = faceobjects[(left + right) / 2].prob;\n\n    while (i <= j)\n    {\n        while (faceobjects[i].prob > p)\n            i++;\n\n        while (faceobjects[j].prob < p)\n            j--;\n\n        if (i <= j)\n        {\n            // swap\n            std::swap(faceobjects[i], faceobjects[j]);\n\n            i++;\n            j--;\n        }\n    }\n\n    #pragma omp parallel sections\n    {\n        #pragma omp section\n        {\n            if (left < j) qsort_descent_inplace(faceobjects, left, j);\n        }\n        #pragma omp section\n        {\n            if (i < right) qsort_descent_inplace(faceobjects, i, right);\n        }\n    }\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& objects)\n{\n    if (objects.empty())\n        return;\n\n    qsort_descent_inplace(objects, 0, objects.size() - 1);\n}\n\nstatic void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)\n{\n    picked.clear();\n\n    const int n = faceobjects.size();\n\n    std::vector<float> areas(n);\n    for (int i = 0; i < n; i++)\n    {\n        areas[i] = faceobjects[i].rect.area();\n    }\n\n    for (int i = 0; i < n; i++)\n    {\n        const Object& a = faceobjects[i];\n\n        int keep = 1;\n        for (int j = 0; j < (int)picked.size(); j++)\n        {\n            const Object& b = faceobjects[picked[j]];\n\n            // intersection over union\n            float inter_area = intersection_area(a, b);\n            float union_area = areas[i] + areas[picked[j]] - inter_area;\n            // float IoU = inter_area / union_area\n            if (inter_area / union_area > nms_threshold)\n                keep = 0;\n        }\n\n   
     if (keep)\n            picked.push_back(i);\n    }\n}\n\n\nstatic void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, float* feat_blob, float prob_threshold, std::vector<Object>& objects)\n{\n\n    const int num_anchors = grid_strides.size();\n\n    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)\n    {\n        const int grid0 = grid_strides[anchor_idx].grid0;\n        const int grid1 = grid_strides[anchor_idx].grid1;\n        const int stride = grid_strides[anchor_idx].stride;\n\n        const int basic_pos = anchor_idx * (NUM_CLASSES + 5);\n\n        // yolox/models/yolo_head.py decode logic\n        float x_center = (feat_blob[basic_pos+0] + grid0) * stride;\n        float y_center = (feat_blob[basic_pos+1] + grid1) * stride;\n        float w = exp(feat_blob[basic_pos+2]) * stride;\n        float h = exp(feat_blob[basic_pos+3]) * stride;\n        float x0 = x_center - w * 0.5f;\n        float y0 = y_center - h * 0.5f;\n\n        float box_objectness = feat_blob[basic_pos+4];\n        for (int class_idx = 0; class_idx < NUM_CLASSES; class_idx++)\n        {\n            float box_cls_score = feat_blob[basic_pos + 5 + class_idx];\n            float box_prob = box_objectness * box_cls_score;\n            if (box_prob > prob_threshold)\n            {\n                Object obj;\n                obj.rect.x = x0;\n                obj.rect.y = y0;\n                obj.rect.width = w;\n                obj.rect.height = h;\n                obj.label = class_idx;\n                obj.prob = box_prob;\n\n                objects.push_back(obj);\n            }\n\n        } // class loop\n\n    } // point anchor loop\n}\n\nfloat* blobFromImage(cv::Mat& img){\n    float* blob = new float[img.total()*3];\n    int channels = 3;\n    int img_h = img.rows;\n    int img_w = img.cols;\n    for (size_t c = 0; c < channels; c++) \n    {\n        for (size_t  h = 0; h < img_h; h++) \n        {\n            for (size_t w = 0; w < img_w; w++) \n  
          {\n                blob[c * img_w * img_h + h * img_w + w] =\n                    (float)img.at<cv::Vec3b>(h, w)[c];\n            }\n        }\n    }\n    return blob;\n}\n\n\nstatic void decode_outputs(float* prob, std::vector<Object>& objects, float scale, const int img_w, const int img_h) {\n        std::vector<Object> proposals;\n        std::vector<int> strides = {8, 16, 32};\n        std::vector<GridAndStride> grid_strides;\n        generate_grids_and_stride(strides, grid_strides);\n        generate_yolox_proposals(grid_strides, prob,  BBOX_CONF_THRESH, proposals);\n        std::cout << \"num of boxes before nms: \" << proposals.size() << std::endl;\n\n        qsort_descent_inplace(proposals);\n\n        std::vector<int> picked;\n        nms_sorted_bboxes(proposals, picked, NMS_THRESH);\n\n\n        int count = picked.size();\n\n        std::cout << \"num of boxes: \" << count << std::endl;\n\n        objects.resize(count);\n        for (int i = 0; i < count; i++)\n        {\n            objects[i] = proposals[picked[i]];\n\n            // adjust offset to original unpadded\n            float x0 = (objects[i].rect.x) / scale;\n            float y0 = (objects[i].rect.y) / scale;\n            float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;\n            float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;\n\n            // clip\n            x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);\n            y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);\n            x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);\n            y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);\n\n            objects[i].rect.x = x0;\n            objects[i].rect.y = y0;\n            objects[i].rect.width = x1 - x0;\n            objects[i].rect.height = y1 - y0;\n        }\n}\n\nconst float color_list[80][3] =\n{\n    {0.000, 0.447, 0.741},\n    {0.850, 0.325, 0.098},\n    {0.929, 0.694, 0.125},\n    {0.494, 0.184, 0.556},\n    
{0.466, 0.674, 0.188},\n    {0.301, 0.745, 0.933},\n    {0.635, 0.078, 0.184},\n    {0.300, 0.300, 0.300},\n    {0.600, 0.600, 0.600},\n    {1.000, 0.000, 0.000},\n    {1.000, 0.500, 0.000},\n    {0.749, 0.749, 0.000},\n    {0.000, 1.000, 0.000},\n    {0.000, 0.000, 1.000},\n    {0.667, 0.000, 1.000},\n    {0.333, 0.333, 0.000},\n    {0.333, 0.667, 0.000},\n    {0.333, 1.000, 0.000},\n    {0.667, 0.333, 0.000},\n    {0.667, 0.667, 0.000},\n    {0.667, 1.000, 0.000},\n    {1.000, 0.333, 0.000},\n    {1.000, 0.667, 0.000},\n    {1.000, 1.000, 0.000},\n    {0.000, 0.333, 0.500},\n    {0.000, 0.667, 0.500},\n    {0.000, 1.000, 0.500},\n    {0.333, 0.000, 0.500},\n    {0.333, 0.333, 0.500},\n    {0.333, 0.667, 0.500},\n    {0.333, 1.000, 0.500},\n    {0.667, 0.000, 0.500},\n    {0.667, 0.333, 0.500},\n    {0.667, 0.667, 0.500},\n    {0.667, 1.000, 0.500},\n    {1.000, 0.000, 0.500},\n    {1.000, 0.333, 0.500},\n    {1.000, 0.667, 0.500},\n    {1.000, 1.000, 0.500},\n    {0.000, 0.333, 1.000},\n    {0.000, 0.667, 1.000},\n    {0.000, 1.000, 1.000},\n    {0.333, 0.000, 1.000},\n    {0.333, 0.333, 1.000},\n    {0.333, 0.667, 1.000},\n    {0.333, 1.000, 1.000},\n    {0.667, 0.000, 1.000},\n    {0.667, 0.333, 1.000},\n    {0.667, 0.667, 1.000},\n    {0.667, 1.000, 1.000},\n    {1.000, 0.000, 1.000},\n    {1.000, 0.333, 1.000},\n    {1.000, 0.667, 1.000},\n    {0.333, 0.000, 0.000},\n    {0.500, 0.000, 0.000},\n    {0.667, 0.000, 0.000},\n    {0.833, 0.000, 0.000},\n    {1.000, 0.000, 0.000},\n    {0.000, 0.167, 0.000},\n    {0.000, 0.333, 0.000},\n    {0.000, 0.500, 0.000},\n    {0.000, 0.667, 0.000},\n    {0.000, 0.833, 0.000},\n    {0.000, 1.000, 0.000},\n    {0.000, 0.000, 0.167},\n    {0.000, 0.000, 0.333},\n    {0.000, 0.000, 0.500},\n    {0.000, 0.000, 0.667},\n    {0.000, 0.000, 0.833},\n    {0.000, 0.000, 1.000},\n    {0.000, 0.000, 0.000},\n    {0.143, 0.143, 0.143},\n    {0.286, 0.286, 0.286},\n    {0.429, 0.429, 0.429},\n    {0.571, 0.571, 0.571},\n    {0.714, 
0.714, 0.714},\n    {0.857, 0.857, 0.857},\n    {0.000, 0.447, 0.741},\n    {0.314, 0.717, 0.741},\n    {0.50, 0.5, 0}\n};\n\nstatic void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects, std::string f)\n{\n    static const char* class_names[] = {\n        \"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\",\n        \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\",\n        \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\",\n        \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\",\n        \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\",\n        \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\",\n        \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\",\n        \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\",\n        \"hair drier\", \"toothbrush\"\n    };\n\n    cv::Mat image = bgr.clone();\n\n    for (size_t i = 0; i < objects.size(); i++)\n    {\n        const Object& obj = objects[i];\n\n        fprintf(stderr, \"%d = %.5f at %.2f %.2f %.2f x %.2f\\n\", obj.label, obj.prob,\n                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);\n\n        cv::Scalar color = cv::Scalar(color_list[obj.label][0], color_list[obj.label][1], color_list[obj.label][2]);\n        float c_mean = cv::mean(color)[0];\n        cv::Scalar txt_color;\n        if (c_mean > 0.5){\n            txt_color = cv::Scalar(0, 0, 0);\n        }else{\n            txt_color = 
cv::Scalar(255, 255, 255);\n        }\n\n        cv::rectangle(image, obj.rect, color * 255, 2);\n\n        char text[256];\n        sprintf(text, \"%s %.1f%%\", class_names[obj.label], obj.prob * 100);\n\n        int baseLine = 0;\n        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);\n\n        cv::Scalar txt_bk_color = color * 0.7 * 255;\n\n        int x = obj.rect.x;\n        int y = obj.rect.y + 1;\n        //int y = obj.rect.y - label_size.height - baseLine;\n        if (y > image.rows)\n            y = image.rows;\n        //if (x + label_size.width > image.cols)\n            //x = image.cols - label_size.width;\n\n        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),\n                      txt_bk_color, -1);\n\n        cv::putText(image, text, cv::Point(x, y + label_size.height),\n                    cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);\n    }\n\n    cv::imwrite(\"det_res.jpg\", image);\n    fprintf(stderr, \"save vis file\\n\");\n    /* cv::imshow(\"image\", image); */\n    /* cv::waitKey(0); */\n}\n\n\nvoid doInference(IExecutionContext& context, float* input, float* output, const int output_size, cv::Size input_shape) {\n    const ICudaEngine& engine = context.getEngine();\n\n    // Pointers to input and output device buffers to pass to engine.\n    // Engine requires exactly IEngine::getNbBindings() number of buffers.\n    assert(engine.getNbBindings() == 2);\n    void* buffers[2];\n\n    // In order to bind the buffers, we need to know the names of the input and output tensors.\n    // Note that indices are guaranteed to be less than IEngine::getNbBindings()\n    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);\n\n    assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);\n    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);\n    assert(engine.getBindingDataType(outputIndex) == 
nvinfer1::DataType::kFLOAT);\n    int mBatchSize = engine.getMaxBatchSize();\n\n    // Create GPU buffers on device\n    CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));\n    CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));\n\n    // Create stream\n    cudaStream_t stream;\n    CHECK(cudaStreamCreate(&stream));\n\n    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host\n    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));\n    context.enqueue(1, buffers, stream, nullptr);\n    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));\n    cudaStreamSynchronize(stream);\n\n    // Release stream and buffers\n    cudaStreamDestroy(stream);\n    CHECK(cudaFree(buffers[inputIndex]));\n    CHECK(cudaFree(buffers[outputIndex]));\n}\n\nint main(int argc, char** argv) {\n    cudaSetDevice(DEVICE);\n    // create a model using the API directly and serialize it to a stream\n    char *trtModelStream{nullptr};\n    size_t size{0};\n\n    if (argc == 4 && std::string(argv[2]) == \"-i\") {\n        const std::string engine_file_path {argv[1]};\n        std::ifstream file(engine_file_path, std::ios::binary);\n        if (file.good()) {\n            file.seekg(0, file.end);\n            size = file.tellg();\n            file.seekg(0, file.beg);\n            trtModelStream = new char[size];\n            assert(trtModelStream);\n            file.read(trtModelStream, size);\n            file.close();\n        }\n    } else {\n        std::cerr << \"arguments not right!\" << std::endl;\n        std::cerr << \"run 'python3 yolox/deploy/trt.py -n yolox-{tiny, s, m, l, x}' to serialize model first!\" << std::endl;\n        std::cerr << \"Then use the following command:\" << std::endl;\n        std::cerr << \"./yolox 
../model_trt.engine -i ../../../assets/dog.jpg  // deserialize file and run inference\" << std::endl;\n        return -1;\n    }\n    const std::string input_image_path {argv[3]};\n\n    //std::vector<std::string> file_names;\n    //if (read_files_in_dir(argv[2], file_names) < 0) {\n        //std::cout << \"read_files_in_dir failed.\" << std::endl;\n        //return -1;\n    //}\n\n    IRuntime* runtime = createInferRuntime(gLogger);\n    assert(runtime != nullptr);\n    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);\n    assert(engine != nullptr); \n    IExecutionContext* context = engine->createExecutionContext();\n    assert(context != nullptr);\n    delete[] trtModelStream;\n    auto out_dims = engine->getBindingDimensions(1);\n    auto output_size = 1;\n    for(int j=0;j<out_dims.nbDims;j++) {\n        output_size *= out_dims.d[j];\n    }\n    static float* prob = new float[output_size];\n\n    cv::Mat img = cv::imread(input_image_path);\n    int img_w = img.cols;\n    int img_h = img.rows;\n    cv::Mat pr_img = static_resize(img);\n    std::cout << \"blob image\" << std::endl;\n\n    float* blob;\n    blob = blobFromImage(pr_img);\n    float scale = std::min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));\n\n    // run inference\n    auto start = std::chrono::system_clock::now();\n    doInference(*context, blob, prob, output_size, pr_img.size());\n    auto end = std::chrono::system_clock::now();\n    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << \"ms\" << std::endl;\n\n    std::vector<Object> objects;\n    decode_outputs(prob, objects, scale, img_w, img_h);\n    draw_objects(img, objects, input_image_path);\n    // delete the pointer to the float\n    delete blob;\n    // destroy the engine\n    context->destroy();\n    engine->destroy();\n    runtime->destroy();\n    return 0;\n}\n"
  },
  {
    "path": "demo/TensorRT/python/README.md",
    "content": "# YOLOX-TensorRT in Python\n\nThis tutorial includes a Python demo for TensorRT.\n\n## Install TensorRT Toolkit\n\nPlease follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT and torch2trt.\n\n## Convert model\n\nYOLOX models can be easily conveted to TensorRT models using torch2trt\n\n   If you want to convert our model, use the flag -n to specify a model name:\n   ```shell\n   python tools/trt.py -n <YOLOX_MODEL_NAME> -c <YOLOX_CHECKPOINT>\n   ```\n   For example:\n   ```shell\n   python tools/trt.py -n yolox-s -c your_ckpt.pth\n   ```\n   <YOLOX_MODEL_NAME> can be: yolox-nano, yolox-tiny. yolox-s, yolox-m, yolox-l, yolox-x.\n\n   If you want to convert your customized model, use the flag -f to specify you exp file:\n   ```shell\n   python tools/trt.py -f <YOLOX_EXP_FILE> -c <YOLOX_CHECKPOINT>\n   ```\n   For example:\n   ```shell\n   python tools/trt.py -f /path/to/your/yolox/exps/yolox_s.py -c your_ckpt.pth\n   ```\n   *yolox_s.py* can be any exp file modified by you.\n\nThe converted model and the serialized engine file (for C++ demo) will be saved on your experiment output dir.  \n\n## Demo\n\nThe TensorRT python demo is merged on our pytorch demo file, so you can run the pytorch demo command with ```--trt```.\n\n```shell\npython tools/demo.py image -n yolox-s --trt --save_result\n```\nor\n```shell\npython tools/demo.py image -f exps/default/yolox_s.py --trt --save_result\n```\n\n"
  },
  {
    "path": "demo/ncnn/README.md",
    "content": "# YOLOX-ncnn\n\nCompile files of YOLOX object detection base on [ncnn](https://github.com/Tencent/ncnn).  \nYOLOX is included in ncnn now, you could also try building from ncnn, it's better.\n\n## Acknowledgement\n\n* [ncnn](https://github.com/Tencent/ncnn)\n"
  },
  {
    "path": "demo/ncnn/android/README.md",
    "content": "# YOLOX-Android-ncnn\n\nAndoird app of YOLOX object detection base on [ncnn](https://github.com/Tencent/ncnn)\n\n\n## Tutorial\n\n### Step1\n\nDownload ncnn-android-vulkan.zip from [releases of ncnn](https://github.com/Tencent/ncnn/releases). This repo uses\n[20210525 release](https://github.com/Tencent/ncnn/releases/download/20210525/ncnn-20210525-android-vulkan.zip) for building.\n\n### Step2\n\nAfter downloading, please extract your zip file. Then, there are two ways to finish this step:\n* put your extracted directory into **app/src/main/jni**\n* change the **ncnn_DIR** path in **app/src/main/jni/CMakeLists.txt** to your extracted directory\n\n### Step3\nDownload example param and bin file from [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ESXBH_GSSmFMszWJ6YG2VkQB5cWDfqVWXgk0D996jH0rpQ?e=qzEqUh) or [github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz). Unzip the file to **app/src/main/assets**.\n\n### Step4\nOpen this project with Android Studio, build it and enjoy!\n\n## Reference\n\n* [ncnn-android-yolov5](https://github.com/nihui/ncnn-android-yolov5)\n"
  },
  {
    "path": "demo/ncnn/android/app/build.gradle",
    "content": "apply plugin: 'com.android.application'\n\nandroid {\n    compileSdkVersion 24\n    buildToolsVersion \"29.0.2\"\n\n    defaultConfig {\n        applicationId \"com.megvii.yoloXncnn\"\n        archivesBaseName = \"$applicationId\"\n\n        ndk {\n            moduleName \"ncnn\"\n            abiFilters \"armeabi-v7a\", \"arm64-v8a\"\n        }\n        minSdkVersion 24\n    }\n\n    externalNativeBuild {\n        cmake {\n            version \"3.10.2\"\n            path file('src/main/jni/CMakeLists.txt')\n        }\n    }\n}\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/AndroidManifest.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<manifest xmlns:android=\"http://schemas.android.com/apk/res/android\"\n      package=\"com.megvii.yoloXncnn\"\n      android:versionCode=\"1\"\n      android:versionName=\"1.1\">\n    <application android:label=\"@string/app_name\" >\n        <activity android:name=\"MainActivity\"\n                  android:label=\"@string/app_name\">\n            <intent-filter>\n                <action android:name=\"android.intent.action.MAIN\" />\n                <category android:name=\"android.intent.category.LAUNCHER\" />\n            </intent-filter>\n        </activity>\n    </application>\n</manifest> \n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/assets/yolox.param",
    "content": "7767517\n220 250\nInput                    images                   0 1 images\nYoloV5Focus              focus                    1 1 images 503\nConvolution              Conv_41                  1 1 503 877 0=32 1=3 4=1 5=1 6=3456\nSwish                    Mul_43                   1 1 877 507\nConvolution              Conv_44                  1 1 507 880 0=64 1=3 3=2 4=1 5=1 6=18432\nSwish                    Mul_46                   1 1 880 511\nSplit                    splitncnn_0              1 2 511 511_splitncnn_0 511_splitncnn_1\nConvolution              Conv_47                  1 1 511_splitncnn_1 883 0=32 1=1 5=1 6=2048\nSwish                    Mul_49                   1 1 883 515\nSplit                    splitncnn_1              1 2 515 515_splitncnn_0 515_splitncnn_1\nConvolution              Conv_50                  1 1 511_splitncnn_0 886 0=32 1=1 5=1 6=2048\nSwish                    Mul_52                   1 1 886 519\nConvolution              Conv_53                  1 1 515_splitncnn_1 889 0=32 1=1 5=1 6=1024\nSwish                    Mul_55                   1 1 889 523\nConvolution              Conv_56                  1 1 523 892 0=32 1=3 4=1 5=1 6=9216\nSwish                    Mul_58                   1 1 892 527\nBinaryOp                 Add_59                   2 1 527 515_splitncnn_0 528\nConcat                   Concat_60                2 1 528 519 529\nConvolution              Conv_61                  1 1 529 895 0=64 1=1 5=1 6=4096\nSwish                    Mul_63                   1 1 895 533\nConvolution              Conv_64                  1 1 533 898 0=128 1=3 3=2 4=1 5=1 6=73728\nSwish                    Mul_66                   1 1 898 537\nSplit                    splitncnn_2              1 2 537 537_splitncnn_0 537_splitncnn_1\nConvolution              Conv_67                  1 1 537_splitncnn_1 901 0=64 1=1 5=1 6=8192\nSwish                    Mul_69                   1 1 901 541\nSplit                    
splitncnn_3              1 2 541 541_splitncnn_0 541_splitncnn_1\nConvolution              Conv_70                  1 1 537_splitncnn_0 904 0=64 1=1 5=1 6=8192\nSwish                    Mul_72                   1 1 904 545\nConvolution              Conv_73                  1 1 541_splitncnn_1 907 0=64 1=1 5=1 6=4096\nSwish                    Mul_75                   1 1 907 549\nConvolution              Conv_76                  1 1 549 910 0=64 1=3 4=1 5=1 6=36864\nSwish                    Mul_78                   1 1 910 553\nBinaryOp                 Add_79                   2 1 553 541_splitncnn_0 554\nSplit                    splitncnn_4              1 2 554 554_splitncnn_0 554_splitncnn_1\nConvolution              Conv_80                  1 1 554_splitncnn_1 913 0=64 1=1 5=1 6=4096\nSwish                    Mul_82                   1 1 913 558\nConvolution              Conv_83                  1 1 558 916 0=64 1=3 4=1 5=1 6=36864\nSwish                    Mul_85                   1 1 916 562\nBinaryOp                 Add_86                   2 1 562 554_splitncnn_0 563\nSplit                    splitncnn_5              1 2 563 563_splitncnn_0 563_splitncnn_1\nConvolution              Conv_87                  1 1 563_splitncnn_1 919 0=64 1=1 5=1 6=4096\nSwish                    Mul_89                   1 1 919 567\nConvolution              Conv_90                  1 1 567 922 0=64 1=3 4=1 5=1 6=36864\nSwish                    Mul_92                   1 1 922 571\nBinaryOp                 Add_93                   2 1 571 563_splitncnn_0 572\nConcat                   Concat_94                2 1 572 545 573\nConvolution              Conv_95                  1 1 573 925 0=128 1=1 5=1 6=16384\nSwish                    Mul_97                   1 1 925 577\nSplit                    splitncnn_6              1 2 577 577_splitncnn_0 577_splitncnn_1\nConvolution              Conv_98                  1 1 577_splitncnn_1 928 0=256 1=3 3=2 4=1 5=1 6=294912\nSwish             
       Mul_100                  1 1 928 581\nSplit                    splitncnn_7              1 2 581 581_splitncnn_0 581_splitncnn_1\nConvolution              Conv_101                 1 1 581_splitncnn_1 931 0=128 1=1 5=1 6=32768\nSwish                    Mul_103                  1 1 931 585\nSplit                    splitncnn_8              1 2 585 585_splitncnn_0 585_splitncnn_1\nConvolution              Conv_104                 1 1 581_splitncnn_0 934 0=128 1=1 5=1 6=32768\nSwish                    Mul_106                  1 1 934 589\nConvolution              Conv_107                 1 1 585_splitncnn_1 937 0=128 1=1 5=1 6=16384\nSwish                    Mul_109                  1 1 937 593\nConvolution              Conv_110                 1 1 593 940 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_112                  1 1 940 597\nBinaryOp                 Add_113                  2 1 597 585_splitncnn_0 598\nSplit                    splitncnn_9              1 2 598 598_splitncnn_0 598_splitncnn_1\nConvolution              Conv_114                 1 1 598_splitncnn_1 943 0=128 1=1 5=1 6=16384\nSwish                    Mul_116                  1 1 943 602\nConvolution              Conv_117                 1 1 602 946 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_119                  1 1 946 606\nBinaryOp                 Add_120                  2 1 606 598_splitncnn_0 607\nSplit                    splitncnn_10             1 2 607 607_splitncnn_0 607_splitncnn_1\nConvolution              Conv_121                 1 1 607_splitncnn_1 949 0=128 1=1 5=1 6=16384\nSwish                    Mul_123                  1 1 949 611\nConvolution              Conv_124                 1 1 611 952 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_126                  1 1 952 615\nBinaryOp                 Add_127                  2 1 615 607_splitncnn_0 616\nConcat                   Concat_128               2 1 616 589 617\nConvolution              
Conv_129                 1 1 617 955 0=256 1=1 5=1 6=65536\nSwish                    Mul_131                  1 1 955 621\nSplit                    splitncnn_11             1 2 621 621_splitncnn_0 621_splitncnn_1\nConvolution              Conv_132                 1 1 621_splitncnn_1 958 0=512 1=3 3=2 4=1 5=1 6=1179648\nSwish                    Mul_134                  1 1 958 625\nConvolution              Conv_135                 1 1 625 961 0=256 1=1 5=1 6=131072\nSwish                    Mul_137                  1 1 961 629\nSplit                    splitncnn_12             1 4 629 629_splitncnn_0 629_splitncnn_1 629_splitncnn_2 629_splitncnn_3\nPooling                  MaxPool_138              1 1 629_splitncnn_3 630 1=5 3=2 5=1\nPooling                  MaxPool_139              1 1 629_splitncnn_2 631 1=9 3=4 5=1\nPooling                  MaxPool_140              1 1 629_splitncnn_1 632 1=13 3=6 5=1\nConcat                   Concat_141               4 1 629_splitncnn_0 630 631 632 633\nConvolution              Conv_142                 1 1 633 964 0=512 1=1 5=1 6=524288\nSwish                    Mul_144                  1 1 964 637\nSplit                    splitncnn_13             1 2 637 637_splitncnn_0 637_splitncnn_1\nConvolution              Conv_145                 1 1 637_splitncnn_1 967 0=256 1=1 5=1 6=131072\nSwish                    Mul_147                  1 1 967 641\nConvolution              Conv_148                 1 1 637_splitncnn_0 970 0=256 1=1 5=1 6=131072\nSwish                    Mul_150                  1 1 970 645\nConvolution              Conv_151                 1 1 641 973 0=256 1=1 5=1 6=65536\nSwish                    Mul_153                  1 1 973 649\nConvolution              Conv_154                 1 1 649 976 0=256 1=3 4=1 5=1 6=589824\nSwish                    Mul_156                  1 1 976 653\nConcat                   Concat_157               2 1 653 645 654\nConvolution              Conv_158                 1 1 654 979 
0=512 1=1 5=1 6=262144\nSwish                    Mul_160                  1 1 979 658\nConvolution              Conv_161                 1 1 658 982 0=256 1=1 5=1 6=131072\nSwish                    Mul_163                  1 1 982 662\nSplit                    splitncnn_14             1 2 662 662_splitncnn_0 662_splitncnn_1\nInterp                   Resize_165               1 1 662_splitncnn_1 667 0=1 1=2.000000e+00 2=2.000000e+00\nConcat                   Concat_166               2 1 667 621_splitncnn_0 668\nSplit                    splitncnn_15             1 2 668 668_splitncnn_0 668_splitncnn_1\nConvolution              Conv_167                 1 1 668_splitncnn_1 985 0=128 1=1 5=1 6=65536\nSwish                    Mul_169                  1 1 985 672\nConvolution              Conv_170                 1 1 668_splitncnn_0 988 0=128 1=1 5=1 6=65536\nSwish                    Mul_172                  1 1 988 676\nConvolution              Conv_173                 1 1 672 991 0=128 1=1 5=1 6=16384\nSwish                    Mul_175                  1 1 991 680\nConvolution              Conv_176                 1 1 680 994 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_178                  1 1 994 684\nConcat                   Concat_179               2 1 684 676 685\nConvolution              Conv_180                 1 1 685 997 0=256 1=1 5=1 6=65536\nSwish                    Mul_182                  1 1 997 689\nConvolution              Conv_183                 1 1 689 1000 0=128 1=1 5=1 6=32768\nSwish                    Mul_185                  1 1 1000 693\nSplit                    splitncnn_16             1 2 693 693_splitncnn_0 693_splitncnn_1\nInterp                   Resize_187               1 1 693_splitncnn_1 698 0=1 1=2.000000e+00 2=2.000000e+00\nConcat                   Concat_188               2 1 698 577_splitncnn_0 699\nSplit                    splitncnn_17             1 2 699 699_splitncnn_0 699_splitncnn_1\nConvolution              Conv_189     
            1 1 699_splitncnn_1 1003 0=64 1=1 5=1 6=16384\nSwish                    Mul_191                  1 1 1003 703\nConvolution              Conv_192                 1 1 699_splitncnn_0 1006 0=64 1=1 5=1 6=16384\nSwish                    Mul_194                  1 1 1006 707\nConvolution              Conv_195                 1 1 703 1009 0=64 1=1 5=1 6=4096\nSwish                    Mul_197                  1 1 1009 711\nConvolution              Conv_198                 1 1 711 1012 0=64 1=3 4=1 5=1 6=36864\nSwish                    Mul_200                  1 1 1012 715\nConcat                   Concat_201               2 1 715 707 716\nConvolution              Conv_202                 1 1 716 1015 0=128 1=1 5=1 6=16384\nSwish                    Mul_204                  1 1 1015 720\nSplit                    splitncnn_18             1 2 720 720_splitncnn_0 720_splitncnn_1\nConvolution              Conv_205                 1 1 720_splitncnn_1 1018 0=128 1=3 3=2 4=1 5=1 6=147456\nSwish                    Mul_207                  1 1 1018 724\nConcat                   Concat_208               2 1 724 693_splitncnn_0 725\nSplit                    splitncnn_19             1 2 725 725_splitncnn_0 725_splitncnn_1\nConvolution              Conv_209                 1 1 725_splitncnn_1 1021 0=128 1=1 5=1 6=32768\nSwish                    Mul_211                  1 1 1021 729\nConvolution              Conv_212                 1 1 725_splitncnn_0 1024 0=128 1=1 5=1 6=32768\nSwish                    Mul_214                  1 1 1024 733\nConvolution              Conv_215                 1 1 729 1027 0=128 1=1 5=1 6=16384\nSwish                    Mul_217                  1 1 1027 737\nConvolution              Conv_218                 1 1 737 1030 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_220                  1 1 1030 741\nConcat                   Concat_221               2 1 741 733 742\nConvolution              Conv_222                 1 1 742 1033 0=256 
1=1 5=1 6=65536\nSwish                    Mul_224                  1 1 1033 746\nSplit                    splitncnn_20             1 2 746 746_splitncnn_0 746_splitncnn_1\nConvolution              Conv_225                 1 1 746_splitncnn_1 1036 0=256 1=3 3=2 4=1 5=1 6=589824\nSwish                    Mul_227                  1 1 1036 750\nConcat                   Concat_228               2 1 750 662_splitncnn_0 751\nSplit                    splitncnn_21             1 2 751 751_splitncnn_0 751_splitncnn_1\nConvolution              Conv_229                 1 1 751_splitncnn_1 1039 0=256 1=1 5=1 6=131072\nSwish                    Mul_231                  1 1 1039 755\nConvolution              Conv_232                 1 1 751_splitncnn_0 1042 0=256 1=1 5=1 6=131072\nSwish                    Mul_234                  1 1 1042 759\nConvolution              Conv_235                 1 1 755 1045 0=256 1=1 5=1 6=65536\nSwish                    Mul_237                  1 1 1045 763\nConvolution              Conv_238                 1 1 763 1048 0=256 1=3 4=1 5=1 6=589824\nSwish                    Mul_240                  1 1 1048 767\nConcat                   Concat_241               2 1 767 759 768\nConvolution              Conv_242                 1 1 768 1051 0=512 1=1 5=1 6=262144\nSwish                    Mul_244                  1 1 1051 772\nConvolution              Conv_245                 1 1 720_splitncnn_0 1054 0=128 1=1 5=1 6=16384\nSwish                    Mul_247                  1 1 1054 776\nSplit                    splitncnn_22             1 2 776 776_splitncnn_0 776_splitncnn_1\nConvolution              Conv_248                 1 1 776_splitncnn_1 1057 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_250                  1 1 1057 780\nConvolution              Conv_251                 1 1 780 1060 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_253                  1 1 1060 784\nConvolution              Conv_254                 1 1 784 797 
0=80 1=1 5=1 6=10240 9=4\nConvolution              Conv_255                 1 1 776_splitncnn_0 1063 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_257                  1 1 1063 789\nConvolution              Conv_258                 1 1 789 1066 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_260                  1 1 1066 793\nSplit                    splitncnn_23             1 2 793 793_splitncnn_0 793_splitncnn_1\nConvolution              Conv_261                 1 1 793_splitncnn_1 794 0=4 1=1 5=1 6=512\nConvolution              Conv_262                 1 1 793_splitncnn_0 796 0=1 1=1 5=1 6=128 9=4\nConcat                   Concat_265               3 1 794 796 797 798\nConvolution              Conv_266                 1 1 746_splitncnn_0 1069 0=128 1=1 5=1 6=32768\nSwish                    Mul_268                  1 1 1069 802\nSplit                    splitncnn_24             1 2 802 802_splitncnn_0 802_splitncnn_1\nConvolution              Conv_269                 1 1 802_splitncnn_1 1072 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_271                  1 1 1072 806\nConvolution              Conv_272                 1 1 806 1075 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_274                  1 1 1075 810\nConvolution              Conv_275                 1 1 810 823 0=80 1=1 5=1 6=10240 9=4\nConvolution              Conv_276                 1 1 802_splitncnn_0 1078 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_278                  1 1 1078 815\nConvolution              Conv_279                 1 1 815 1081 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_281                  1 1 1081 819\nSplit                    splitncnn_25             1 2 819 819_splitncnn_0 819_splitncnn_1\nConvolution              Conv_282                 1 1 819_splitncnn_1 820 0=4 1=1 5=1 6=512\nConvolution              Conv_283                 1 1 819_splitncnn_0 822 0=1 1=1 5=1 6=128 9=4\nConcat                   
Concat_286               3 1 820 822 823 824\nConvolution              Conv_287                 1 1 772 1084 0=128 1=1 5=1 6=65536\nSwish                    Mul_289                  1 1 1084 828\nSplit                    splitncnn_26             1 2 828 828_splitncnn_0 828_splitncnn_1\nConvolution              Conv_290                 1 1 828_splitncnn_1 1087 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_292                  1 1 1087 832\nConvolution              Conv_293                 1 1 832 1090 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_295                  1 1 1090 836\nConvolution              Conv_296                 1 1 836 849 0=80 1=1 5=1 6=10240 9=4\nConvolution              Conv_297                 1 1 828_splitncnn_0 1093 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_299                  1 1 1093 841\nConvolution              Conv_300                 1 1 841 1096 0=128 1=3 4=1 5=1 6=147456\nSwish                    Mul_302                  1 1 1096 845\nSplit                    splitncnn_27             1 2 845 845_splitncnn_0 845_splitncnn_1\nConvolution              Conv_303                 1 1 845_splitncnn_1 846 0=4 1=1 5=1 6=512\nConvolution              Conv_304                 1 1 845_splitncnn_0 848 0=1 1=1 5=1 6=128 9=4\nConcat                   Concat_307               3 1 846 848 849 850\nReshape                  Reshape_315              1 1 798 858 0=-1 1=85\nReshape                  Reshape_323              1 1 824 866 0=-1 1=85\nReshape                  Reshape_331              1 1 850 874 0=-1 1=85\nConcat                   Concat_332               3 1 858 866 874 875 0=1\nPermute                  Transpose_333            1 1 875 output 0=1\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/MainActivity.java",
    "content": "// Some code in this file is based on:\n// https://github.com/nihui/ncnn-android-yolov5/blob/master/app/src/main/java/com/tencent/yolov5ncnn/MainActivity.java\n// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.\n// Copyright (C) Megvii, Inc. and its affiliates. All rights reserved.\n\npackage com.megvii.yoloXncnn;\n\nimport android.app.Activity;\nimport android.content.Intent;\nimport android.graphics.Bitmap;\nimport android.graphics.BitmapFactory;\nimport android.graphics.Canvas;\nimport android.graphics.Color;\nimport android.graphics.Paint;\nimport android.media.ExifInterface;\nimport android.graphics.Matrix;\nimport android.net.Uri;\nimport android.os.Bundle;\nimport android.util.Log;\nimport android.view.View;\nimport android.widget.Button;\nimport android.widget.ImageView;\n\nimport java.io.FileNotFoundException;\nimport java.io.InputStream;\nimport java.io.IOException;\n\npublic class MainActivity extends Activity\n{\n    private static final int SELECT_IMAGE = 1;\n\n    private ImageView imageView;\n    private Bitmap bitmap = null;\n    private Bitmap yourSelectedImage = null;\n\n    private YOLOXncnn yoloX = new YOLOXncnn();\n\n    /** Called when the activity is first created. 
*/\n    @Override\n    public void onCreate(Bundle savedInstanceState)\n    {\n        super.onCreate(savedInstanceState);\n        setContentView(R.layout.main);\n\n        boolean ret_init = yoloX.Init(getAssets());\n        if (!ret_init)\n        {\n            Log.e(\"MainActivity\", \"yoloXncnn Init failed\");\n        }\n\n        imageView = (ImageView) findViewById(R.id.imageView);\n\n        Button buttonImage = (Button) findViewById(R.id.buttonImage);\n        buttonImage.setOnClickListener(new View.OnClickListener() {\n            @Override\n            public void onClick(View arg0) {\n                Intent i = new Intent(Intent.ACTION_PICK);\n                i.setType(\"image/*\");\n                startActivityForResult(i, SELECT_IMAGE);\n            }\n        });\n\n        Button buttonDetect = (Button) findViewById(R.id.buttonDetect);\n        buttonDetect.setOnClickListener(new View.OnClickListener() {\n            @Override\n            public void onClick(View arg0) {\n                if (yourSelectedImage == null)\n                    return;\n                YOLOXncnn.Obj[] objects = yoloX.Detect(yourSelectedImage, false);\n\n                showObjects(objects);\n            }\n        });\n\n        Button buttonDetectGPU = (Button) findViewById(R.id.buttonDetectGPU);\n        buttonDetectGPU.setOnClickListener(new View.OnClickListener() {\n            @Override\n            public void onClick(View arg0) {\n                if (yourSelectedImage == null)\n                    return;\n\n                YOLOXncnn.Obj[] objects = yoloX.Detect(yourSelectedImage, true);\n\n                showObjects(objects);\n            }\n        });\n    }\n\n    private void showObjects(YOLOXncnn.Obj[] objects)\n    {\n        if (objects == null)\n        {\n            imageView.setImageBitmap(bitmap);\n            return;\n        }\n\n        // draw objects on bitmap\n        Bitmap rgba = bitmap.copy(Bitmap.Config.ARGB_8888, true);\n\n        final 
int[] colors = new int[] {\n            Color.rgb( 54,  67, 244),\n            Color.rgb( 99,  30, 233),\n            Color.rgb(176,  39, 156),\n            Color.rgb(183,  58, 103),\n            Color.rgb(181,  81,  63),\n            Color.rgb(243, 150,  33),\n            Color.rgb(244, 169,   3),\n            Color.rgb(212, 188,   0),\n            Color.rgb(136, 150,   0),\n            Color.rgb( 80, 175,  76),\n            Color.rgb( 74, 195, 139),\n            Color.rgb( 57, 220, 205),\n            Color.rgb( 59, 235, 255),\n            Color.rgb(  7, 193, 255),\n            Color.rgb(  0, 152, 255),\n            Color.rgb( 34,  87, 255),\n            Color.rgb( 72,  85, 121),\n            Color.rgb(158, 158, 158),\n            Color.rgb(139, 125,  96)\n        };\n\n        Canvas canvas = new Canvas(rgba);\n\n        Paint paint = new Paint();\n        paint.setStyle(Paint.Style.STROKE);\n        paint.setStrokeWidth(4);\n\n        Paint textbgpaint = new Paint();\n        textbgpaint.setColor(Color.WHITE);\n        textbgpaint.setStyle(Paint.Style.FILL);\n\n        Paint textpaint = new Paint();\n        textpaint.setColor(Color.BLACK);\n        textpaint.setTextSize(26);\n        textpaint.setTextAlign(Paint.Align.LEFT);\n\n        for (int i = 0; i < objects.length; i++)\n        {\n            paint.setColor(colors[i % 19]);\n\n            canvas.drawRect(objects[i].x, objects[i].y, objects[i].x + objects[i].w, objects[i].y + objects[i].h, paint);\n\n            // draw filled text inside image\n            {\n                String text = objects[i].label + \" = \" + String.format(\"%.1f\", objects[i].prob * 100) + \"%\";\n\n                float text_width = textpaint.measureText(text);\n                float text_height = - textpaint.ascent() + textpaint.descent();\n\n                float x = objects[i].x;\n                float y = objects[i].y - text_height;\n                if (y < 0)\n                    y = 0;\n                if (x + text_width 
> rgba.getWidth())\n                    x = rgba.getWidth() - text_width;\n\n                canvas.drawRect(x, y, x + text_width, y + text_height, textbgpaint);\n\n                canvas.drawText(text, x, y - textpaint.ascent(), textpaint);\n            }\n        }\n\n        imageView.setImageBitmap(rgba);\n    }\n\n    @Override\n    protected void onActivityResult(int requestCode, int resultCode, Intent data)\n    {\n        super.onActivityResult(requestCode, resultCode, data);\n\n        if (resultCode == RESULT_OK && null != data) {\n            Uri selectedImage = data.getData();\n\n            try\n            {\n                if (requestCode == SELECT_IMAGE) {\n                    bitmap = decodeUri(selectedImage);\n\n                    yourSelectedImage = bitmap.copy(Bitmap.Config.ARGB_8888, true);\n\n                    imageView.setImageBitmap(bitmap);\n                }\n            }\n            catch (FileNotFoundException e)\n            {\n                Log.e(\"MainActivity\", \"FileNotFoundException\");\n                return;\n            }\n        }\n    }\n\n    private Bitmap decodeUri(Uri selectedImage) throws FileNotFoundException\n    {\n        // Decode image size\n        BitmapFactory.Options o = new BitmapFactory.Options();\n        o.inJustDecodeBounds = true;\n        BitmapFactory.decodeStream(getContentResolver().openInputStream(selectedImage), null, o);\n\n        // The new size we want to scale to\n        final int REQUIRED_SIZE = 640;\n\n        // Find the correct scale value. 
It should be the power of 2.\n        int width_tmp = o.outWidth, height_tmp = o.outHeight;\n        int scale = 1;\n        while (true) {\n            if (width_tmp / 2 < REQUIRED_SIZE || height_tmp / 2 < REQUIRED_SIZE) {\n                break;\n            }\n            width_tmp /= 2;\n            height_tmp /= 2;\n            scale *= 2;\n        }\n\n        // Decode with inSampleSize\n        BitmapFactory.Options o2 = new BitmapFactory.Options();\n        o2.inSampleSize = scale;\n        Bitmap bitmap = BitmapFactory.decodeStream(getContentResolver().openInputStream(selectedImage), null, o2);\n\n        // Rotate according to EXIF\n        int rotate = 0;\n        try\n        {\n            ExifInterface exif = new ExifInterface(getContentResolver().openInputStream(selectedImage));\n            int orientation = exif.getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL);\n            switch (orientation) {\n                case ExifInterface.ORIENTATION_ROTATE_270:\n                    rotate = 270;\n                    break;\n                case ExifInterface.ORIENTATION_ROTATE_180:\n                    rotate = 180;\n                    break;\n                case ExifInterface.ORIENTATION_ROTATE_90:\n                    rotate = 90;\n                    break;\n            }\n        }\n        catch (IOException e)\n        {\n            Log.e(\"MainActivity\", \"ExifInterface IOException\");\n        }\n\n        Matrix matrix = new Matrix();\n        matrix.postRotate(rotate);\n        return Bitmap.createBitmap(bitmap, 0, 0, bitmap.getWidth(), bitmap.getHeight(), matrix, true);\n    }\n\n}\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/YOLOXncnn.java",
    "content": "// Copyright (C) Megvii, Inc. and its affiliates. All rights reserved.\n\npackage com.megvii.yoloXncnn;\n\nimport android.content.res.AssetManager;\nimport android.graphics.Bitmap;\n\npublic class YOLOXncnn\n{\n    public native boolean Init(AssetManager mgr);\n\n    public class Obj\n    {\n        public float x;\n        public float y;\n        public float w;\n        public float h;\n        public String label;\n        public float prob;\n    }\n\n    public native Obj[] Detect(Bitmap bitmap, boolean use_gpu);\n\n    static {\n        System.loadLibrary(\"yoloXncnn\");\n    }\n}\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/java/com/megvii/yoloXncnn/yoloXncnn.java",
    "content": "// Copyright (C) Megvii, Inc. and its affiliates. All rights reserved.\n\npackage com.megvii.yoloXncnn;\n\nimport android.content.res.AssetManager;\nimport android.graphics.Bitmap;\n\npublic class YOLOXncnn\n{\n    public native boolean Init(AssetManager mgr);\n\n    public class Obj\n    {\n        public float x;\n        public float y;\n        public float w;\n        public float h;\n        public String label;\n        public float prob;\n    }\n\n    public native Obj[] Detect(Bitmap bitmap, boolean use_gpu);\n\n    static {\n        System.loadLibrary(\"yoloXncnn\");\n    }\n}\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/jni/CMakeLists.txt",
    "content": "project(yoloXncnn)\n\ncmake_minimum_required(VERSION 3.4.1)\n\nset(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20210525-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)\nfind_package(ncnn REQUIRED)\n\nadd_library(yoloXncnn SHARED yoloXncnn_jni.cpp)\n\ntarget_link_libraries(yoloXncnn\n    ncnn\n\n    jnigraphics\n)\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/jni/yoloXncnn_jni.cpp",
    "content": "// Some code in this file is based on:\n// https://github.com/nihui/ncnn-android-yolov5/blob/master/app/src/main/jni/yolov5ncnn_jni.cpp\n// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.\n// Copyright (C) Megvii, Inc. and its affiliates. All rights reserved.\n\n#include <android/asset_manager_jni.h>\n#include <android/bitmap.h>\n#include <android/log.h>\n\n#include <jni.h>\n\n#include <string>\n#include <vector>\n\n// ncnn\n#include \"layer.h\"\n#include \"net.h\"\n#include \"benchmark.h\"\n\nstatic ncnn::UnlockedPoolAllocator g_blob_pool_allocator;\nstatic ncnn::PoolAllocator g_workspace_pool_allocator;\n\nstatic ncnn::Net yoloX;\n\nclass YoloV5Focus : public ncnn::Layer\n{\npublic:\n    YoloV5Focus()\n    {\n        one_blob_only = true;\n    }\n\n    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const\n    {\n        int w = bottom_blob.w;\n        int h = bottom_blob.h;\n        int channels = bottom_blob.c;\n\n        int outw = w / 2;\n        int outh = h / 2;\n        int outc = channels * 4;\n\n        top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);\n        if (top_blob.empty())\n            return -100;\n\n        #pragma omp parallel for num_threads(opt.num_threads)\n        for (int p = 0; p < outc; p++)\n        {\n            const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);\n            float* outptr = top_blob.channel(p);\n\n            for (int i = 0; i < outh; i++)\n            {\n                for (int j = 0; j < outw; j++)\n                {\n                    *outptr = *ptr;\n\n                    outptr += 1;\n                    ptr += 2;\n                }\n\n                ptr += w;\n            }\n        }\n\n        return 0;\n    }\n};\n\nDEFINE_LAYER_CREATOR(YoloV5Focus)\n\nstruct Object\n{\n    float x;\n    float y;\n    float w;\n    float h;\n    int label;\n    float 
prob;\n};\n\nstruct GridAndStride\n{\n    int grid0;\n    int grid1;\n    int stride;\n};\n\nstatic inline float intersection_area(const Object& a, const Object& b)\n{\n    if (a.x > b.x + b.w || a.x + a.w < b.x || a.y > b.y + b.h || a.y + a.h < b.y)\n    {\n        // no intersection\n        return 0.f;\n    }\n\n    float inter_width = std::min(a.x + a.w, b.x + b.w) - std::max(a.x, b.x);\n    float inter_height = std::min(a.y + a.h, b.y + b.h) - std::max(a.y, b.y);\n\n    return inter_width * inter_height;\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)\n{\n    int i = left;\n    int j = right;\n    float p = faceobjects[(left + right) / 2].prob;\n\n    while (i <= j)\n    {\n        while (faceobjects[i].prob > p)\n            i++;\n\n        while (faceobjects[j].prob < p)\n            j--;\n\n        if (i <= j)\n        {\n            // swap\n            std::swap(faceobjects[i], faceobjects[j]);\n\n            i++;\n            j--;\n        }\n    }\n\n    #pragma omp parallel sections\n    {\n        #pragma omp section\n        {\n            if (left < j) qsort_descent_inplace(faceobjects, left, j);\n        }\n        #pragma omp section\n        {\n            if (i < right) qsort_descent_inplace(faceobjects, i, right);\n        }\n    }\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& faceobjects)\n{\n    if (faceobjects.empty())\n        return;\n\n    qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);\n}\n\nstatic void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)\n{\n    picked.clear();\n\n    const int n = faceobjects.size();\n\n    std::vector<float> areas(n);\n    for (int i = 0; i < n; i++)\n    {\n        areas[i] = faceobjects[i].w * faceobjects[i].h;\n    }\n\n    for (int i = 0; i < n; i++)\n    {\n        const Object& a = faceobjects[i];\n\n        int keep = 1;\n        for (int j = 0; j < 
(int)picked.size(); j++)\n        {\n            const Object& b = faceobjects[picked[j]];\n\n            // intersection over union\n            float inter_area = intersection_area(a, b);\n            float union_area = areas[i] + areas[picked[j]] - inter_area;\n            // float IoU = inter_area / union_area\n            if (inter_area / union_area > nms_threshold)\n                keep = 0;\n        }\n\n        if (keep)\n            picked.push_back(i);\n    }\n}\n\nstatic void generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)\n{\n    for (auto stride : strides)\n    {\n        int num_grid = target_size / stride;\n        for (int g1 = 0; g1 < num_grid; g1++)\n        {\n            for (int g0 = 0; g0 < num_grid; g0++)\n            {\n                grid_strides.push_back((GridAndStride){g0, g1, stride});\n            }\n        }\n    }\n}\n\nstatic void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)\n{\n    const int num_grid = feat_blob.h;\n    fprintf(stderr, \"output height: %d, width: %d, channels: %d, dims:%d\\n\", feat_blob.h, feat_blob.w, feat_blob.c, feat_blob.dims);\n\n    const int num_class = feat_blob.w - 5;\n\n    const int num_anchors = grid_strides.size();\n\n    const float* feat_ptr = feat_blob.channel(0);\n    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)\n    {\n        const int grid0 = grid_strides[anchor_idx].grid0;\n        const int grid1 = grid_strides[anchor_idx].grid1;\n        const int stride = grid_strides[anchor_idx].stride;\n\n        // yolox/models/yolo_head.py decode logic\n        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides\n        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides\n        float x_center = (feat_ptr[0] + grid0) * stride;\n        float y_center = (feat_ptr[1] + grid1) * stride;\n        
float w = exp(feat_ptr[2]) * stride;\n        float h = exp(feat_ptr[3]) * stride;\n        float x0 = x_center - w * 0.5f;\n        float y0 = y_center - h * 0.5f;\n\n        float box_objectness = feat_ptr[4];\n        for (int class_idx = 0; class_idx < num_class; class_idx++)\n        {\n            float box_cls_score = feat_ptr[5 + class_idx];\n            float box_prob = box_objectness * box_cls_score;\n            if (box_prob > prob_threshold)\n            {\n                Object obj;\n                obj.x = x0;\n                obj.y = y0;\n                obj.w = w;\n                obj.h = h;\n                obj.label = class_idx;\n                obj.prob = box_prob;\n\n                objects.push_back(obj);\n            }\n\n        } // class loop\n        feat_ptr += feat_blob.w;\n\n    } // point anchor loop\n}\n\n\nextern \"C\" {\n\n// FIXME DeleteGlobalRef is missing for objCls\nstatic jclass objCls = NULL;\nstatic jmethodID constructortorId;\nstatic jfieldID xId;\nstatic jfieldID yId;\nstatic jfieldID wId;\nstatic jfieldID hId;\nstatic jfieldID labelId;\nstatic jfieldID probId;\n\nJNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)\n{\n    __android_log_print(ANDROID_LOG_DEBUG, \"YOLOXncnn\", \"JNI_OnLoad\");\n\n    ncnn::create_gpu_instance();\n\n    return JNI_VERSION_1_4;\n}\n\nJNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved)\n{\n    __android_log_print(ANDROID_LOG_DEBUG, \"YOLOXncnn\", \"JNI_OnUnload\");\n\n    ncnn::destroy_gpu_instance();\n}\n\n// public native boolean Init(AssetManager mgr);\nJNIEXPORT jboolean JNICALL Java_com_megvii_yoloXncnn_YOLOXncnn_Init(JNIEnv* env, jobject thiz, jobject assetManager)\n{\n    ncnn::Option opt;\n    opt.lightmode = true;\n    opt.num_threads = 4;\n    opt.blob_allocator = &g_blob_pool_allocator;\n    opt.workspace_allocator = &g_workspace_pool_allocator;\n    opt.use_packing_layout = true;\n\n    // use vulkan compute\n    if (ncnn::get_gpu_count() != 0)\n        opt.use_vulkan_compute 
= true;\n\n    AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);\n\n    yoloX.opt = opt;\n\n    yoloX.register_custom_layer(\"YoloV5Focus\", YoloV5Focus_layer_creator);\n\n    // init param\n    {\n        int ret = yoloX.load_param(mgr, \"yolox.param\");\n        if (ret != 0)\n        {\n            __android_log_print(ANDROID_LOG_DEBUG, \"YOLOXncnn\", \"load_param failed\");\n            return JNI_FALSE;\n        }\n    }\n\n    // init bin\n    {\n        int ret = yoloX.load_model(mgr, \"yolox.bin\");\n        if (ret != 0)\n        {\n            __android_log_print(ANDROID_LOG_DEBUG, \"YOLOXncnn\", \"load_model failed\");\n            return JNI_FALSE;\n        }\n    }\n\n    // init jni glue\n    jclass localObjCls = env->FindClass(\"com/megvii/yoloXncnn/YOLOXncnn$Obj\");\n    objCls = reinterpret_cast<jclass>(env->NewGlobalRef(localObjCls));\n\n    constructortorId = env->GetMethodID(objCls, \"<init>\", \"(Lcom/megvii/yoloXncnn/YOLOXncnn;)V\");\n\n    xId = env->GetFieldID(objCls, \"x\", \"F\");\n    yId = env->GetFieldID(objCls, \"y\", \"F\");\n    wId = env->GetFieldID(objCls, \"w\", \"F\");\n    hId = env->GetFieldID(objCls, \"h\", \"F\");\n    labelId = env->GetFieldID(objCls, \"label\", \"Ljava/lang/String;\");\n    probId = env->GetFieldID(objCls, \"prob\", \"F\");\n\n    return JNI_TRUE;\n}\n\n// public native Obj[] Detect(Bitmap bitmap, boolean use_gpu);\nJNIEXPORT jobjectArray JNICALL Java_com_megvii_yoloXncnn_YOLOXncnn_Detect(JNIEnv* env, jobject thiz, jobject bitmap, jboolean use_gpu)\n{\n    if (use_gpu == JNI_TRUE && ncnn::get_gpu_count() == 0)\n    {\n        return NULL;\n        //return env->NewStringUTF(\"no vulkan capable gpu\");\n    }\n\n    double start_time = ncnn::get_current_time();\n\n    AndroidBitmapInfo info;\n    AndroidBitmap_getInfo(env, bitmap, &info);\n    const int width = info.width;\n    const int height = info.height;\n    if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888)\n        return NULL;\n\n    
// parameters which might change for different model\n    const int target_size = 640;\n    const float prob_threshold = 0.3f;\n    const float nms_threshold = 0.65f;\n    std::vector<int> strides = {8, 16, 32}; // might have stride=64\n\n    int w = width;\n    int h = height;\n    float scale = 1.f;\n    if (w > h)\n    {\n        scale = (float)target_size / w;\n        w = target_size;\n        h = h * scale;\n    }\n    else\n    {\n        scale = (float)target_size / h;\n        h = target_size;\n        w = w * scale;\n    }\n\n    ncnn::Mat in = ncnn::Mat::from_android_bitmap_resize(env, bitmap, ncnn::Mat::PIXEL_RGB2BGR, w, h);\n\n    // pad to target_size rectangle\n    int wpad = target_size - w;\n    int hpad = target_size - h;\n    ncnn::Mat in_pad;\n    // different from yolov5, yolox only pad on bottom and right side,\n    // which means users don't need to extra padding info to decode boxes coordinate.\n    ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);\n\n    // yolox\n    std::vector<Object> objects;\n    {\n\n        ncnn::Extractor ex = yoloX.create_extractor();\n\n        ex.set_vulkan_compute(use_gpu);\n\n        ex.input(\"images\", in_pad);\n\n        std::vector<Object> proposals;\n\n        // yolox decode and generate proposal logic\n        {\n            ncnn::Mat out;\n            ex.extract(\"output\", out);\n\n            std::vector<GridAndStride> grid_strides;\n            generate_grids_and_stride(target_size, strides, grid_strides);\n            generate_yolox_proposals(grid_strides, out, prob_threshold, proposals);\n\n        }\n\n        // sort all proposals by score from highest to lowest\n        qsort_descent_inplace(proposals);\n\n        // apply nms with nms_threshold\n        std::vector<int> picked;\n        nms_sorted_bboxes(proposals, picked, nms_threshold);\n\n        int count = picked.size();\n\n        objects.resize(count);\n        for (int i = 0; i < count; i++)\n        
{\n            objects[i] = proposals[picked[i]];\n\n            // adjust offset to original unpadded\n            float x0 = (objects[i].x) / scale;\n            float y0 = (objects[i].y) / scale;\n            float x1 = (objects[i].x + objects[i].w) / scale;\n            float y1 = (objects[i].y + objects[i].h) / scale;\n\n            // clip\n            x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);\n            y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);\n            x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);\n            y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);\n\n            objects[i].x = x0;\n            objects[i].y = y0;\n            objects[i].w = x1 - x0;\n            objects[i].h = y1 - y0;\n        }\n    }\n\n    // objects to Obj[]\n    static const char* class_names[] = {\n        \"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\",\n        \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\",\n        \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\",\n        \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\",\n        \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\",\n        \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\",\n        \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\",\n        \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\",\n        \"hair drier\", \"toothbrush\"\n    };\n\n    jobjectArray jObjArray = 
env->NewObjectArray(objects.size(), objCls, NULL);\n\n    for (size_t i=0; i<objects.size(); i++)\n    {\n        jobject jObj = env->NewObject(objCls, constructortorId, thiz);\n\n        env->SetFloatField(jObj, xId, objects[i].x);\n        env->SetFloatField(jObj, yId, objects[i].y);\n        env->SetFloatField(jObj, wId, objects[i].w);\n        env->SetFloatField(jObj, hId, objects[i].h);\n        env->SetObjectField(jObj, labelId, env->NewStringUTF(class_names[objects[i].label]));\n        env->SetFloatField(jObj, probId, objects[i].prob);\n\n        env->SetObjectArrayElement(jObjArray, i, jObj);\n    }\n\n    double elasped = ncnn::get_current_time() - start_time;\n    __android_log_print(ANDROID_LOG_DEBUG, \"YOLOXncnn\", \"%.2fms   detect\", elasped);\n\n    return jObjArray;\n}\n\n}\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/res/layout/main.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<LinearLayout xmlns:android=\"http://schemas.android.com/apk/res/android\"\n    android:orientation=\"vertical\"\n    android:layout_width=\"fill_parent\"\n    android:layout_height=\"fill_parent\">\n\n    <LinearLayout\n        android:orientation=\"horizontal\"\n        android:layout_width=\"fill_parent\"\n        android:layout_height=\"wrap_content\">\n\n    <Button\n        android:id=\"@+id/buttonImage\"\n        android:layout_width=\"wrap_content\"\n        android:layout_height=\"wrap_content\"\n        android:text=\"image\" />\n    <Button\n        android:id=\"@+id/buttonDetect\"\n        android:layout_width=\"wrap_content\"\n        android:layout_height=\"wrap_content\"\n        android:text=\"infer-cpu\" />\n    <Button\n        android:id=\"@+id/buttonDetectGPU\"\n        android:layout_width=\"wrap_content\"\n        android:layout_height=\"wrap_content\"\n        android:text=\"infer-gpu\" />\n    </LinearLayout>\n\n    <ImageView\n        android:id=\"@+id/imageView\"\n        android:layout_width=\"fill_parent\"\n        android:layout_height=\"fill_parent\"\n        android:layout_weight=\"1\" />\n\n</LinearLayout>\n"
  },
  {
    "path": "demo/ncnn/android/app/src/main/res/values/strings.xml",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<resources>\n    <string name=\"app_name\">yoloXncnn</string>\n</resources>\n"
  },
  {
    "path": "demo/ncnn/android/build.gradle",
    "content": "// Top-level build file where you can add configuration options common to all sub-projects/modules.\nbuildscript {\n    repositories {\n        jcenter()\n        google()\n    }\n    dependencies {\n        classpath 'com.android.tools.build:gradle:3.5.0'\n    }\n}\n\nallprojects {\n    repositories {\n        jcenter()\n        google()\n    }\n}\n"
  },
  {
    "path": "demo/ncnn/android/gradle/wrapper/gradle-wrapper.properties",
    "content": "#Sun Aug 25 10:34:48 CST 2019\ndistributionBase=GRADLE_USER_HOME\ndistributionPath=wrapper/dists\nzipStoreBase=GRADLE_USER_HOME\nzipStorePath=wrapper/dists\ndistributionUrl=https\\://services.gradle.org/distributions/gradle-5.4.1-all.zip\n"
  },
  {
    "path": "demo/ncnn/android/gradlew",
    "content": "#!/usr/bin/env sh\n\n##############################################################################\n##\n##  Gradle start up script for UN*X\n##\n##############################################################################\n\n# Attempt to set APP_HOME\n# Resolve links: $0 may be a link\nPRG=\"$0\"\n# Need this for relative symlinks.\nwhile [ -h \"$PRG\" ] ; do\n    ls=`ls -ld \"$PRG\"`\n    link=`expr \"$ls\" : '.*-> \\(.*\\)$'`\n    if expr \"$link\" : '/.*' > /dev/null; then\n        PRG=\"$link\"\n    else\n        PRG=`dirname \"$PRG\"`\"/$link\"\n    fi\ndone\nSAVED=\"`pwd`\"\ncd \"`dirname \\\"$PRG\\\"`/\" >/dev/null\nAPP_HOME=\"`pwd -P`\"\ncd \"$SAVED\" >/dev/null\n\nAPP_NAME=\"Gradle\"\nAPP_BASE_NAME=`basename \"$0\"`\n\n# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.\nDEFAULT_JVM_OPTS=\"\"\n\n# Use the maximum available, or set MAX_FD != -1 to use that value.\nMAX_FD=\"maximum\"\n\nwarn () {\n    echo \"$*\"\n}\n\ndie () {\n    echo\n    echo \"$*\"\n    echo\n    exit 1\n}\n\n# OS specific support (must be 'true' or 'false').\ncygwin=false\nmsys=false\ndarwin=false\nnonstop=false\ncase \"`uname`\" in\n  CYGWIN* )\n    cygwin=true\n    ;;\n  Darwin* )\n    darwin=true\n    ;;\n  MINGW* )\n    msys=true\n    ;;\n  NONSTOP* )\n    nonstop=true\n    ;;\nesac\n\nCLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar\n\n# Determine the Java command to use to start the JVM.\nif [ -n \"$JAVA_HOME\" ] ; then\n    if [ -x \"$JAVA_HOME/jre/sh/java\" ] ; then\n        # IBM's JDK on AIX uses strange locations for the executables\n        JAVACMD=\"$JAVA_HOME/jre/sh/java\"\n    else\n        JAVACMD=\"$JAVA_HOME/bin/java\"\n    fi\n    if [ ! 
-x \"$JAVACMD\" ] ; then\n        die \"ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME\n\nPlease set the JAVA_HOME variable in your environment to match the\nlocation of your Java installation.\"\n    fi\nelse\n    JAVACMD=\"java\"\n    which java >/dev/null 2>&1 || die \"ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.\n\nPlease set the JAVA_HOME variable in your environment to match the\nlocation of your Java installation.\"\nfi\n\n# Increase the maximum file descriptors if we can.\nif [ \"$cygwin\" = \"false\" -a \"$darwin\" = \"false\" -a \"$nonstop\" = \"false\" ] ; then\n    MAX_FD_LIMIT=`ulimit -H -n`\n    if [ $? -eq 0 ] ; then\n        if [ \"$MAX_FD\" = \"maximum\" -o \"$MAX_FD\" = \"max\" ] ; then\n            MAX_FD=\"$MAX_FD_LIMIT\"\n        fi\n        ulimit -n $MAX_FD\n        if [ $? -ne 0 ] ; then\n            warn \"Could not set maximum file descriptor limit: $MAX_FD\"\n        fi\n    else\n        warn \"Could not query maximum file descriptor limit: $MAX_FD_LIMIT\"\n    fi\nfi\n\n# For Darwin, add options to specify how the application appears in the dock\nif $darwin; then\n    GRADLE_OPTS=\"$GRADLE_OPTS \\\"-Xdock:name=$APP_NAME\\\" \\\"-Xdock:icon=$APP_HOME/media/gradle.icns\\\"\"\nfi\n\n# For Cygwin, switch paths to Windows format before running java\nif $cygwin ; then\n    APP_HOME=`cygpath --path --mixed \"$APP_HOME\"`\n    CLASSPATH=`cygpath --path --mixed \"$CLASSPATH\"`\n    JAVACMD=`cygpath --unix \"$JAVACMD\"`\n\n    # We build the pattern for arguments to be converted via cygpath\n    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`\n    SEP=\"\"\n    for dir in $ROOTDIRSRAW ; do\n        ROOTDIRS=\"$ROOTDIRS$SEP$dir\"\n        SEP=\"|\"\n    done\n    OURCYGPATTERN=\"(^($ROOTDIRS))\"\n    # Add a user-defined pattern to the cygpath arguments\n    if [ \"$GRADLE_CYGPATTERN\" != \"\" ] ; then\n        OURCYGPATTERN=\"$OURCYGPATTERN|($GRADLE_CYGPATTERN)\"\n    fi\n    # 
Now convert the arguments - kludge to limit ourselves to /bin/sh\n    i=0\n    for arg in \"$@\" ; do\n        CHECK=`echo \"$arg\"|egrep -c \"$OURCYGPATTERN\" -`\n        CHECK2=`echo \"$arg\"|egrep -c \"^-\"`                                 ### Determine if an option\n\n        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition\n            eval `echo args$i`=`cygpath --path --ignore --mixed \"$arg\"`\n        else\n            eval `echo args$i`=\"\\\"$arg\\\"\"\n        fi\n        i=$((i+1))\n    done\n    case $i in\n        (0) set -- ;;\n        (1) set -- \"$args0\" ;;\n        (2) set -- \"$args0\" \"$args1\" ;;\n        (3) set -- \"$args0\" \"$args1\" \"$args2\" ;;\n        (4) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" ;;\n        (5) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" \"$args4\" ;;\n        (6) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" \"$args4\" \"$args5\" ;;\n        (7) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" \"$args4\" \"$args5\" \"$args6\" ;;\n        (8) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" \"$args4\" \"$args5\" \"$args6\" \"$args7\" ;;\n        (9) set -- \"$args0\" \"$args1\" \"$args2\" \"$args3\" \"$args4\" \"$args5\" \"$args6\" \"$args7\" \"$args8\" ;;\n    esac\nfi\n\n# Escape application args\nsave () {\n    for i do printf %s\\\\n \"$i\" | sed \"s/'/'\\\\\\\\''/g;1s/^/'/;\\$s/\\$/' \\\\\\\\/\" ; done\n    echo \" \"\n}\nAPP_ARGS=$(save \"$@\")\n\n# Collect all arguments for the java command, following the shell quoting and substitution rules\neval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS \"\\\"-Dorg.gradle.appname=$APP_BASE_NAME\\\"\" -classpath \"\\\"$CLASSPATH\\\"\" org.gradle.wrapper.GradleWrapperMain \"$APP_ARGS\"\n\n# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong\nif [ \"$(uname)\" = \"Darwin\" ] && [ \"$HOME\" = \"$PWD\" ]; then\n  cd \"$(dirname \"$0\")\"\nfi\n\nexec 
\"$JAVACMD\" \"$@\"\n"
  },
  {
    "path": "demo/ncnn/android/gradlew.bat",
    "content": "@if \"%DEBUG%\" == \"\" @echo off\n@rem ##########################################################################\n@rem\n@rem  Gradle startup script for Windows\n@rem\n@rem ##########################################################################\n\n@rem Set local scope for the variables with windows NT shell\nif \"%OS%\"==\"Windows_NT\" setlocal\n\nset DIRNAME=%~dp0\nif \"%DIRNAME%\" == \"\" set DIRNAME=.\nset APP_BASE_NAME=%~n0\nset APP_HOME=%DIRNAME%\n\n@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.\nset DEFAULT_JVM_OPTS=\n\n@rem Find java.exe\nif defined JAVA_HOME goto findJavaFromJavaHome\n\nset JAVA_EXE=java.exe\n%JAVA_EXE% -version >NUL 2>&1\nif \"%ERRORLEVEL%\" == \"0\" goto init\n\necho.\necho ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.\necho.\necho Please set the JAVA_HOME variable in your environment to match the\necho location of your Java installation.\n\ngoto fail\n\n:findJavaFromJavaHome\nset JAVA_HOME=%JAVA_HOME:\"=%\nset JAVA_EXE=%JAVA_HOME%/bin/java.exe\n\nif exist \"%JAVA_EXE%\" goto init\n\necho.\necho ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%\necho.\necho Please set the JAVA_HOME variable in your environment to match the\necho location of your Java installation.\n\ngoto fail\n\n:init\n@rem Get command-line arguments, handling Windows variants\n\nif not \"%OS%\" == \"Windows_NT\" goto win9xME_args\n\n:win9xME_args\n@rem Slurp the command line arguments.\nset CMD_LINE_ARGS=\nset _SKIP=2\n\n:win9xME_args_slurp\nif \"x%~1\" == \"x\" goto execute\n\nset CMD_LINE_ARGS=%*\n\n:execute\n@rem Setup the command line\n\nset CLASSPATH=%APP_HOME%\\gradle\\wrapper\\gradle-wrapper.jar\n\n@rem Execute Gradle\n\"%JAVA_EXE%\" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% \"-Dorg.gradle.appname=%APP_BASE_NAME%\" -classpath \"%CLASSPATH%\" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%\n\n:end\n@rem End local scope for the 
variables with windows NT shell\nif \"%ERRORLEVEL%\"==\"0\" goto mainEnd\n\n:fail\nrem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of\nrem the _cmd.exe /c_ return code!\nif  not \"\" == \"%GRADLE_EXIT_CONSOLE%\" exit 1\nexit /b 1\n\n:mainEnd\nif \"%OS%\"==\"Windows_NT\" endlocal\n\n:omega\n"
  },
  {
    "path": "demo/ncnn/android/settings.gradle",
    "content": "include ':app'\n"
  },
  {
    "path": "demo/ncnn/cpp/README.md",
    "content": "# YOLOX-CPP-ncnn\n\nC++ implementation of YOLOX object detection based on [ncnn](https://github.com/Tencent/ncnn).  \n\n## Tutorial\n\n### Step1\nClone [ncnn](https://github.com/Tencent/ncnn) first, then follow the [build tutorial of ncnn](https://github.com/Tencent/ncnn/wiki/how-to-build) to build it on your own device.\n\n### Step2\nFirst, we try the original onnx2ncnn solution, using the provided tools to generate an onnx file.\nFor example, if you want to generate the onnx file of yolox-s, please run the following command:\n```shell\ncd <path of yolox>\npython3 tools/export_onnx.py -n yolox-s\n```\nThen a yolox.onnx file is generated.\n\n### Step3\nGenerate ncnn param and bin file.\n```shell\ncd <path of ncnn>\ncd build/tools/ncnn\n./onnx2ncnn yolox.onnx model.param model.bin\n```\n\nSince the Focus module is not supported in ncnn, you will see warnings like:\n```shell\nUnsupported slice step!\n```\nHowever, don't worry about this, as a C++ version of the Focus layer is already implemented in yolox.cpp.\n\n### Step4\nOpen **model.param**, and modify it. 
For more information on the ncnn param and model file structure, please take a look at this [wiki](https://github.com/Tencent/ncnn/wiki/param-and-model-file-structure).\n\nBefore (just an example):\n```\n295 328\nInput            images                   0 1 images\nSplit            splitncnn_input0         1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3\nCrop             Slice_4                  1 1 images_splitncnn_3 647 -23309=1,0 -23310=1,2147483647 -23311=1,1\nCrop             Slice_9                  1 1 647 652 -23309=1,0 -23310=1,2147483647 -23311=1,2\nCrop             Slice_14                 1 1 images_splitncnn_2 657 -23309=1,0 -23310=1,2147483647 -23311=1,1\nCrop             Slice_19                 1 1 657 662 -23309=1,1 -23310=1,2147483647 -23311=1,2\nCrop             Slice_24                 1 1 images_splitncnn_1 667 -23309=1,1 -23310=1,2147483647 -23311=1,1\nCrop             Slice_29                 1 1 667 672 -23309=1,0 -23310=1,2147483647 -23311=1,2\nCrop             Slice_34                 1 1 images_splitncnn_0 677 -23309=1,1 -23310=1,2147483647 -23311=1,1\nCrop             Slice_39                 1 1 677 682 -23309=1,1 -23310=1,2147483647 -23311=1,2\nConcat           Concat_40                4 1 652 672 662 682 683 0=0\n...\n```\n* Change the first number from 295 to 295 - 9 = 286 (since we will remove 10 layers and add 1 layer, the total number of layers decreases by 9). 
\n* Then remove the 10 lines from Split to Concat, but remember the second-to-last number of the Concat line: 683.\n* Add a YoloV5Focus layer after Input (using the previous number 683):\n```\nYoloV5Focus      focus                    1 1 images 683\n```\nAfter (just an example):\n```\n286 328\nInput            images                   0 1 images\nYoloV5Focus      focus                    1 1 images 683\n...\n```\n\n### Step5\nUse ncnnoptimize to generate new param and bin:\n```shell\n# suppose you are still under ncnn/build/tools/ncnn dir.\n../ncnnoptimize model.param model.bin yolox.param yolox.bin 65536\n```\n\n### Step6\nCopy or move the yolox.cpp file into ncnn/examples, modify the CMakeLists.txt to add our implementation, then build.\n\n### Step7\nRun inference on an image with the yolox executable and enjoy the detection result:\n```shell\n./yolox demo.jpg\n```\n\n### Bonus Solution:\nAs ncnn has released another model conversion tool called [pnnx](https://zhuanlan.zhihu.com/p/427620428), which directly finishes the pytorch2ncnn process via torchscript, we can also try this.\n\n```shell\n# take yolox-s as an example\npython3 tools/export_torchscript.py -n yolox-s -c /path/to/your_checkpoint_files\n```\nThen a `yolox.torchscript.pt` will be generated. Copy this file to your pnnx build directory (pnnx also provides pre-built packages [here](https://github.com/pnnx/pnnx/releases/tag/20220720)).\n\n```shell\n# suppose you put the yolox.torchscript.pt in a separate folder\n./pnnx yolox/yolox.torchscript.pt inputshape=[1,3,640,640]\n# for zsh users, please use inputshape='[1,3,640,640]'\n```\nStill, ncnn does not support the `slice` op, as we mentioned in [Step3](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ncnn/cpp#step3), so you will still see the warnings during this process.\n\nThen multiple pnnx related files will be generated in your yolox folder. Use `yolox.torchscript.ncnn.param` and `yolox.torchscript.ncnn.bin` as your converted model. 
\n\nThen we can follow back to our [Step4](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ncnn/cpp#step4) for the rest of our implementation.\n\n## Acknowledgement\n\n* [ncnn](https://github.com/Tencent/ncnn)\n"
  },
  {
    "path": "demo/ncnn/cpp/yolox.cpp",
    "content": "// This file is wirtten base on the following file:\n// https://github.com/Tencent/ncnn/blob/master/examples/yolov5.cpp\n// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.\n// Licensed under the BSD 3-Clause License (the \"License\"); you may not use this file except\n// in compliance with the License. You may obtain a copy of the License at\n//\n// https://opensource.org/licenses/BSD-3-Clause\n//\n// Unless required by applicable law or agreed to in writing, software distributed\n// under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR\n// CONDITIONS OF ANY KIND, either express or implied. See the License for the\n// specific language governing permissions and limitations under the License.\n// ------------------------------------------------------------------------------\n// Copyright (C) 2020-2021, Megvii Inc. All rights reserved.\n\n#include \"layer.h\"\n#include \"net.h\"\n\n#if defined(USE_NCNN_SIMPLEOCV)\n#include \"simpleocv.h\"\n#else\n#include <opencv2/core/core.hpp>\n#include <opencv2/highgui/highgui.hpp>\n#include <opencv2/imgproc/imgproc.hpp>\n#endif\n#include <float.h>\n#include <stdio.h>\n#include <vector>\n\n#define YOLOX_NMS_THRESH  0.45 // nms threshold\n#define YOLOX_CONF_THRESH 0.25 // threshold of bounding box prob\n#define YOLOX_TARGET_SIZE 640  // target image size after resize, might use 416 for small model\n\n// YOLOX use the same focus in yolov5\nclass YoloV5Focus : public ncnn::Layer\n{\npublic:\n    YoloV5Focus()\n    {\n        one_blob_only = true;\n    }\n\n    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const\n    {\n        int w = bottom_blob.w;\n        int h = bottom_blob.h;\n        int channels = bottom_blob.c;\n\n        int outw = w / 2;\n        int outh = h / 2;\n        int outc = channels * 4;\n\n        top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);\n        if (top_blob.empty())\n        
    return -100;\n\n        #pragma omp parallel for num_threads(opt.num_threads)\n        for (int p = 0; p < outc; p++)\n        {\n            const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);\n            float* outptr = top_blob.channel(p);\n\n            for (int i = 0; i < outh; i++)\n            {\n                for (int j = 0; j < outw; j++)\n                {\n                    *outptr = *ptr;\n\n                    outptr += 1;\n                    ptr += 2;\n                }\n\n                ptr += w;\n            }\n        }\n\n        return 0;\n    }\n};\n\nDEFINE_LAYER_CREATOR(YoloV5Focus)\n\nstruct Object\n{\n    cv::Rect_<float> rect;\n    int label;\n    float prob;\n};\n\nstruct GridAndStride\n{\n    int grid0;\n    int grid1;\n    int stride;\n};\n\nstatic inline float intersection_area(const Object& a, const Object& b)\n{\n    cv::Rect_<float> inter = a.rect & b.rect;\n    return inter.area();\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)\n{\n    int i = left;\n    int j = right;\n    float p = faceobjects[(left + right) / 2].prob;\n\n    while (i <= j)\n    {\n        while (faceobjects[i].prob > p)\n            i++;\n\n        while (faceobjects[j].prob < p)\n            j--;\n\n        if (i <= j)\n        {\n            // swap\n            std::swap(faceobjects[i], faceobjects[j]);\n\n            i++;\n            j--;\n        }\n    }\n\n    #pragma omp parallel sections\n    {\n        #pragma omp section\n        {\n            if (left < j) qsort_descent_inplace(faceobjects, left, j);\n        }\n        #pragma omp section\n        {\n            if (i < right) qsort_descent_inplace(faceobjects, i, right);\n        }\n    }\n}\n\nstatic void qsort_descent_inplace(std::vector<Object>& objects)\n{\n    if (objects.empty())\n        return;\n\n    qsort_descent_inplace(objects, 0, objects.size() - 1);\n}\n\nstatic void 
nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)\n{\n    picked.clear();\n\n    const int n = faceobjects.size();\n\n    std::vector<float> areas(n);\n    for (int i = 0; i < n; i++)\n    {\n        areas[i] = faceobjects[i].rect.area();\n    }\n\n    for (int i = 0; i < n; i++)\n    {\n        const Object& a = faceobjects[i];\n\n        int keep = 1;\n        for (int j = 0; j < (int)picked.size(); j++)\n        {\n            const Object& b = faceobjects[picked[j]];\n\n            // intersection over union\n            float inter_area = intersection_area(a, b);\n            float union_area = areas[i] + areas[picked[j]] - inter_area;\n            // float IoU = inter_area / union_area\n            if (inter_area / union_area > nms_threshold)\n                keep = 0;\n        }\n\n        if (keep)\n            picked.push_back(i);\n    }\n}\n\nstatic void generate_grids_and_stride(const int target_size, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)\n{\n    for (int i = 0; i < (int)strides.size(); i++)\n    {\n        int stride = strides[i];\n        int num_grid = target_size / stride;\n        for (int g1 = 0; g1 < num_grid; g1++)\n        {\n            for (int g0 = 0; g0 < num_grid; g0++)\n            {\n                GridAndStride gs;\n                gs.grid0 = g0;\n                gs.grid1 = g1;\n                gs.stride = stride;\n                grid_strides.push_back(gs);\n            }\n        }\n    }\n}\n\nstatic void generate_yolox_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)\n{\n    const int num_grid = feat_blob.h;\n    const int num_class = feat_blob.w - 5;\n    const int num_anchors = grid_strides.size();\n\n    const float* feat_ptr = feat_blob.channel(0);\n    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)\n    {\n        const int grid0 = 
grid_strides[anchor_idx].grid0;\n        const int grid1 = grid_strides[anchor_idx].grid1;\n        const int stride = grid_strides[anchor_idx].stride;\n\n        // yolox/models/yolo_head.py decode logic\n        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides\n        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides\n        float x_center = (feat_ptr[0] + grid0) * stride;\n        float y_center = (feat_ptr[1] + grid1) * stride;\n        float w = exp(feat_ptr[2]) * stride;\n        float h = exp(feat_ptr[3]) * stride;\n        float x0 = x_center - w * 0.5f;\n        float y0 = y_center - h * 0.5f;\n\n        float box_objectness = feat_ptr[4];\n        for (int class_idx = 0; class_idx < num_class; class_idx++)\n        {\n            float box_cls_score = feat_ptr[5 + class_idx];\n            float box_prob = box_objectness * box_cls_score;\n            if (box_prob > prob_threshold)\n            {\n                Object obj;\n                obj.rect.x = x0;\n                obj.rect.y = y0;\n                obj.rect.width = w;\n                obj.rect.height = h;\n                obj.label = class_idx;\n                obj.prob = box_prob;\n\n                objects.push_back(obj);\n            }\n\n        } // class loop\n        feat_ptr += feat_blob.w;\n\n    } // point anchor loop\n}\n\nstatic int detect_yolox(const cv::Mat& bgr, std::vector<Object>& objects)\n{\n    ncnn::Net yolox;\n\n    yolox.opt.use_vulkan_compute = true;\n    // yolox.opt.use_bf16_storage = true;\n\n    // Focus in yolov5\n    yolox.register_custom_layer(\"YoloV5Focus\", YoloV5Focus_layer_creator);\n\n    // original pretrained model from https://github.com/Megvii-BaseDetection/YOLOX\n    // ncnn model param: https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s_ncnn.tar.gz\n    yolox.load_param(\"yolox.param\");\n    yolox.load_model(\"yolox.bin\");\n\n    int img_w = bgr.cols;\n    int img_h = bgr.rows;\n\n    int w = 
img_w;\n    int h = img_h;\n    float scale = 1.f;\n    if (w > h)\n    {\n        scale = (float)YOLOX_TARGET_SIZE / w;\n        w = YOLOX_TARGET_SIZE;\n        h = h * scale;\n    }\n    else\n    {\n        scale = (float)YOLOX_TARGET_SIZE / h;\n        h = YOLOX_TARGET_SIZE;\n        w = w * scale;\n    }\n    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, w, h);\n\n    // pad to YOLOX_TARGET_SIZE rectangle\n    int wpad = YOLOX_TARGET_SIZE - w;\n    int hpad = YOLOX_TARGET_SIZE - h;\n    ncnn::Mat in_pad;\n    // different from yolov5, yolox only pad on bottom and right side,\n    // which means users don't need to extra padding info to decode boxes coordinate.\n    ncnn::copy_make_border(in, in_pad, 0, hpad, 0, wpad, ncnn::BORDER_CONSTANT, 114.f);\n\n    ncnn::Extractor ex = yolox.create_extractor();\n\n    ex.input(\"images\", in_pad);\n\n    std::vector<Object> proposals;\n\n    {\n        ncnn::Mat out;\n        ex.extract(\"output\", out);\n\n        static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX\n        std::vector<int> strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0]));\n        std::vector<GridAndStride> grid_strides;\n        generate_grids_and_stride(YOLOX_TARGET_SIZE, strides, grid_strides);\n        generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals);\n    }\n\n    // sort all proposals by score from highest to lowest\n    qsort_descent_inplace(proposals);\n\n    // apply nms with nms_threshold\n    std::vector<int> picked;\n    nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH);\n\n    int count = picked.size();\n\n    objects.resize(count);\n    for (int i = 0; i < count; i++)\n    {\n        objects[i] = proposals[picked[i]];\n\n        // adjust offset to original unpadded\n        float x0 = (objects[i].rect.x) / scale;\n        float y0 = (objects[i].rect.y) / scale;\n        float x1 = (objects[i].rect.x + 
objects[i].rect.width) / scale;\n        float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;\n\n        // clip\n        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);\n        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);\n        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);\n        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);\n\n        objects[i].rect.x = x0;\n        objects[i].rect.y = y0;\n        objects[i].rect.width = x1 - x0;\n        objects[i].rect.height = y1 - y0;\n    }\n\n    return 0;\n}\n\nstatic void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)\n{\n    static const char* class_names[] = {\n        \"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\",\n        \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\",\n        \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\",\n        \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\",\n        \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\",\n        \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\",\n        \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\",\n        \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\",\n        \"hair drier\", \"toothbrush\"\n    };\n\n    cv::Mat image = bgr.clone();\n\n    for (size_t i = 0; i < objects.size(); i++)\n    {\n        const Object& obj = objects[i];\n\n        fprintf(stderr, \"%d = %.5f at %.2f %.2f %.2f x %.2f\\n\", 
obj.label, obj.prob,\n                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);\n\n        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));\n\n        char text[256];\n        sprintf(text, \"%s %.1f%%\", class_names[obj.label], obj.prob * 100);\n\n        int baseLine = 0;\n        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);\n\n        int x = obj.rect.x;\n        int y = obj.rect.y - label_size.height - baseLine;\n        if (y < 0)\n            y = 0;\n        if (x + label_size.width > image.cols)\n            x = image.cols - label_size.width;\n\n        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),\n                      cv::Scalar(255, 255, 255), -1);\n\n        cv::putText(image, text, cv::Point(x, y + label_size.height),\n                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));\n    }\n\n    cv::imshow(\"image\", image);\n    cv::waitKey(0);\n}\n\nint main(int argc, char** argv)\n{\n    if (argc != 2)\n    {\n        fprintf(stderr, \"Usage: %s [imagepath]\\n\", argv[0]);\n        return -1;\n    }\n\n    const char* imagepath = argv[1];\n\n    cv::Mat m = cv::imread(imagepath, 1);\n    if (m.empty())\n    {\n        fprintf(stderr, \"cv::imread %s failed\\n\", imagepath);\n        return -1;\n    }\n\n    std::vector<Object> objects;\n    detect_yolox(m, objects);\n\n    draw_objects(m, objects);\n\n    return 0;\n}\n"
  },
  {
    "path": "demo/nebullvm/README.md",
    "content": "# **Accelerate YOLOX inference with nebullvm in Python**\n\nThis document shows how to accelerate YOLOX inference time with nebullvm.\n\n[nebullvm](https://github.com/nebuly-ai/nebullvm) is an open-source library designed to accelerate AI inference of deep learning models in a few lines of code. nebullvm leverages state-of-the-art model optimization techniques such as deep learning compilers (TensorRT, Openvino, ONNX Runtime, TVM, TF Lite, DeepSparse, etc.), various quantization and compression strategies to achieve the maximum physically possible acceleration on the user's hardware.\n\n## Benchmarks\nFollowing are the results of the nebullvm optimization on YOLOX without loss of accuracy.\nFor each model-hardware pairing, response time was evaluated as the average over 100 predictions. The test was run on Nvidia Tesla T4 (g4dn.xlarge) and Intel XEON Scalable (m6i.24xlarge and c6i.12xlarge) on AWS.\n\n| Model   | Hardware     | Unoptimized (ms)| Nebullvm optimized (ms) | Speedup |\n|---------|--------------|-----------------|-------------------------|---------|\n| YOLOX-s | g4dn.xlarge  |       13.6      |           9.0           |   1.5x  |\n| YOLOX-s | m6i.24xlarge |       32.7      |           8.8           |   3.7x  |\n| YOLOX-s | c6i.12xlarge |       34.4      |           12.4          |   2.8x  |\n| YOLOX-m | g4dn.xlarge  |       24.2      |           22.4          |   1.1x  |\n| YOLOX-m | m6i.24xlarge |       55.1      |           36.0          |   2.3x  |\n| YOLOX-m | c6i.12xlarge |       62.5      |           26.9          |   2.6x  |\n| YOLOX-l | g4dn.xlarge  |       84.4      |           80.5          |   1.5x  |\n| YOLOX-l | m6i.24xlarge |       88.0      |           33.7          |   2.6x  |\n| YOLOX-l | c6i.12xlarge |      102.8      |           54.2          |   1.9x  |\n| YOLOX-x | g4dn.xlarge  |       87.3      |           34.0          |   2.6x  |\n| YOLOX-x | m6i.24xlarge |      134.5      |           56.6          |   2.4x  |\n| 
YOLOX-x | c6i.12xlarge |      162.0      |           95.4          |   1.7x  |\n\n## Steps to accelerate YOLOX with nebullvm\n1. Download a YOLOX model from the original [readme](https://github.com/Megvii-BaseDetection/YOLOX)\n2. Optimize YOLOX with nebullvm\n3. Perform inference and compare the latency of the optimized model with that of the original model\n\n[Here](nebullvm_optimization.py) you can find a demo in python.\n\n\nFirst, let's install nebullvm. The simplest way is by using pip.\n```\npip install nebullvm\n```\nNow, let's download one of the YOLOX models and optimize it with nebullvm.\n\n```python\n# Import YOLOX model\nfrom yolox.exp import get_exp\nfrom yolox.data.data_augment import ValTransform\n\nexp = get_exp(None, 'yolox-s') # select model name\nmodel = exp.get_model()\nmodel.cuda()\nmodel.eval()\n\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\ninput_data =  [((torch.randn(1, 3, 640, 640).to(device), ), 0) for i in range(100)]\n\n# Run nebullvm optimization without performance loss\noptimized_model = optimize_model(model, input_data=input_data, optimization_time=\"constrained\")\n```\nFind [here](nebullvm_optimization.py) the complete script in python with more details.\n\nIn this example, we optimized YOLOX without any loss in accuracy. To further speed up the model by means of more aggressive optimization techniques, proceed as follows:\n- Set *optimization_time=\"unconstrained\"*. With the unconstrained option, nebullvm will test time-consuming techniques such as pruning and quantization-aware training (QAT).\n- Set the *metric_drop_ths* parameter to be greater than zero (by default, *metric_drop_ths=0*). In this way, we will allow nebullvm to test optimization techniques that involve a trade-off in a certain metric. 
For example, to test maximum acceleration with a maximum loss of accuracy of 3%, set *metric_drop_ths=0.03* and *metric=\"accuracy\"*.\nFor more information about the nebullvm API, see the [nebullvm documentation](https://github.com/nebuly-ai/nebullvm).\n\n\nLet's now compare the latency of the optimized model with that of the original model. \nNote that before testing the latency of the optimized model, it is necessary to perform some warmup runs, as some optimizers fine-tune certain internal parameters during the first few inferences after optimization.\n\n```python\n# Check performance\nwarmup_iters = 30\nnum_iters = 100\n\n# Unoptimized model performance\nwith torch.no_grad():\n  for i in range(warmup_iters):\n    o = model(img)\n\n    start = time.time()\n    for i in range(num_iters):\n      o = model(img)\nstop = time.time()\nprint(f\"Average inference time of unoptimized YOLOX: {(stop - start)/num_iters*1000} ms\")\n\n# Optimized model performance\nwith torch.no_grad():\n  for i in range(warmup_iters):\n    res = optimized_model(img)\n\n    start = time.time()\n    for i in range(num_iters):\n      res = optimized_model(img)\nstop = time.time()\nprint(f\"Average inference time of YOLOX optimized with nebullvm: {(stop - start)/num_iters*1000} ms\")\n```\nFind [here](nebullvm_optimization.py) the complete script in python with more details.\n"
  },
  {
    "path": "demo/nebullvm/nebullvm_optimization.py",
    "content": "import torch\nimport time\nfrom nebullvm.api.functions import optimize_model # Install DL compilers\nfrom yolox.exp import get_exp\n\n# Get YOLO model\nexp = get_exp(None, 'yolox-s') # select model name\nmodel = exp.get_model()\nmodel.cuda()\nmodel.eval()\n\ndevice = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n\n# Create dummy data for the optimizer\ninput_data =  [((torch.randn(1, 3, 640, 640).to(device), ), 0) for i in range(100)] \n\n# ---------- Optimization ---------- \noptimized_model = optimize_model(model, input_data=input_data, optimization_time=\"constrained\")  # Optimization without performance loss\n\n\n# ---------- Benchmarks ---------- \n# Select image to test the latency of the optimized model\n\n# Create dummy image\nimg = torch.randn(1, 3, 640, 640).to(device)\n\n# Check perfomance\nwarmup_iters = 30\nnum_iters = 100\n\n# Unptimized model perfomance\nwith torch.no_grad():\n  for i in range(warmup_iters):\n    o = model(img)\n\n    start = time.time()\n    for i in range(num_iters):\n      o = model(img)\nstop = time.time()\nprint(f\"Average inference time of unoptimized YOLOX: {(stop - start)/num_iters*1000} ms\")\n\n# Optimized model perfomance\nwith torch.no_grad():\n  for i in range(warmup_iters):\n    res = optimized_model(img)\n\n    start = time.time()\n    for i in range(num_iters):\n      res = optimized_model(img)\nstop = time.time()\nprint(f\"Average inference time of YOLOX otpimized with nebullvm: {(stop - start)/num_iters*1000} ms\")\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "_build"
  },
  {
    "path": "docs/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n# Copyright (c) Facebook, Inc. and its affiliates.\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = _build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)"
  },
  {
    "path": "docs/_static/css/custom.css",
    "content": "/*\n * Copyright (c) Facebook, Inc. and its affiliates.\n * some extra css to make markdown look similar between github/sphinx\n */\n\n/*\n * Below is for install.md:\n */\n .rst-content code {\n    white-space: pre;\n    border: 0px;\n  }\n  \n  .rst-content th {\n    border: 1px solid #e1e4e5;\n  }\n  \n  .rst-content th p {\n    /* otherwise will be default 24px for regular paragraph */\n    margin-bottom: 0px;\n  }\n  \n  .rst-content .line-block {\n    /* otherwise will be 24px */\n    margin-bottom: 0px;\n  }\n  \n  div.section > details {\n    padding-bottom: 1em;\n  }\n  "
  },
  {
    "path": "docs/assignment_visualization.md",
    "content": "# Visualize label assignment\n\nThis tutorial explains how to visualize your label asssignment result when training with YOLOX.\n\n## 1. Visualization command\n\nWe provide a visualization tool to help you visualize your label assignment result. You can find it in [`tools/visualize_assignment.py`](../tools/visualize_assign.py).\n\nHere is an example of command to visualize your label assignment result:\n\n```shell\npython3 tools/visualize_assign.py -f /path/to/your/exp.py yolox-s -d 1 -b 8 --max-batch 2\n```\n\n`max-batch` here means the maximum number of batches to visualize. The default value is 1, which the tool means only visualize the first batch.\n\nBy the way, the mosaic augmentation is used in default dataloader, so you can also see the mosaic result here.\n\nAfter running the command, the logger will show you where the visualization result is saved, let's open it and into the step 2.\n\n## 2. Check the visualization result\n\nHere is an example of visualization result:\n<div align=\"center\"><img src=\"../assets/assignment.png\" width=\"640\"></div>\n\nThose dots in one box is the matched anchor of gt box. **The color of dots is the same as the color of the box** to help you determine which object is assigned to the anchor. Note the box and dots are **instance level** visualization, which means the same class may have different colors.  \n**If the gt box doesn't match any anchor, the box will be marked as red and the red text \"unmatched\" will be drawn over the box**.\n\nPlease feel free to open an issue if you have any questions.\n"
  },
  {
    "path": "docs/cache.md",
    "content": "# Cache Custom Data\n\nThe caching feature is specifically tailored for users with ample memory resources. However, we still offer the option to cache data to disk, but disk performance can vary and may not guarantee optimal user experience. Implementing custom dataset RAM caching is also more straightforward and user-friendly compared to disk caching. With a few simple modifications, users can expect to see a significant increase in training speed, with speeds nearly double that of non-cached datasets.\n\nThis page explains how to cache your own custom data with YOLOX.\n\n## 0. Before you start\n\n**Step1** Clone this repo and follow the [README](../README.md) to install YOLOX.\n\n**Stpe2** Read the [Training on custom data](./train_custom_data.md) tutorial to understand how to prepare your custom data.\n\n## 1. Inheirit from `CacheDataset`\n\n\n**Step1** Create a custom dataset that inherits from the `CacheDataset` class. Note that whether inheriting from `Dataset` or `CacheDataset `, the `__init__()` method of your custom dataset should take the following keyword arguments: `input_dimension`, `cache`, and `cache_type`. Also, call `super().__init__()` and pass in `input_dimension`, `num_imgs`, `cache`, and `cache_type` as input, where `num_imgs` is the size of the dataset.\n\n**Step2** Implement the abstract function `read_img(self, index, use_cache=True)` of parent class and decorate it with `@cache_read_img`.  This function takes an `index` as input and returns an `image`, and the returned image will be used for caching. 
It is recommended to put all repetitive and fixed post-processing operations on the image in this function to reduce the post-processing time of the image during training.\n\n```python\n# CustomDataset.py\nfrom yolox.data.datasets import CacheDataset, cache_read_img\n\nclass CustomDataset(CacheDataset):\n    def __init__(self, input_dimension, cache, cache_type, *args, **kwargs):\n        # Get the required keyword arguments of super().__init__()\n        super().__init__(\n            input_dimension=input_dimension,\n            num_imgs=num_imgs,\n            cache=cache,\n            cache_type=cache_type\n        )\n        # ...\n\n    @cache_read_img\n    def read_img(self, index, use_cache=True):\n        # get image ...\n        # (optional) repetitive and fixed post-processing operations for image\n        return image\n```\n\n## 2. Create your Exp file and return your custom dataset\n\n**Step1** Create a new class that inherits from the `Exp` class provided by the `yolox_base.py`. 
Override the `get_dataset()` and `get_eval_dataset()` method to return an instance of your custom dataset.\n\n**Step2** Implement your own `get_evaluator` method to return an instance of your custom evaluator.\n\n```python\n# CustomeExp.py\nfrom yolox.exp import Exp as MyExp\n\nclass Exp(MyExp):\n    def get_dataset(self, cache, cache_type: str = \"ram\"):\n        return CustomDataset(\n            input_dimension=self.input_size,\n            cache=cache,\n            cache_type=cache_type\n        )\n\n    def get_eval_dataset(self):\n        return CustomDataset(\n            input_dimension=self.input_size,\n        )\n\n    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):\n        return CustomEvaluator(\n            dataloader=self.get_eval_loader(batch_size, is_distributed, testdev=testdev, legacy=legacy),\n            img_size=self.test_size,\n            confthre=self.test_conf,\n            nmsthre=self.nmsthre,\n            num_classes=self.num_classes,\n            testdev=testdev,\n        )\n```\n\n**(Optional)** `get_data_loader` and `get_eval_loader` are now a default behavior in `yolox_base.py` and generally do not need to be changed. If you have to change `get_data_loader`, you need to add the following code at the beginning.\n\n```python\n# CustomeExp.py\nfrom yolox.exp import Exp as MyExp\n\nclass Exp(MyExp):\n    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img: str = None):\n        if self.dataset is None:\n            with wait_for_the_master():\n                assert cache_img is None\n                self.dataset = self.get_dataset(cache=False, cache_type=cache_img)\n        # ...\n\n```\n\n## 3. Cache to Disk\nIt's important to note that the `cache_type` can be `\"ram\"` or `\"disk\"`, depending on where you want to cache your dataset. 
If you choose `\"disk\"`, you need to pass in additional parameters to `super().__init__()` of `CustomDataset`: `data_dir`, `cache_dir_name`, `path_filename`.\n\n- `data_dir`: the root directory of the dataset, e.g. `/path/to/COCO`.\n- `cache_dir_name`: the name of the directory to cache to disk, for example `\"custom_cache\"`, then the files cached to disk will be saved under `/path/to/COCO/custom_cache`.\n- `path_filename`: a list of paths to the data relative to the `data_dir`, e.g. if you have data `/path/to/COCO/train/1.jpg`, `/path/to/COCO/train/2.jpg`, then `path_filename = ['train/1.jpg', ' train/2.jpg']`.\n"
  },
  {
    "path": "docs/conf.py",
    "content": "# -*- coding: utf-8 -*-\n# Code are based on\n# https://github.com/facebookresearch/detectron2/blob/master/docs/conf.py\n# Copyright (c) Facebook, Inc. and its affiliates.\n# Copyright (c) Megvii, Inc. and its affiliates.\n\n# flake8: noqa\n\n# Configuration file for the Sphinx documentation builder.\n#\n# This file does only contain a selection of the most common options. For a\n# full list see the documentation:\n# http://www.sphinx-doc.org/en/master/config\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\nimport os\nimport sys\nfrom unittest import mock\nfrom sphinx.domains import Domain\nfrom typing import Dict, List, Tuple\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\nimport sphinx_rtd_theme\n\n\nclass GithubURLDomain(Domain):\n    \"\"\"\n    Resolve certain links in markdown files to github source.\n    \"\"\"\n\n    name = \"githuburl\"\n    ROOT = \"https://github.com/Megvii-BaseDetection/YOLOX\"\n    # LINKED_DOC = [\"tutorials/install\", \"tutorials/getting_started\"]\n    LINKED_DOC = [\"tutorials/install\",]\n\n    def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode):\n        github_url = None\n        if not target.endswith(\"html\") and target.startswith(\"../../\"):\n            url = target.replace(\"../\", \"\")\n            github_url = url\n        if fromdocname in self.LINKED_DOC:\n            # unresolved links in these docs are all github links\n            github_url = target\n\n        if github_url is not None:\n            if github_url.endswith(\"MODEL_ZOO\") or github_url.endswith(\"README\"):\n                # bug of recommonmark.\n                # 
https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155\n                github_url += \".md\"\n            print(\"Ref {} resolved to github:{}\".format(target, github_url))\n            contnode[\"refuri\"] = self.ROOT + github_url\n            return [(\"githuburl:any\", contnode)]\n        else:\n            return []\n\n\n# to support markdown\nfrom recommonmark.parser import CommonMarkParser\n\nsys.path.insert(0, os.path.abspath(\"../\"))\nos.environ[\"_DOC_BUILDING\"] = \"True\"\nDEPLOY = os.environ.get(\"READTHEDOCS\") == \"True\"\n\n\n# -- Project information -----------------------------------------------------\n\n# fmt: off\ntry:\n    import torch  # noqa\nexcept ImportError:\n    for m in [\n        \"torch\", \"torchvision\", \"torch.nn\", \"torch.nn.parallel\", \"torch.distributed\", \"torch.multiprocessing\", \"torch.autograd\",\n        \"torch.autograd.function\", \"torch.nn.modules\", \"torch.nn.modules.utils\", \"torch.utils\", \"torch.utils.data\", \"torch.onnx\",\n        \"torchvision\", \"torchvision.ops\",\n    ]:\n        sys.modules[m] = mock.Mock(name=m)\n    sys.modules['torch'].__version__ = \"1.7\"  # fake version\n    HAS_TORCH = False\nelse:\n    try:\n        torch.ops.yolox = mock.Mock(name=\"torch.ops.yolox\")\n    except:\n        pass\n    HAS_TORCH = True\n\nfor m in [\n    \"cv2\", \"scipy\", \"portalocker\", \"yolox._C\",\n    \"pycocotools\", \"pycocotools.mask\", \"pycocotools.coco\", \"pycocotools.cocoeval\",\n    \"google\", \"google.protobuf\", \"google.protobuf.internal\", \"onnx\",\n    \"caffe2\", \"caffe2.proto\", \"caffe2.python\", \"caffe2.python.utils\", \"caffe2.python.onnx\", \"caffe2.python.onnx.backend\",\n]:\n    sys.modules[m] = mock.Mock(name=m)\n# fmt: on\nsys.modules[\"cv2\"].__version__ = \"3.4\"\n\nimport yolox  # isort: skip\n\n# if HAS_TORCH:\n#     from detectron2.utils.env import fixup_module_metadata\n\n#     
fixup_module_metadata(\"torch.nn\", torch.nn.__dict__)\n#     fixup_module_metadata(\"torch.utils.data\", torch.utils.data.__dict__)\n\n\nproject = \"YOLOX\"\ncopyright = \"2021-2021, YOLOX contributors\"\nauthor = \"YOLOX contributors\"\n\n# The short X.Y version\nversion = yolox.__version__\n# The full version, including alpha/beta/rc tags\nrelease = version\n\n\n# -- General configuration ---------------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n#\nneeds_sphinx = \"3.0\"\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    \"recommonmark\",\n    \"sphinx.ext.autodoc\",\n    \"sphinx.ext.napoleon\",\n    \"sphinx.ext.intersphinx\",\n    \"sphinx.ext.todo\",\n    \"sphinx.ext.coverage\",\n    \"sphinx.ext.mathjax\",\n    \"sphinx.ext.viewcode\",\n    \"sphinx.ext.githubpages\",\n    'sphinx_markdown_tables',\n]\n\n# -- Configurations for plugins ------------\nnapoleon_google_docstring = True\nnapoleon_include_init_with_doc = True\nnapoleon_include_special_with_doc = True\nnapoleon_numpy_docstring = False\nnapoleon_use_rtype = False\nautodoc_inherit_docstrings = False\nautodoc_member_order = \"bysource\"\n\nif DEPLOY:\n    intersphinx_timeout = 10\nelse:\n    # skip this when building locally\n    intersphinx_timeout = 0.5\nintersphinx_mapping = {\n    \"python\": (\"https://docs.python.org/3.6\", None),\n    \"numpy\": (\"https://docs.scipy.org/doc/numpy/\", None),\n    \"torch\": (\"https://pytorch.org/docs/master/\", None),\n}\n# -------------------------\n\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = [\"_templates\"]\n\nsource_suffix = [\".rst\", \".md\"]\n\n# The master toctree document.\nmaster_doc = \"index\"\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = None\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path.\nexclude_patterns = [\"_build\", \"Thumbs.db\", \".DS_Store\", \"build\", \"README.md\", \"tutorials/README.md\"]\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = \"sphinx\"\n\n\n# -- Options for HTML output -------------------------------------------------\n\nhtml_theme = \"sphinx_rtd_theme\"\nhtml_theme_path = [sphinx_rtd_theme.get_html_theme_path()]\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\n#\n# html_theme_options = {}\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = [\"_static\"]\nhtml_css_files = [\"css/custom.css\"]\n\n# Custom sidebar templates, must be a dictionary that maps document names\n# to template names.\n#\n# The default sidebars (for documents that don't match any pattern) are\n# defined by theme itself.  
Builtin themes are using these templates by\n# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',\n# 'searchbox.html']``.\n#\n# html_sidebars = {}\n\n\n# -- Options for HTMLHelp output ---------------------------------------------\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = \"yoloxdoc\"\n\n\n# -- Options for LaTeX output ------------------------------------------------\n\nlatex_elements = {\n    # The paper size ('letterpaper' or 'a4paper').\n    #\n    # 'papersize': 'letterpaper',\n    # The font size ('10pt', '11pt' or '12pt').\n    #\n    # 'pointsize': '10pt',\n    # Additional stuff for the LaTeX preamble.\n    #\n    # 'preamble': '',\n    # Latex figure (float) alignment\n    #\n    # 'figure_align': 'htbp',\n}\n\n# Grouping the document tree into LaTeX files. List of tuples\n# (source start file, target name, title,\n#  author, documentclass [howto, manual, or own class]).\nlatex_documents = [\n    (master_doc, \"yolox.tex\", \"yolox Documentation\", \"yolox contributors\", \"manual\")\n]\n\n\n# -- Options for manual page output ------------------------------------------\n\n# One entry per manual page. List of tuples\n# (source start file, name, description, authors, manual section).\nman_pages = [(master_doc, \"YOLOX\", \"YOLOX Documentation\", [author], 1)]\n\n\n# -- Options for Texinfo output ----------------------------------------------\n\n# Grouping the document tree into Texinfo files. 
List of tuples\n# (source start file, target name, title, author,\n#  dir menu entry, description, category)\ntexinfo_documents = [\n    (\n        master_doc,\n        \"YOLOX\",\n        \"YOLOX Documentation\",\n        author,\n        \"YOLOX\",\n        \"One line description of project.\",\n        \"Miscellaneous\",\n    )\n]\n\n\n# -- Options for todo extension ----------------------------------------------\n\n# If true, `todo` and `todoList` produce output, else they produce nothing.\ntodo_include_todos = True\n\n\ndef autodoc_skip_member(app, what, name, obj, skip, options):\n    # we hide something deliberately\n    if getattr(obj, \"__HIDE_SPHINX_DOC__\", False):\n        return True\n\n    # Hide some that are deprecated or not intended to be used\n    HIDDEN = {\n        \"ResNetBlockBase\",\n        \"GroupedBatchSampler\",\n        \"build_transform_gen\",\n        \"export_caffe2_model\",\n        \"export_onnx_model\",\n        \"apply_transform_gens\",\n        \"TransformGen\",\n        \"apply_augmentations\",\n        \"StandardAugInput\",\n        \"build_batch_data_loader\",\n        \"draw_panoptic_seg_predictions\",\n        \"WarmupCosineLR\",\n        \"WarmupMultiStepLR\",\n    }\n    try:\n        if name in HIDDEN or (\n            hasattr(obj, \"__doc__\") and obj.__doc__.lower().strip().startswith(\"deprecated\")\n        ):\n            print(\"Skipping deprecated object: {}\".format(name))\n            return True\n    except:\n        pass\n    return skip\n\n\n# _PAPER_DATA = {\n#     \"resnet\": (\"1512.03385\", \"Deep Residual Learning for Image Recognition\"),\n#     \"fpn\": (\"1612.03144\", \"Feature Pyramid Networks for Object Detection\"),\n#     \"mask r-cnn\": (\"1703.06870\", \"Mask R-CNN\"),\n#     \"faster r-cnn\": (\n#         \"1506.01497\",\n#         \"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks\",\n#     ),\n#     \"deformconv\": (\"1703.06211\", \"Deformable Convolutional 
Networks\"),\n#     \"deformconv2\": (\"1811.11168\", \"Deformable ConvNets v2: More Deformable, Better Results\"),\n#     \"panopticfpn\": (\"1901.02446\", \"Panoptic Feature Pyramid Networks\"),\n#     \"retinanet\": (\"1708.02002\", \"Focal Loss for Dense Object Detection\"),\n#     \"cascade r-cnn\": (\"1712.00726\", \"Cascade R-CNN: Delving into High Quality Object Detection\"),\n#     \"lvis\": (\"1908.03195\", \"LVIS: A Dataset for Large Vocabulary Instance Segmentation\"),\n#     \"rrpn\": (\"1703.01086\", \"Arbitrary-Oriented Scene Text Detection via Rotation Proposals\"),\n#     \"imagenet in 1h\": (\"1706.02677\", \"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour\"),\n#     \"xception\": (\"1610.02357\", \"Xception: Deep Learning with Depthwise Separable Convolutions\"),\n#     \"mobilenet\": (\n#         \"1704.04861\",\n#         \"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications\",\n#     ),\n#     \"deeplabv3+\": (\n#         \"1802.02611\",\n#         \"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation\",\n#     ),\n#     \"dds\": (\"2003.13678\", \"Designing Network Design Spaces\"),\n#     \"scaling\": (\"2103.06877\", \"Fast and Accurate Model Scaling\"),\n# }\n\n\n# def paper_ref_role(\n#     typ: str,\n#     rawtext: str,\n#     text: str,\n#     lineno: int,\n#     inliner,\n#     options: Dict = {},\n#     content: List[str] = [],\n# ):\n#     \"\"\"\n#     Parse :paper:`xxx`. 
Similar to the \"extlinks\" sphinx extension.\n#     \"\"\"\n#     from docutils import nodes, utils\n#     from sphinx.util.nodes import split_explicit_title\n\n#     text = utils.unescape(text)\n#     has_explicit_title, title, link = split_explicit_title(text)\n#     link = link.lower()\n#     if link not in _PAPER_DATA:\n#         inliner.reporter.warning(\"Cannot find paper \" + link)\n#         paper_url, paper_title = \"#\", link\n#     else:\n#         paper_url, paper_title = _PAPER_DATA[link]\n#         if \"/\" not in paper_url:\n#             paper_url = \"https://arxiv.org/abs/\" + paper_url\n#     if not has_explicit_title:\n#         title = paper_title\n#     pnode = nodes.reference(title, title, internal=False, refuri=paper_url)\n#     return [pnode], []\n\n\ndef setup(app):\n    from recommonmark.transform import AutoStructify\n\n    app.add_domain(GithubURLDomain)\n    app.connect(\"autodoc-skip-member\", autodoc_skip_member)\n    # app.add_role(\"paper\", paper_ref_role)\n    app.add_config_value(\n        \"recommonmark_config\",\n        {\"enable_math\": True, \"enable_inline_math\": True, \"enable_eval_rst\": True},\n        True,\n    )\n    app.add_transform(AutoStructify)\n"
  },
  {
    "path": "docs/freeze_module.md",
    "content": "# Freeze module\n\nThis page guide users to freeze module in YOLOX.  \nExp controls everything in YOLOX, so let's start from creating an Exp object.\n\n## 1. Create your own expermiment object\n\nWe take an example of YOLOX-S model on COCO dataset to give a more clear guide.\n\nImport the config you want (or write your own Exp object inherit from `yolox.exp.BaseExp`).\n```python\nfrom yolox.exp.default.yolox_s import Exp as MyExp\n```\n\n## 2. Override `get_model` method\n\nHere is a simple code to freeze backbone (FPN not included) of module.\n```python\nclass Exp(MyExp):\n\n    def get_model(self):\n        from yolox.utils import freeze_module\n        model = super().get_model()\n        freeze_module(model.backbone.backbone)\n        return model\n```\nif you only want to freeze FPN, `freeze_module(model.backbone)` might help.\n\n## 3. Train\nSuppose that the path of your Exp  is `/path/to/my_exp.py`, use the following command to train your model.\n```bash\npython3 -m yolox.tools.train -f /path/to/my_exp.py\n```\nFor more details of training, run the following command.\n```bash\npython3 -m yolox.tools.train --help\n```\n"
  },
  {
    "path": "docs/index.rst",
    "content": "\nWelcome to YOLOX's documentation!\n======================================\n\n.. image:: ../assets/logo.png\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Quick Run\n   \n   quick_run\n   model_zoo\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Tutorials\n\n   train_custom_data\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Deployment\n\n   demo/trt_py_readme\n   demo/trt_cpp_readme\n   demo/megengine_cpp_readme\n   demo/megengine_py_readme\n   demo/ncnn_android_readme\n   demo/ncnn_cpp_readme\n   demo/onnx_readme\n   demo/openvino_py_readme\n   demo/openvino_cpp_readme\n"
  },
  {
    "path": "docs/manipulate_training_image_size.md",
    "content": "# Manipulating Your Training Image Size\n\nThis tutorial explains how to control your image size when training on your own data.\n\n## 1. Introduction\n\nThere are 3 hyperparamters control the training size:\n\n- self.input_size = (640, 640) &emsp; #(height, width)\n- self.multiscale_range = 5\n- self.random_size = (14, 26)\n\nThere is 1 hyperparameter constrols the testing size:\n\n- self.test_size = (640, 640)\n\nThe self.input_size is suggested to set to the same value as self.test_size. By default, it is set to (640, 640) for most models and (416, 416) for yolox-tiny and yolox-nano.\n\n## 2. Multi Scale Training\n\nWhen training on your custom dataset, you can use multiscale training in 2 ways:\n\n1. **【Default】Only specifying the self.input_size and leaving others unchanged.**\n\n   If so, the actual multiscale sizes range from:\n\n   [self.input_size[0] - self.multiscale_range\\*32,  self.input_size[0] + self.multiscale_range\\*32]\n\n   For example, if you only set:\n\n   ```python\n   self.input_size = (640, 640)\n   ```\n\n   the actual multiscale range is [640 - 5*32, 640 + 5\\*32], i.e., [480, 800].\n\n   You can modify self.multiscale_range to change the multiscale range.\n\n2. **Simultaneously specifying the self.input_size and self.random_size**\n\n   ```python\n   self.input_size = (416, 416)\n   self.random_size = (10, 20)\n   ```\n\n   In this case, the actual multiscale range is [self.random_size[0]\\*32, self.random_size[1]\\*32], i.e., [320, 640]\n\n   **Note: You must specify the self.input_size because it is used for initializing resize aug in dataset.**\n\n## 3. Single Scale Training\n\nIf you want to train in a single scale. You need to specify the self.input_size and self.multiscale_range=0:\n\n```python\nself.input_size = (416, 416)\nself.multiscale_range = 0\n```\n\n**DO NOT** set the self.random_size.\n"
  },
  {
    "path": "docs/mlflow_integration.md",
    "content": "## MLFlow Integration\nYOLOX now supports MLFlow integration. MLFlow is an open-source platform for managing the end-to-end machine learning lifecycle. It is designed to work with any ML library, algorithm, deployment tool, or language. MLFlow can be used to track experiments, metrics, and parameters, and to log and visualize model artifacts. \\\nFor more information, please refer to: [MLFlow Documentation](https://www.mlflow.org/docs/latest/index.html)\n\n## Follow these steps to start logging your experiments to MLFlow:\n### Step-1: Install MLFlow via pip \n```bash\npip install mlflow python-dotenv\n```\n\n### Step-2: Set up MLFlow Tracking Server\nStart or connect to a MLFlow tracking server like databricks. You can start a local tracking server by running the following command:\n```bash\nmlflow server --host 127.0.0.1 --port 8080\n```\nRead more about setting up MLFlow tracking server [here](https://mlflow.org/docs/latest/tracking/server.html#mlflow-tracking-server)\n\n### Step-3: Set up MLFlow Environment Variables\nSet the following environment variables in your `.env` file:\n```bash\nMLFLOW_TRACKING_URI=\"127.0.0.1:5000\"  # set to your mlflow server URI\nMLFLOW_EXPERIMENT_NAME=\"/path/to/experiment\"  # set to your experiment name\nMLFLOW_TAGS={\"release.candidate\": \"DEV1\", \"release.version\": \"0.0.0\"}\n# config related to logging model to mlflow as pyfunc\nYOLOX_MLFLOW_LOG_MODEL_ARTIFACTS=\"True\" # whether to log model (best or historical) or not \nYOLOX_MLFLOW_LOG_MODEL_PER_n_EPOCHS=30 # try logging model only after every n epochs\nYOLOX_MLFLOW_LOG_Nth_EPOCH_MODELS=\"False\" # whether to log step model along with best_model or not\nYOLOX_MLFLOW_RUN_NAME=\"\" # give a custom name to your run, otherwise a random name is assign by mlflow\nYOLOX_MLFLOW_FLATTEN_PARAMS=\"True\" # flatten any sub sub params of dict to be logged as simple key value pair\n\n\nMLFLOW_ENABLE_SYSTEM_METRICS_LOGGING=True # log system gpu usage and other 
metrics\nMLFLOW_NESTED_RUN=\"False\" # whether to run as a nested run of given run_id\nMLFLOW_RUN_ID=\"\" # continue training from a given run_id\n```\n### Step-4: Provide --logger \"mlflow\" to the training script\n```bash\npython tools/train.py -l mlflow -f exps/path/to/exp.py -d 1 -b 8 --fp16 -o -c pre_trained_model/<model>.pth\n# note the -l mlflow flag\n# one working example is this\npython tools/train.py -l mlflow -f exps/example/custom/yolox_s.py -d 1 -b 8 --fp16 -o -c pre_trained_model/yolox_s.pth\n```\n### Step-5: (Optional) Start the MLFlow UI and track your experiments\nIf you log runs to a local mlruns directory, run the following command in the directory above it, then access http://127.0.0.1:5000 in your browser.\n\n```bash\nmlflow ui --port 5000\n```\n\n## Optional Databricks Integration\n\n### Step-1: Install Databricks sdk\n```bash\npip install databricks-sdk\n```\n\n### Step-2: Set up Databricks Environment Variables\nSet the following environment variables in your `.env` file:\n```bash\nMLFLOW_TRACKING_URI=\"databricks\"  # set to databricks\nMLFLOW_EXPERIMENT_NAME=\"/Users/<user>/<experiment_name>/\"\nDATABRICKS_HOST = \"https://dbc-1234567890123456.cloud.databricks.com\" # set to your server URI\nDATABRICKS_TOKEN = \"dapixxxxxxxxxxxxx\"\n```"
  },
  {
    "path": "docs/model_zoo.md",
    "content": "# Model Zoo\n\n## Standard Models.\n\n|Model |size |mAP<sup>val<br>0.5:0.95 |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---: | :---:    | :---:       |:---:     |:---:  | :---: | :----: |\n|[YOLOX-s](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_s.py)    |640  |40.5 |40.5      |9.8      |9.0 | 26.8 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth) |\n|[YOLOX-m](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_m.py)    |640  |46.9 |47.2      |12.3     |25.3 |73.8| [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth) |\n|[YOLOX-l](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_l.py)    |640  |49.7 |50.1      |14.5     |54.2| 155.6 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_l.pth) |\n|[YOLOX-x](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_x.py)   |640   |51.1 |**51.5**  | 17.3    |99.1 |281.9 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_x.pth) |\n|[YOLOX-Darknet53](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolov3.py)   |640  | 47.7 | 48.0 | 11.1 |63.7 | 185.3 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_darknet.pth) \n\n<details>\n<summary>Legacy models</summary>\n\n|Model |size |mAP<sup>test<br>0.5:0.95 | Speed V100<br>(ms) | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---: | :---:       |:---:     |:---:  | :---: | :----: |\n|[YOLOX-s](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_s.py)    |640  |39.6      |9.8     |9.0 | 26.8 | 
[onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EW62gmO2vnNNs5npxjzunVwB9p307qqygaCkXdTO88BLUg?e=NMTQYw)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.pth) |\n|[YOLOX-m](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_m.py)    |640  |46.4      |12.3     |25.3 |73.8| [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/ERMTP7VFqrVBrXKMU7Vl4TcBQs0SUeCT7kvc-JdIbej4tQ?e=1MDo9y)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_m.pth) |\n|[YOLOX-l](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_l.py)    |640  |50.0  |14.5 |54.2| 155.6 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EWA8w_IEOzBKvuueBqfaZh0BeoG5sVzR-XYbOJO4YlOkRw?e=wHWOBE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_l.pth) |\n|[YOLOX-x](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_x.py)   |640  |**51.2**      | 17.3 |99.1 |281.9 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdgVPHBziOVBtGAXHfeHI5kBza0q9yyueMGdT0wXZfI1rQ?e=tABO5u)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_x.pth) |\n|[YOLOX-Darknet53](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolov3.py)   |640  | 47.4      | 11.1 |63.7 | 185.3 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EZ-MV1r_fMFPkPrNjvbJEMoBLOLAnXH-XKEB77w8LhXL6Q?e=mf6wOc)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_darknet53.pth) |\n\n</details>\n\n## Light Models.\n\n|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---:  |  :---:       |:---:     |:---:  | :---: |\n|[YOLOX-Nano](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_nano.py) |416  
|25.8  | 0.91 |1.08 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.pth) |\n|[YOLOX-Tiny](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_tiny.py) |416  |32.8 | 5.06 |6.45 | [github](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_tiny.pth) |\n\n\n<details>\n<summary>Legacy models</summary>\n\n|Model |size |mAP<sup>val<br>0.5:0.95 | Params<br>(M) |FLOPs<br>(G)| weights |\n| ------        |:---:  |  :---:       |:---:     |:---:  | :---: |\n|[YOLOX-Nano](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_nano.py) |416  |25.3  | 0.91 |1.08 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EdcREey-krhLtdtSnxolxiUBjWMy6EFdiaO9bdOwZ5ygCQ?e=yQpdds)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_nano.pth) |\n|[YOLOX-Tiny](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/default/yolox_tiny.py) |416  |32.8 | 5.06 |6.45 | [onedrive](https://megvii-my.sharepoint.cn/:u:/g/personal/gezheng_megvii_com/EbZuinX5X1dJmNy8nqSRegABWspKw3QpXxuO82YSoFN1oQ?e=Q7V7XE)/[github](https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_tiny_32dot8.pth) |\n\n</details>\n"
  },
  {
    "path": "docs/quick_run.md",
    "content": "\n# Get Started\n\n## 1.Installation\n\nStep1. Install YOLOX.\n```shell\ngit clone git@github.com:Megvii-BaseDetection/YOLOX.git\ncd YOLOX\npip3 install -U pip && pip3 install -r requirements.txt\npip3 install -v -e .  # or  python3 setup.py develop\n```\nStep2. Install [pycocotools](https://github.com/cocodataset/cocoapi).\n\n```shell\npip3 install cython; pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'\n```\n\n## 2.Demo\n\nStep1. Download a pretrained model from the benchmark table.\n\nStep2. Use either -n or -f to specify your detector's config. For example:\n\n```shell\npython tools/demo.py image -n yolox-s -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\nor\n```shell\npython tools/demo.py image -f exps/default/yolox_s.py -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\nDemo for video:\n```shell\npython tools/demo.py video -n yolox-s -c /path/to/your/yolox_s.pth --path /path/to/your/video --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu]\n```\n\n\n## 3.Reproduce our results on COCO\n\nStep1. Prepare COCO dataset\n```shell\ncd <YOLOX_HOME>\nln -s /path/to/your/COCO ./datasets/COCO\n```\n\nStep2. 
Reproduce our results on COCO by specifying -n:\n\n```shell\npython tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache]\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n* -d: number of gpu devices\n* -b: total batch size, the recommended number for -b is num-gpu * 8\n* --fp16: mixed precision training\n* --cache: caching imgs into RAM to accelarate training, which need large system RAM.\n\n**Weights & Biases for Logging**\n\nTo use W&B for logging, install wandb in your environment and log in to your W&B account using\n\n```shell\npip install wandb\nwandb login\n```\n\nLog in to your W&B account\n\nTo start logging metrics to W&B during training add the flag `--logger` to the previous command and use the prefix \"wandb-\" to specify arguments for initializing the wandb run.\n\n```shell\npython tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache] --logger wandb wandb-project <project name>\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n\nMore WandbLogger arguments include\n\n```shell\npython tools/train.py .... --logger wandb wandb-project <project-name> \\\n                wandb-name <run-name> \\\n                wandb-id <run-id> \\\n                wandb-save_dir <save-dir> \\\n                wandb-num_eval_images <num-images> \\\n                wandb-log_checkpoints <bool>\n```\n\nMore information available [here](https://docs.wandb.ai/guides/integrations/other/yolox).\n\n**Multi Machine Training**\n\nWe also support multi-nodes training. 
Just add the following args:\n* --num\\_machines: num of your total training nodes\n* --machine\\_rank: specify the rank of each node\n\nWhen using -f, the above commands are equivalent to:\n\n```shell\npython tools/train.py -f exps/default/yolox_s.py -d 8 -b 64 --fp16 -o [--cache]\n                         exps/default/yolox_m.py\n                         exps/default/yolox_l.py\n                         exps/default/yolox_x.py\n```\n\n## 4.Evaluation\n\nWe support batch testing for fast evaluation:\n\n```shell\npython tools/eval.py -n  yolox-s -c yolox_s.pth -b 64 -d 8 --conf 0.001 [--fp16] [--fuse]\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n* --fuse: fuse conv and bn\n* -d: number of GPUs used for evaluation. DEFAULT: All GPUs available will be used.\n* -b: total batch size across all GPUs\n\nTo reproduce the speed test, we use the following command:\n```shell\npython tools/eval.py -n  yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --fuse\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n"
  },
  {
    "path": "docs/requirements-doc.txt",
    "content": "docutils==0.16\n# https://github.com/sphinx-doc/sphinx/commit/7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d\nsphinx==3.2.0\nrecommonmark==0.6.0\nsphinx_rtd_theme\nomegaconf>=2.1.0.dev24\nhydra-core>=1.1.0.dev5\nsphinx-markdown-tables==0.0.15\n"
  },
  {
    "path": "docs/train_custom_data.md",
    "content": "# Train Custom Data\n\nThis page explains how to train your own custom data with YOLOX.\n\nWe take an example of fine-tuning YOLOX-S model on VOC dataset to give a more clear guide.\n\n## 0. Before you start\nClone this repo and follow the [README](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/README.md) to install YOLOX.\n\n## 1. Create your own dataset\n**Step 1** Prepare your own dataset with images and labels first. For labeling images, you can use tools like [Labelme](https://github.com/wkentaro/labelme) or [CVAT](https://github.com/openvinotoolkit/cvat).\n\n**Step 2** Then, you should write the corresponding Dataset Class which can load images and labels through `__getitem__` method. We currently support COCO format and VOC format.\n\nYou can also write the Dataset by your own. Let's take the [VOC](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/voc.py#L151) Dataset file for example:\n```python\n    @Dataset.resize_getitem\n    def __getitem__(self, index):\n        img, target, img_info, img_id = self.pull_item(index)\n\n        if self.preproc is not None:\n            img, target = self.preproc(img, target, self.input_dim)\n\n        return img, target, img_info, img_id\n```\n\nOne more thing worth noting is that you should also implement [pull_item](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/voc.py#L129) and [load_anno](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/datasets/voc.py#L121) method for the `Mosiac` and `MixUp` augmentations.\n\n**Step 3** Prepare the evaluator. 
We currently have [COCO evaluator](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/evaluators/coco_evaluator.py) and [VOC evaluator](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/evaluators/voc_evaluator.py).\nIf you have your own format data or evaluation metric, you can write your own evaluator.\n\n**Step 4** Put your dataset under `$YOLOX_DIR/datasets`, for VOC:\n\n```shell\nln -s /path/to/your/VOCdevkit ./datasets/VOCdevkit\n```\n* The path \"VOCdevkit\" will be used in your exp file described in next section. Specifically, in `get_data_loader` and `get_eval_loader` function.\n\n✧✧✧ You can download the mini-coco128 dataset by the [link](https://drive.google.com/file/d/16N3u36ycNd70m23IM7vMuRQXejAJY9Fs/view?usp=sharing), and then unzip it to the `datasets` directory. The dataset has been converted from YOLO format to COCO format, and can be used directly as a dataset for testing whether the train environment can be runned successfully.\n\n## 2. Create your Exp file to control everything\nWe put everything involved in a model to one single Exp file, including model setting, training setting, and testing setting.\n\n**A complete Exp file is at [yolox_base.py](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/exp/yolox_base.py).** It may be too long to write for every exp, but you can inherit the base Exp file and only overwrite the changed part.\n\nLet's take the [VOC Exp file](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/example/yolox_voc/yolox_voc_s.py) as an example.\n\nWe select `YOLOX-S` model here, so we should change the network depth and width. 
VOC has only 20 classes, so we should also change the `num_classes`.\n\nThese configs are changed in the `init()` method:\n```python\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.num_classes = 20\n        self.depth = 0.33\n        self.width = 0.50\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n```\n\nBesides, you should also overwrite the `dataset` and `evaluator`, prepared before training the model on your own data.\n\nPlease see [get_data_loader](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/example/yolox_voc/yolox_voc_s.py#L20), [get_eval_loader](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/example/yolox_voc/yolox_voc_s.py#L82), and [get_evaluator](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/exps/example/yolox_voc/yolox_voc_s.py#L113) for more details.\n\n✧✧✧ You can also see the `exps/example/custom` directory for more details.\n\n## 3. Train\nExcept special cases, we always recommend to use our [COCO pretrained weights](https://github.com/Megvii-BaseDetection/YOLOX/blob/main/README.md) for initializing the model.\n\nOnce you get the Exp file and the COCO pretrained weights we provided, you can train your own model by the following below command:\n```bash\npython tools/train.py -f /path/to/your/Exp/file -d 8 -b 64 --fp16 -o -c /path/to/the/pretrained/weights [--cache]\n```\n* --cache: we now support RAM caching to speed up training! Make sure you have enough system RAM when adopting it. 
\n\nor take the `YOLOX-S` VOC training for example:\n```bash\npython tools/train.py -f exps/example/yolox_voc/yolox_voc_s.py -d 8 -b 64 --fp16 -o -c /path/to/yolox_s.pth [--cache]\n```\n\n✧✧✧ For example:\n- If you download the [mini-coco128](https://drive.google.com/file/d/16N3u36ycNd70m23IM7vMuRQXejAJY9Fs/view?usp=sharing) and unzip it to the `datasets`, you can direct run the following training code.\n    ```bash\n    python tools/train.py -f exps/example/custom/yolox_s.py -d 8 -b 64 --fp16 -o -c /path/to/yolox_s.pth\n    ```\n\n(Don't worry for the different shape of detection head between the pretrained weights and your own model, we will handle it)\n\n## 4. Tips for Best Training Results\n\nAs **YOLOX** is an anchor-free detector with only several hyper-parameters, most of the time good results can be obtained with no changes to the models or training settings.\nWe thus always recommend you first train with all default training settings.\n\nIf at first you don't get good results, there are steps you could consider to improve the model.\n\n**Model Selection** We provide `YOLOX-Nano`, `YOLOX-Tiny`, and `YOLOX-S` for mobile deployments, while `YOLOX-M`/`L`/`X` for cloud or high performance GPU deployments.\n\nIf your deployment meets any compatibility issues. 
we recommend `YOLOX-DarkNet53`.\n\n**Training Configs** If your training overfits early, then you can reduce max\\_epoch or decrease the basic\\_lr\\_per\\_img and min\\_lr\\_ratio in your Exp file:\n\n```python\n# --------------  training config --------------------- #\n    self.warmup_epochs = 5\n    self.max_epoch = 300\n    self.warmup_lr = 0\n    self.basic_lr_per_img = 0.01 / 64.0\n    self.scheduler = \"yoloxwarmcos\"\n    self.no_aug_epochs = 15\n    self.min_lr_ratio = 0.05\n    self.ema = True\n\n    self.weight_decay = 5e-4\n    self.momentum = 0.9\n```\n\n**Aug Configs** You may also change the degree of the augmentations.\n\nGenerally, for small models, you should weaken the aug, while for large models or small datasets, you may enhance the aug in your Exp file:\n```python\n# --------------- transform config ----------------- #\n    self.degrees = 10.0\n    self.translate = 0.1\n    self.scale = (0.1, 2)\n    self.mosaic_scale = (0.8, 1.6)\n    self.shear = 2.0\n    self.perspective = 0.0\n    self.enable_mixup = True\n```\n\n**Design your own detector** You may refer to our [Arxiv](https://arxiv.org/abs/2107.08430) paper for details and suggestions for designing your own detector.\n"
  },
  {
    "path": "docs/updates_note.md",
    "content": "\n# Updates notes\n\n## 【2021/08/19】\n\n* Support image caching for faster training, which requires large system RAM. \n* Remove the dependence of apex and support torch amp training. \n* Optimize the preprocessing for faster training \n* Replace the older distort augmentation with new HSV aug for faster training and better performance. \n\n### 2X Faster training\n\nWe optimize the data preprocess and support image caching with `--cache` flag:\n\n```shell\npython tools/train.py -n yolox-s -d 8 -b 64 --fp16 -o [--cache]\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n* -d: number of gpu devices\n* -b: total batch size, the recommended number for -b is num-gpu * 8\n* --fp16: mixed precision training\n* --cache: caching imgs into RAM to accelarate training, which need large system RAM.\n\n### Higher performance\n\nNew models achieve **~1%** higher performance! See [Model_Zoo](model_zoo.md) for more details.\n\n### Support torch amp\n\nWe now support torch.cuda.amp training and Apex is not used anymore.\n\n### Breaking changes\n\nWe remove the normalization operation like -mean/std. This will make the old weights **incompatible**.\n\nIf you still want to use old weights, you can add `--legacy' in demo and eval:\n\n```shell\npython tools/demo.py image -n yolox-s -c /path/to/your/yolox_s.pth --path assets/dog.jpg --conf 0.25 --nms 0.45 --tsize 640 --save_result --device [cpu/gpu] [--legacy]\n```\n\nand \n\n```shell\npython tools/eval.py -n  yolox-s -c yolox_s.pth -b 64 -d 8 --conf 0.001 [--fp16] [--fuse] [--legacy]\n                         yolox-m\n                         yolox-l\n                         yolox-x\n```\n\nBut for deployment demo, we don't support the old weights anymore. Users could checkout to YOLOX version 0.1.0 to use legacy weights for deployment\n\n\n"
  },
  {
    "path": "exps/default/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n"
  },
  {
    "path": "exps/default/yolov3.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nimport torch.nn as nn\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 1.0\n        self.width = 1.0\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n\n    def get_model(self, sublinear=False):\n        def init_yolo(M):\n            for m in M.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eps = 1e-3\n                    m.momentum = 0.03\n        if \"model\" not in self.__dict__:\n            from yolox.models import YOLOX, YOLOFPN, YOLOXHead\n            backbone = YOLOFPN()\n            head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act=\"lrelu\")\n            self.model = YOLOX(backbone, head)\n        self.model.apply(init_yolo)\n        self.model.head.initialize_biases(1e-2)\n\n        return self.model\n"
  },
  {
    "path": "exps/default/yolox_l.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 1.0\n        self.width = 1.0\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n"
  },
  {
    "path": "exps/default/yolox_m.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.67\n        self.width = 0.75\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n"
  },
  {
    "path": "exps/default/yolox_nano.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nimport torch.nn as nn\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.33\n        self.width = 0.25\n        self.input_size = (416, 416)\n        self.random_size = (10, 20)\n        self.mosaic_scale = (0.5, 1.5)\n        self.test_size = (416, 416)\n        self.mosaic_prob = 0.5\n        self.enable_mixup = False\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n\n    def get_model(self, sublinear=False):\n\n        def init_yolo(M):\n            for m in M.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eps = 1e-3\n                    m.momentum = 0.03\n        if \"model\" not in self.__dict__:\n            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead\n            in_channels = [256, 512, 1024]\n            # NANO model use depthwise = True, which is main difference.\n            backbone = YOLOPAFPN(\n                self.depth, self.width, in_channels=in_channels,\n                act=self.act, depthwise=True,\n            )\n            head = YOLOXHead(\n                self.num_classes, self.width, in_channels=in_channels,\n                act=self.act, depthwise=True\n            )\n            self.model = YOLOX(backbone, head)\n\n        self.model.apply(init_yolo)\n        self.model.head.initialize_biases(1e-2)\n        return self.model\n"
  },
  {
    "path": "exps/default/yolox_s.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.33\n        self.width = 0.50\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n"
  },
  {
    "path": "exps/default/yolox_tiny.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.33\n        self.width = 0.375\n        self.input_size = (416, 416)\n        self.mosaic_scale = (0.5, 1.5)\n        self.random_size = (10, 20)\n        self.test_size = (416, 416)\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n        self.enable_mixup = False\n"
  },
  {
    "path": "exps/default/yolox_x.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 1.33\n        self.width = 1.25\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n"
  },
  {
    "path": "exps/example/custom/nano.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\n\nimport torch.nn as nn\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.33\n        self.width = 0.25\n        self.input_size = (416, 416)\n        self.mosaic_scale = (0.5, 1.5)\n        self.random_size = (10, 20)\n        self.test_size = (416, 416)\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n        self.enable_mixup = False\n\n        # Define yourself dataset path\n        self.data_dir = \"datasets/coco128\"\n        self.train_ann = \"instances_train2017.json\"\n        self.val_ann = \"instances_val2017.json\"\n\n        self.num_classes = 71\n\n    def get_model(self, sublinear=False):\n\n        def init_yolo(M):\n            for m in M.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eps = 1e-3\n                    m.momentum = 0.03\n        if \"model\" not in self.__dict__:\n            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead\n            in_channels = [256, 512, 1024]\n            # NANO model use depthwise = True, which is main difference.\n            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True)\n            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True)\n            self.model = YOLOX(backbone, head)\n\n        self.model.apply(init_yolo)\n        self.model.head.initialize_biases(1e-2)\n        return self.model\n"
  },
  {
    "path": "exps/example/custom/yolox_s.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\nimport os\n\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.depth = 0.33\n        self.width = 0.50\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n\n        # Define yourself dataset path\n        self.data_dir = \"datasets/coco128\"\n        self.train_ann = \"instances_train2017.json\"\n        self.val_ann = \"instances_val2017.json\"\n\n        self.num_classes = 71\n\n        self.max_epoch = 300\n        self.data_num_workers = 4\n        self.eval_interval = 1\n"
  },
  {
    "path": "exps/example/yolox_voc/yolox_voc_s.py",
    "content": "# encoding: utf-8\nimport os\n\nfrom yolox.data import get_yolox_datadir\nfrom yolox.exp import Exp as MyExp\n\n\nclass Exp(MyExp):\n    def __init__(self):\n        super(Exp, self).__init__()\n        self.num_classes = 20\n        self.depth = 0.33\n        self.width = 0.50\n        self.warmup_epochs = 1\n\n        # ---------- transform config ------------ #\n        self.mosaic_prob = 1.0\n        self.mixup_prob = 1.0\n        self.hsv_prob = 1.0\n        self.flip_prob = 0.5\n\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n\n    def get_dataset(self, cache: bool, cache_type: str = \"ram\"):\n        from yolox.data import VOCDetection, TrainTransform\n\n        return VOCDetection(\n            data_dir=os.path.join(get_yolox_datadir(), \"VOCdevkit\"),\n            image_sets=[('2007', 'trainval'), ('2012', 'trainval')],\n            img_size=self.input_size,\n            preproc=TrainTransform(\n                max_labels=50,\n                flip_prob=self.flip_prob,\n                hsv_prob=self.hsv_prob),\n            cache=cache,\n            cache_type=cache_type,\n        )\n\n    def get_eval_dataset(self, **kwargs):\n        from yolox.data import VOCDetection, ValTransform\n        legacy = kwargs.get(\"legacy\", False)\n\n        return VOCDetection(\n            data_dir=os.path.join(get_yolox_datadir(), \"VOCdevkit\"),\n            image_sets=[('2007', 'test')],\n            img_size=self.test_size,\n            preproc=ValTransform(legacy=legacy),\n        )\n\n    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):\n        from yolox.evaluators import VOCEvaluator\n\n        return VOCEvaluator(\n            dataloader=self.get_eval_loader(batch_size, is_distributed,\n                                            testdev=testdev, legacy=legacy),\n            img_size=self.test_size,\n            confthre=self.test_conf,\n            nmsthre=self.nmsthre,\n  
          num_classes=self.num_classes,\n        )\n"
  },
  {
    "path": "hubconf.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n\n\"\"\"\nUsage example:\n    import torch\n    model = torch.hub.load(\"Megvii-BaseDetection/YOLOX\", \"yolox_s\")\n    model = torch.hub.load(\"Megvii-BaseDetection/YOLOX\", \"yolox_custom\",\n                           exp_path=\"exp.py\", ckpt_path=\"ckpt.pth\")\n\"\"\"\ndependencies = [\"torch\"]\n\nfrom yolox.models import (  # isort:skip  # noqa: F401, E402\n    yolox_tiny,\n    yolox_nano,\n    yolox_s,\n    yolox_m,\n    yolox_l,\n    yolox_x,\n    yolov3,\n    yolox_custom\n)\n"
  },
  {
    "path": "requirements.txt",
    "content": "# TODO: Update with exact module version\nnumpy\ntorch>=1.7\nopencv_python\nloguru\ntqdm\ntorchvision\nthop\nninja\ntabulate\npsutil\ntensorboard\n\n# verified versions\n# pycocotools corresponds to https://github.com/ppwwyyxx/cocoapi\npycocotools>=2.0.2\nonnx>=1.13.0\nonnx-simplifier==0.4.10\n"
  },
  {
    "path": "setup.cfg",
    "content": "[isort]\nline_length = 100\nmulti_line_output = 3\nbalanced_wrapping = True\nknown_standard_library = setuptools\nknown_third_party = tqdm,loguru,tabulate,psutil\nknown_data_processing = cv2,numpy,scipy,PIL,matplotlib\nknown_datasets = pycocotools\nknown_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime\nknown_myself = yolox\nsections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER\nno_lines_before=STDLIB,THIRDPARTY,datasets\ndefault_section = FIRSTPARTY\n\n[flake8]\nmax-line-length = 100\nmax-complexity = 18\nexclude = __init__.py\n"
  },
  {
    "path": "setup.py",
    "content": "#!/usr/bin/env python\n# Copyright (c) Megvii, Inc. and its affiliates. All Rights Reserved\n\nimport re\nimport setuptools\nimport sys\n\nTORCH_AVAILABLE = True\ntry:\n    import torch\n    from torch.utils import cpp_extension\nexcept ImportError:\n    TORCH_AVAILABLE = False\n    print(\"[WARNING] Unable to import torch, pre-compiling ops will be disabled.\")\n\n\ndef get_package_dir():\n    pkg_dir = {\n        \"yolox.tools\": \"tools\",\n        \"yolox.exp.default\": \"exps/default\",\n    }\n    return pkg_dir\n\n\ndef get_install_requirements():\n    with open(\"requirements.txt\", \"r\", encoding=\"utf-8\") as f:\n        reqs = [x.strip() for x in f.read().splitlines()]\n    reqs = [x for x in reqs if not x.startswith(\"#\")]\n    return reqs\n\n\ndef get_yolox_version():\n    with open(\"yolox/__init__.py\", \"r\") as f:\n        version = re.search(\n            r'^__version__\\s*=\\s*[\\'\"]([^\\'\"]*)[\\'\"]',\n            f.read(), re.MULTILINE\n        ).group(1)\n    return version\n\n\ndef get_long_description():\n    with open(\"README.md\", \"r\", encoding=\"utf-8\") as f:\n        long_description = f.read()\n    return long_description\n\n\ndef get_ext_modules():\n    ext_module = []\n    if sys.platform != \"win32\":  # pre-compile ops on linux\n        assert TORCH_AVAILABLE, \"torch is required for pre-compiling ops, please install it first.\"\n        # if any other op is added, please also add it here\n        from yolox.layers import FastCOCOEvalOp\n        ext_module.append(FastCOCOEvalOp().build_op())\n    return ext_module\n\n\ndef get_cmd_class():\n    cmdclass = {}\n    if TORCH_AVAILABLE:\n        cmdclass[\"build_ext\"] = cpp_extension.BuildExtension\n    return cmdclass\n\n\nsetuptools.setup(\n    name=\"yolox\",\n    version=get_yolox_version(),\n    author=\"megvii basedet team\",\n    url=\"https://github.com/Megvii-BaseDetection/YOLOX\",\n    package_dir=get_package_dir(),\n    
packages=setuptools.find_packages(exclude=(\"tests\", \"tools\")) + list(get_package_dir().keys()),\n    python_requires=\">=3.6\",\n    install_requires=get_install_requirements(),\n    setup_requires=[\"wheel\"],  # avoid building error when pip is not updated\n    long_description=get_long_description(),\n    long_description_content_type=\"text/markdown\",\n    include_package_data=True,  # include files in MANIFEST.in\n    ext_modules=get_ext_modules(),\n    cmdclass=get_cmd_class(),\n    classifiers=[\n        \"Programming Language :: Python :: 3\", \"Operating System :: OS Independent\",\n        \"License :: OSI Approved :: Apache Software License\",\n    ],\n    project_urls={\n        \"Documentation\": \"https://yolox.readthedocs.io\",\n        \"Source\": \"https://github.com/Megvii-BaseDetection/YOLOX\",\n        \"Tracker\": \"https://github.com/Megvii-BaseDetection/YOLOX/issues\",\n    },\n)\n"
  },
  {
    "path": "tests/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n"
  },
  {
    "path": "tests/utils/test_model_utils.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport unittest\n\nimport torch\nfrom torch import nn\n\nfrom yolox.utils import adjust_status, freeze_module\nfrom yolox.exp import get_exp\n\n\nclass TestModelUtils(unittest.TestCase):\n\n    def setUp(self):\n        self.model: nn.Module = get_exp(exp_name=\"yolox-s\").get_model()\n\n    def test_model_state_adjust_status(self):\n        data = torch.ones(1, 10, 10, 10)\n        # use bn since bn changes state during train/val\n        model = nn.BatchNorm2d(10)\n        prev_state = model.state_dict()\n\n        modes = [False, True]\n        results = [True, False]\n\n        # test under train/eval mode\n        for mode, result in zip(modes, results):\n            with adjust_status(model, training=mode):\n                model(data)\n            model_state = model.state_dict()\n            self.assertTrue(len(model_state) == len(prev_state))\n            self.assertEqual(\n                result,\n                all([torch.allclose(v, model_state[k]) for k, v in prev_state.items()])\n            )\n\n        # test recurrsive context case\n        prev_state = model.state_dict()\n        with adjust_status(model, training=False):\n            with adjust_status(model, training=False):\n                model(data)\n        model_state = model.state_dict()\n        self.assertTrue(len(model_state) == len(prev_state))\n        self.assertTrue(\n            all([torch.allclose(v, model_state[k]) for k, v in prev_state.items()])\n        )\n\n    def test_model_effect_adjust_status(self):\n        # test context effect\n        self.model.train()\n        with adjust_status(self.model, training=False):\n            for module in self.model.modules():\n                self.assertFalse(module.training)\n        # all training after exit\n        for module in self.model.modules():\n            self.assertTrue(module.training)\n\n        # only 
backbone set to eval\n        self.model.backbone.eval()\n        with adjust_status(self.model, training=False):\n            for module in self.model.modules():\n                self.assertFalse(module.training)\n\n        for name, module in self.model.named_modules():\n            if \"backbone\" in name:\n                self.assertFalse(module.training)\n            else:\n                self.assertTrue(module.training)\n\n    def test_freeze_module(self):\n        model = nn.Sequential(\n            nn.Conv2d(3, 10, 1),\n            nn.BatchNorm2d(10),\n            nn.ReLU(),\n        )\n        data = torch.rand(1, 3, 10, 10)\n        model.train()\n        assert isinstance(model[1], nn.BatchNorm2d)\n        before_states = model[1].state_dict()\n        freeze_module(model[1])\n        model(data)\n        after_states = model[1].state_dict()\n        self.assertTrue(\n            all([torch.allclose(v, after_states[k]) for k, v in before_states.items()])\n        )\n\n        # yolox test\n        self.model.train()\n        for module in self.model.modules():\n            self.assertTrue(module.training)\n\n        freeze_module(self.model, \"backbone\")\n        for module in self.model.backbone.modules():\n            self.assertFalse(module.training)\n        for p in self.model.backbone.parameters():\n            self.assertFalse(p.requires_grad)\n\n        for module in self.model.head.modules():\n            self.assertTrue(module.training)\n        for p in self.model.head.parameters():\n            self.assertTrue(p.requires_grad)\n\n\nif __name__ == \"__main__\":\n    unittest.main()\n"
  },
  {
    "path": "tools/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n"
  },
  {
    "path": "tools/demo.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nimport time\nfrom loguru import logger\n\nimport cv2\n\nimport torch\n\nfrom yolox.data.data_augment import ValTransform\nfrom yolox.data.datasets import COCO_CLASSES\nfrom yolox.exp import get_exp\nfrom yolox.utils import fuse_model, get_model_info, postprocess, vis\n\nIMAGE_EXT = [\".jpg\", \".jpeg\", \".webp\", \".bmp\", \".png\"]\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX Demo!\")\n    parser.add_argument(\n        \"demo\", default=\"image\", help=\"demo type, eg. image, video and webcam\"\n    )\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n\n    parser.add_argument(\n        \"--path\", default=\"./assets/dog.jpg\", help=\"path to images or video\"\n    )\n    parser.add_argument(\"--camid\", type=int, default=0, help=\"webcam demo camera id\")\n    parser.add_argument(\n        \"--save_result\",\n        action=\"store_true\",\n        help=\"whether to save the inference result of image/video\",\n    )\n\n    # exp file\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"please input your experiment description file\",\n    )\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt for eval\")\n    parser.add_argument(\n        \"--device\",\n        default=\"cpu\",\n        type=str,\n        help=\"device to run our model, can either be cpu or gpu\",\n    )\n    parser.add_argument(\"--conf\", default=0.3, type=float, help=\"test conf\")\n    parser.add_argument(\"--nms\", default=0.3, type=float, help=\"test nms threshold\")\n    parser.add_argument(\"--tsize\", default=None, type=int, help=\"test img size\")\n    parser.add_argument(\n        \"--fp16\",\n      
  dest=\"fp16\",\n        default=False,\n        action=\"store_true\",\n        help=\"Adopting mix precision evaluating.\",\n    )\n    parser.add_argument(\n        \"--legacy\",\n        dest=\"legacy\",\n        default=False,\n        action=\"store_true\",\n        help=\"To be compatible with older versions\",\n    )\n    parser.add_argument(\n        \"--fuse\",\n        dest=\"fuse\",\n        default=False,\n        action=\"store_true\",\n        help=\"Fuse conv and bn for testing.\",\n    )\n    parser.add_argument(\n        \"--trt\",\n        dest=\"trt\",\n        default=False,\n        action=\"store_true\",\n        help=\"Using TensorRT model for testing.\",\n    )\n    return parser\n\n\ndef get_image_list(path):\n    image_names = []\n    for maindir, subdir, file_name_list in os.walk(path):\n        for filename in file_name_list:\n            apath = os.path.join(maindir, filename)\n            ext = os.path.splitext(apath)[1]\n            if ext in IMAGE_EXT:\n                image_names.append(apath)\n    return image_names\n\n\nclass Predictor(object):\n    def __init__(\n        self,\n        model,\n        exp,\n        cls_names=COCO_CLASSES,\n        trt_file=None,\n        decoder=None,\n        device=\"cpu\",\n        fp16=False,\n        legacy=False,\n    ):\n        self.model = model\n        self.cls_names = cls_names\n        self.decoder = decoder\n        self.num_classes = exp.num_classes\n        self.confthre = exp.test_conf\n        self.nmsthre = exp.nmsthre\n        self.test_size = exp.test_size\n        self.device = device\n        self.fp16 = fp16\n        self.preproc = ValTransform(legacy=legacy)\n        if trt_file is not None:\n            from torch2trt import TRTModule\n\n            model_trt = TRTModule()\n            model_trt.load_state_dict(torch.load(trt_file))\n\n            x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()\n            self.model(x)\n            self.model = 
model_trt\n\n    def inference(self, img):\n        img_info = {\"id\": 0}\n        if isinstance(img, str):\n            img_info[\"file_name\"] = os.path.basename(img)\n            img = cv2.imread(img)\n        else:\n            img_info[\"file_name\"] = None\n\n        height, width = img.shape[:2]\n        img_info[\"height\"] = height\n        img_info[\"width\"] = width\n        img_info[\"raw_img\"] = img\n\n        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])\n        img_info[\"ratio\"] = ratio\n\n        img, _ = self.preproc(img, None, self.test_size)\n        img = torch.from_numpy(img).unsqueeze(0)\n        img = img.float()\n        if self.device == \"gpu\":\n            img = img.cuda()\n            if self.fp16:\n                img = img.half()  # to FP16\n\n        with torch.no_grad():\n            t0 = time.time()\n            outputs = self.model(img)\n            if self.decoder is not None:\n                outputs = self.decoder(outputs, dtype=outputs.type())\n            outputs = postprocess(\n                outputs, self.num_classes, self.confthre,\n                self.nmsthre, class_agnostic=True\n            )\n            logger.info(\"Infer time: {:.4f}s\".format(time.time() - t0))\n        return outputs, img_info\n\n    def visual(self, output, img_info, cls_conf=0.35):\n        ratio = img_info[\"ratio\"]\n        img = img_info[\"raw_img\"]\n        if output is None:\n            return img\n        output = output.cpu()\n\n        bboxes = output[:, 0:4]\n\n        # preprocessing: resize\n        bboxes /= ratio\n\n        cls = output[:, 6]\n        scores = output[:, 4] * output[:, 5]\n\n        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)\n        return vis_res\n\n\ndef image_demo(predictor, vis_folder, path, current_time, save_result):\n    if os.path.isdir(path):\n        files = get_image_list(path)\n    else:\n        files = [path]\n    files.sort()\n    for 
image_name in files:\n        outputs, img_info = predictor.inference(image_name)\n        result_image = predictor.visual(outputs[0], img_info, predictor.confthre)\n        if save_result:\n            save_folder = os.path.join(\n                vis_folder, time.strftime(\"%Y_%m_%d_%H_%M_%S\", current_time)\n            )\n            os.makedirs(save_folder, exist_ok=True)\n            save_file_name = os.path.join(save_folder, os.path.basename(image_name))\n            logger.info(\"Saving detection result in {}\".format(save_file_name))\n            cv2.imwrite(save_file_name, result_image)\n        ch = cv2.waitKey(0)\n        if ch == 27 or ch == ord(\"q\") or ch == ord(\"Q\"):\n            break\n\n\ndef imageflow_demo(predictor, vis_folder, current_time, args):\n    cap = cv2.VideoCapture(args.path if args.demo == \"video\" else args.camid)\n    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float\n    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float\n    fps = cap.get(cv2.CAP_PROP_FPS)\n    if args.save_result:\n        save_folder = os.path.join(\n            vis_folder, time.strftime(\"%Y_%m_%d_%H_%M_%S\", current_time)\n        )\n        os.makedirs(save_folder, exist_ok=True)\n        if args.demo == \"video\":\n            save_path = os.path.join(save_folder, os.path.basename(args.path))\n        else:\n            save_path = os.path.join(save_folder, \"camera.mp4\")\n        logger.info(f\"video save_path is {save_path}\")\n        vid_writer = cv2.VideoWriter(\n            save_path, cv2.VideoWriter_fourcc(*\"mp4v\"), fps, (int(width), int(height))\n        )\n    while True:\n        ret_val, frame = cap.read()\n        if ret_val:\n            outputs, img_info = predictor.inference(frame)\n            result_frame = predictor.visual(outputs[0], img_info, predictor.confthre)\n            if args.save_result:\n                vid_writer.write(result_frame)\n            else:\n                cv2.namedWindow(\"yolox\", cv2.WINDOW_NORMAL)\n     
           cv2.imshow(\"yolox\", result_frame)\n            ch = cv2.waitKey(1)\n            if ch == 27 or ch == ord(\"q\") or ch == ord(\"Q\"):\n                break\n        else:\n            break\n\n\ndef main(exp, args):\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    file_name = os.path.join(exp.output_dir, args.experiment_name)\n    os.makedirs(file_name, exist_ok=True)\n\n    vis_folder = None\n    if args.save_result:\n        vis_folder = os.path.join(file_name, \"vis_res\")\n        os.makedirs(vis_folder, exist_ok=True)\n\n    if args.trt:\n        args.device = \"gpu\"\n\n    logger.info(\"Args: {}\".format(args))\n\n    if args.conf is not None:\n        exp.test_conf = args.conf\n    if args.nms is not None:\n        exp.nmsthre = args.nms\n    if args.tsize is not None:\n        exp.test_size = (args.tsize, args.tsize)\n\n    model = exp.get_model()\n    logger.info(\"Model Summary: {}\".format(get_model_info(model, exp.test_size)))\n\n    if args.device == \"gpu\":\n        model.cuda()\n        if args.fp16:\n            model.half()  # to FP16\n    model.eval()\n\n    if not args.trt:\n        if args.ckpt is None:\n            ckpt_file = os.path.join(file_name, \"best_ckpt.pth\")\n        else:\n            ckpt_file = args.ckpt\n        logger.info(\"loading checkpoint\")\n        ckpt = torch.load(ckpt_file, map_location=\"cpu\")\n        # load the model state dict\n        model.load_state_dict(ckpt[\"model\"])\n        logger.info(\"loaded checkpoint done.\")\n\n    if args.fuse:\n        logger.info(\"\\tFusing model...\")\n        model = fuse_model(model)\n\n    if args.trt:\n        assert not args.fuse, \"TensorRT model is not support model fusing!\"\n        trt_file = os.path.join(file_name, \"model_trt.pth\")\n        assert os.path.exists(\n            trt_file\n        ), \"TensorRT model is not found!\\n Run python3 tools/trt.py first!\"\n        model.head.decode_in_inference = False\n   
     decoder = model.head.decode_outputs\n        logger.info(\"Using TensorRT to inference\")\n    else:\n        trt_file = None\n        decoder = None\n\n    predictor = Predictor(\n        model, exp, COCO_CLASSES, trt_file, decoder,\n        args.device, args.fp16, args.legacy,\n    )\n    current_time = time.localtime()\n    if args.demo == \"image\":\n        image_demo(predictor, vis_folder, args.path, current_time, args.save_result)\n    elif args.demo == \"video\" or args.demo == \"webcam\":\n        imageflow_demo(predictor, vis_folder, current_time, args)\n\n\nif __name__ == \"__main__\":\n    args = make_parser().parse_args()\n    exp = get_exp(args.exp_file, args.name)\n\n    main(exp, args)\n"
  },
  {
    "path": "tools/eval.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nimport random\nimport warnings\nfrom loguru import logger\n\nimport torch\nimport torch.backends.cudnn as cudnn\nfrom torch.nn.parallel import DistributedDataParallel as DDP\n\nfrom yolox.core import launch\nfrom yolox.exp import get_exp\nfrom yolox.utils import (\n    configure_module,\n    configure_nccl,\n    fuse_model,\n    get_local_rank,\n    get_model_info,\n    setup_logger\n)\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX Eval\")\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n\n    # distributed\n    parser.add_argument(\n        \"--dist-backend\", default=\"nccl\", type=str, help=\"distributed backend\"\n    )\n    parser.add_argument(\n        \"--dist-url\",\n        default=None,\n        type=str,\n        help=\"url used to set up distributed training\",\n    )\n    parser.add_argument(\"-b\", \"--batch-size\", type=int, default=64, help=\"batch size\")\n    parser.add_argument(\n        \"-d\", \"--devices\", default=None, type=int, help=\"device for training\"\n    )\n    parser.add_argument(\n        \"--num_machines\", default=1, type=int, help=\"num of node for training\"\n    )\n    parser.add_argument(\n        \"--machine_rank\", default=0, type=int, help=\"node rank for multi-node training\"\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"please input your experiment description file\",\n    )\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt for eval\")\n    parser.add_argument(\"--conf\", default=None, type=float, help=\"test conf\")\n    parser.add_argument(\"--nms\", default=None, type=float, help=\"test nms threshold\")\n    
parser.add_argument(\"--tsize\", default=None, type=int, help=\"test img size\")\n    parser.add_argument(\"--seed\", default=None, type=int, help=\"eval seed\")\n    parser.add_argument(\n        \"--fp16\",\n        dest=\"fp16\",\n        default=False,\n        action=\"store_true\",\n        help=\"Adopting mix precision evaluating.\",\n    )\n    parser.add_argument(\n        \"--fuse\",\n        dest=\"fuse\",\n        default=False,\n        action=\"store_true\",\n        help=\"Fuse conv and bn for testing.\",\n    )\n    parser.add_argument(\n        \"--trt\",\n        dest=\"trt\",\n        default=False,\n        action=\"store_true\",\n        help=\"Using TensorRT model for testing.\",\n    )\n    parser.add_argument(\n        \"--legacy\",\n        dest=\"legacy\",\n        default=False,\n        action=\"store_true\",\n        help=\"To be compatible with older versions\",\n    )\n    parser.add_argument(\n        \"--test\",\n        dest=\"test\",\n        default=False,\n        action=\"store_true\",\n        help=\"Evaluating on test-dev set.\",\n    )\n    parser.add_argument(\n        \"--speed\",\n        dest=\"speed\",\n        default=False,\n        action=\"store_true\",\n        help=\"speed test only.\",\n    )\n    parser.add_argument(\n        \"opts\",\n        help=\"Modify config options using the command-line\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n    return parser\n\n\n@logger.catch\ndef main(exp, args, num_gpu):\n    if args.seed is not None:\n        random.seed(args.seed)\n        torch.manual_seed(args.seed)\n        cudnn.deterministic = True\n        warnings.warn(\n            \"You have chosen to seed testing. 
This will turn on the CUDNN deterministic setting, \"\n        )\n\n    is_distributed = num_gpu > 1\n\n    # set environment variables for distributed training\n    configure_nccl()\n    cudnn.benchmark = True\n\n    rank = get_local_rank()\n\n    file_name = os.path.join(exp.output_dir, args.experiment_name)\n\n    if rank == 0:\n        os.makedirs(file_name, exist_ok=True)\n\n    setup_logger(file_name, distributed_rank=rank, filename=\"val_log.txt\", mode=\"a\")\n    logger.info(\"Args: {}\".format(args))\n\n    if args.conf is not None:\n        exp.test_conf = args.conf\n    if args.nms is not None:\n        exp.nmsthre = args.nms\n    if args.tsize is not None:\n        exp.test_size = (args.tsize, args.tsize)\n\n    model = exp.get_model()\n    logger.info(\"Model Summary: {}\".format(get_model_info(model, exp.test_size)))\n    logger.info(\"Model Structure:\\n{}\".format(str(model)))\n\n    evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test, args.legacy)\n    evaluator.per_class_AP = True\n    evaluator.per_class_AR = True\n\n    torch.cuda.set_device(rank)\n    model.cuda(rank)\n    model.eval()\n\n    if not args.speed and not args.trt:\n        if args.ckpt is None:\n            ckpt_file = os.path.join(file_name, \"best_ckpt.pth\")\n        else:\n            ckpt_file = args.ckpt\n        logger.info(\"loading checkpoint from {}\".format(ckpt_file))\n        loc = \"cuda:{}\".format(rank)\n        ckpt = torch.load(ckpt_file, map_location=loc)\n        model.load_state_dict(ckpt[\"model\"])\n        logger.info(\"loaded checkpoint done.\")\n\n    if is_distributed:\n        model = DDP(model, device_ids=[rank])\n\n    if args.fuse:\n        logger.info(\"\\tFusing model...\")\n        model = fuse_model(model)\n\n    if args.trt:\n        assert (\n            not args.fuse and not is_distributed and args.batch_size == 1\n        ), \"TensorRT model is not support model fusing and distributed inferencing!\"\n        trt_file = 
os.path.join(file_name, \"model_trt.pth\")\n        assert os.path.exists(\n            trt_file\n        ), \"TensorRT model is not found!\\n Run tools/trt.py first!\"\n        model.head.decode_in_inference = False\n        decoder = model.head.decode_outputs\n    else:\n        trt_file = None\n        decoder = None\n\n    # start evaluate\n    *_, summary = evaluator.evaluate(\n        model, is_distributed, args.fp16, trt_file, decoder, exp.test_size\n    )\n    logger.info(\"\\n\" + summary)\n\n\nif __name__ == \"__main__\":\n    configure_module()\n    args = make_parser().parse_args()\n    exp = get_exp(args.exp_file, args.name)\n    exp.merge(args.opts)\n\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    num_gpu = torch.cuda.device_count() if args.devices is None else args.devices\n    assert num_gpu <= torch.cuda.device_count()\n\n    dist_url = \"auto\" if args.dist_url is None else args.dist_url\n    launch(\n        main,\n        num_gpu,\n        args.num_machines,\n        args.machine_rank,\n        backend=args.dist_backend,\n        dist_url=dist_url,\n        args=(exp, args, num_gpu),\n    )\n"
  },
  {
    "path": "tools/export_onnx.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nfrom loguru import logger\n\nimport torch\nfrom torch import nn\n\nfrom yolox.exp import get_exp\nfrom yolox.models.network_blocks import SiLU\nfrom yolox.utils import replace_module\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX onnx deploy\")\n    parser.add_argument(\n        \"--output-name\", type=str, default=\"yolox.onnx\", help=\"output name of models\"\n    )\n    parser.add_argument(\n        \"--input\", default=\"images\", type=str, help=\"input node name of onnx model\"\n    )\n    parser.add_argument(\n        \"--output\", default=\"output\", type=str, help=\"output node name of onnx model\"\n    )\n    parser.add_argument(\n        \"-o\", \"--opset\", default=11, type=int, help=\"onnx opset version\"\n    )\n    parser.add_argument(\"--batch-size\", type=int, default=1, help=\"batch size\")\n    parser.add_argument(\n        \"--dynamic\", action=\"store_true\", help=\"whether the input shape should be dynamic or not\"\n    )\n    parser.add_argument(\"--no-onnxsim\", action=\"store_true\", help=\"use onnxsim or not\")\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"experiment description file\",\n    )\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt path\")\n    parser.add_argument(\n        \"opts\",\n        help=\"Modify config options using the command-line\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n    parser.add_argument(\n        \"--decode_in_inference\",\n        action=\"store_true\",\n        help=\"decode in inference or not\"\n    )\n\n    return 
parser\n\n\n@logger.catch\ndef main():\n    args = make_parser().parse_args()\n    logger.info(\"args value: {}\".format(args))\n    exp = get_exp(args.exp_file, args.name)\n    exp.merge(args.opts)\n\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    model = exp.get_model()\n    if args.ckpt is None:\n        file_name = os.path.join(exp.output_dir, args.experiment_name)\n        ckpt_file = os.path.join(file_name, \"best_ckpt.pth\")\n    else:\n        ckpt_file = args.ckpt\n\n    # load the model state dict\n    ckpt = torch.load(ckpt_file, map_location=\"cpu\")\n\n    model.eval()\n    if \"model\" in ckpt:\n        ckpt = ckpt[\"model\"]\n    model.load_state_dict(ckpt)\n    model = replace_module(model, nn.SiLU, SiLU)\n    model.head.decode_in_inference = args.decode_in_inference\n\n    logger.info(\"loading checkpoint done.\")\n    dummy_input = torch.randn(args.batch_size, 3, exp.test_size[0], exp.test_size[1])\n\n    torch.onnx._export(\n        model,\n        dummy_input,\n        args.output_name,\n        input_names=[args.input],\n        output_names=[args.output],\n        dynamic_axes={args.input: {0: 'batch'},\n                      args.output: {0: 'batch'}} if args.dynamic else None,\n        opset_version=args.opset,\n    )\n    logger.info(\"generated onnx model named {}\".format(args.output_name))\n\n    if not args.no_onnxsim:\n        import onnx\n        from onnxsim import simplify\n\n        # use onnx-simplifier to reduce reduent model.\n        onnx_model = onnx.load(args.output_name)\n        model_simp, check = simplify(onnx_model)\n        assert check, \"Simplified ONNX model could not be validated\"\n        onnx.save(model_simp, args.output_name)\n        logger.info(\"generated simplified onnx model named {}\".format(args.output_name))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/export_torchscript.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nfrom loguru import logger\n\nimport torch\n\nfrom yolox.exp import get_exp\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX torchscript deploy\")\n    parser.add_argument(\n        \"--output-name\", type=str, default=\"yolox.torchscript.pt\", help=\"output name of models\"\n    )\n    parser.add_argument(\"--batch-size\", type=int, default=1, help=\"batch size\")\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"experiment description file\",\n    )\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt path\")\n    parser.add_argument(\n        \"--decode_in_inference\",\n        action=\"store_true\",\n        help=\"decode in inference or not\"\n    )\n    parser.add_argument(\n        \"opts\",\n        help=\"Modify config options using the command-line\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n\n    return parser\n\n\n@logger.catch\ndef main():\n    args = make_parser().parse_args()\n    logger.info(\"args value: {}\".format(args))\n    exp = get_exp(args.exp_file, args.name)\n    exp.merge(args.opts)\n\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    model = exp.get_model()\n    if args.ckpt is None:\n        file_name = os.path.join(exp.output_dir, args.experiment_name)\n        ckpt_file = os.path.join(file_name, \"best_ckpt.pth\")\n    else:\n        ckpt_file = args.ckpt\n\n    # load the model state dict\n    ckpt = torch.load(ckpt_file, map_location=\"cpu\")\n\n    model.eval()\n    if \"model\" in ckpt:\n        ckpt = ckpt[\"model\"]\n    
model.load_state_dict(ckpt)\n    model.head.decode_in_inference = args.decode_in_inference\n\n    logger.info(\"loading checkpoint done.\")\n    dummy_input = torch.randn(args.batch_size, 3, exp.test_size[0], exp.test_size[1])\n\n    mod = torch.jit.trace(model, dummy_input)\n    mod.save(args.output_name)\n    logger.info(\"generated torchscript model named {}\".format(args.output_name))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/train.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport random\nimport warnings\nfrom loguru import logger\n\nimport torch\nimport torch.backends.cudnn as cudnn\n\nfrom yolox.core import launch\nfrom yolox.exp import Exp, check_exp_value, get_exp\nfrom yolox.utils import configure_module, configure_nccl, configure_omp, get_num_devices\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX train parser\")\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n\n    # distributed\n    parser.add_argument(\n        \"--dist-backend\", default=\"nccl\", type=str, help=\"distributed backend\"\n    )\n    parser.add_argument(\n        \"--dist-url\",\n        default=None,\n        type=str,\n        help=\"url used to set up distributed training\",\n    )\n    parser.add_argument(\"-b\", \"--batch-size\", type=int, default=64, help=\"batch size\")\n    parser.add_argument(\n        \"-d\", \"--devices\", default=None, type=int, help=\"device for training\"\n    )\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"plz input your experiment description file\",\n    )\n    parser.add_argument(\n        \"--resume\", default=False, action=\"store_true\", help=\"resume training\"\n    )\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"checkpoint file\")\n    parser.add_argument(\n        \"-e\",\n        \"--start_epoch\",\n        default=None,\n        type=int,\n        help=\"resume training start epoch\",\n    )\n    parser.add_argument(\n        \"--num_machines\", default=1, type=int, help=\"num of node for training\"\n    )\n    parser.add_argument(\n        \"--machine_rank\", default=0, type=int, help=\"node rank for multi-node training\"\n    )\n    
parser.add_argument(\n        \"--fp16\",\n        dest=\"fp16\",\n        default=False,\n        action=\"store_true\",\n        help=\"Adopting mix precision training.\",\n    )\n    parser.add_argument(\n        \"--cache\",\n        type=str,\n        nargs=\"?\",\n        const=\"ram\",\n        help=\"Caching imgs to ram/disk for fast training.\",\n    )\n    parser.add_argument(\n        \"-o\",\n        \"--occupy\",\n        dest=\"occupy\",\n        default=False,\n        action=\"store_true\",\n        help=\"occupy GPU memory first for training.\",\n    )\n    parser.add_argument(\n        \"-l\",\n        \"--logger\",\n        type=str,\n        help=\"Logger to be used for metrics. \\\n                Implemented loggers include `tensorboard`, `mlflow` and `wandb`.\",\n        default=\"tensorboard\"\n    )\n    parser.add_argument(\n        \"opts\",\n        help=\"Modify config options using the command-line\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n    return parser\n\n\n@logger.catch\ndef main(exp: Exp, args):\n    if exp.seed is not None:\n        random.seed(exp.seed)\n        torch.manual_seed(exp.seed)\n        cudnn.deterministic = True\n        warnings.warn(\n            \"You have chosen to seed training. This will turn on the CUDNN deterministic setting, \"\n            \"which can slow down your training considerably! 
You may see unexpected behavior \"\n            \"when restarting from checkpoints.\"\n        )\n\n    # set environment variables for distributed training\n    configure_nccl()\n    configure_omp()\n    cudnn.benchmark = True\n\n    trainer = exp.get_trainer(args)\n    trainer.train()\n\n\nif __name__ == \"__main__\":\n    configure_module()\n    args = make_parser().parse_args()\n    exp = get_exp(args.exp_file, args.name)\n    exp.merge(args.opts)\n    check_exp_value(exp)\n\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    num_gpu = get_num_devices() if args.devices is None else args.devices\n    assert num_gpu <= get_num_devices()\n\n    if args.cache is not None:\n        exp.dataset = exp.get_dataset(cache=True, cache_type=args.cache)\n\n    dist_url = \"auto\" if args.dist_url is None else args.dist_url\n    launch(\n        main,\n        num_gpu,\n        args.num_machines,\n        args.machine_rank,\n        backend=args.dist_backend,\n        dist_url=dist_url,\n        args=(exp, args),\n    )\n"
  },
  {
    "path": "tools/trt.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport argparse\nimport os\nimport shutil\nfrom loguru import logger\n\nimport tensorrt as trt\nimport torch\nfrom torch2trt import torch2trt\n\nfrom yolox.exp import get_exp\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"YOLOX ncnn deploy\")\n    parser.add_argument(\"-expn\", \"--experiment-name\", type=str, default=None)\n    parser.add_argument(\"-n\", \"--name\", type=str, default=None, help=\"model name\")\n\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"please input your experiment description file\",\n    )\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt path\")\n    parser.add_argument(\n        \"-w\", '--workspace', type=int, default=32, help='max workspace size in detect'\n    )\n    parser.add_argument(\"-b\", '--batch', type=int, default=1, help='max batch size in detect')\n    return parser\n\n\n@logger.catch\n@torch.no_grad()\ndef main():\n    args = make_parser().parse_args()\n    exp = get_exp(args.exp_file, args.name)\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    model = exp.get_model()\n    file_name = os.path.join(exp.output_dir, args.experiment_name)\n    os.makedirs(file_name, exist_ok=True)\n    if args.ckpt is None:\n        ckpt_file = os.path.join(file_name, \"best_ckpt.pth\")\n    else:\n        ckpt_file = args.ckpt\n\n    ckpt = torch.load(ckpt_file, map_location=\"cpu\")\n    # load the model state dict\n\n    model.load_state_dict(ckpt[\"model\"])\n    logger.info(\"loaded checkpoint done.\")\n    model.eval()\n    model.cuda()\n    model.head.decode_in_inference = False\n    x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()\n    model_trt = torch2trt(\n        model,\n        [x],\n        fp16_mode=True,\n        
log_level=trt.Logger.INFO,\n        max_workspace_size=(1 << args.workspace),\n        max_batch_size=args.batch,\n    )\n    torch.save(model_trt.state_dict(), os.path.join(file_name, \"model_trt.pth\"))\n    logger.info(\"Converted TensorRT model done.\")\n    engine_file = os.path.join(file_name, \"model_trt.engine\")\n    engine_file_demo = os.path.join(\"demo\", \"TensorRT\", \"cpp\", \"model_trt.engine\")\n    with open(engine_file, \"wb\") as f:\n        f.write(model_trt.engine.serialize())\n\n    shutil.copyfile(engine_file, engine_file_demo)\n\n    logger.info(\"Converted TensorRT model engine file is saved for C++ inference.\")\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "tools/visualize_assign.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\nimport sys\nimport random\nimport time\nimport warnings\nfrom loguru import logger\n\nimport torch\nimport torch.backends.cudnn as cudnn\n\nfrom yolox.exp import Exp, get_exp\nfrom yolox.core import Trainer\nfrom yolox.utils import configure_module, configure_omp\nfrom yolox.tools.train import make_parser\n\n\nclass AssignVisualizer(Trainer):\n\n    def __init__(self, exp: Exp, args):\n        super().__init__(exp, args)\n        self.batch_cnt = 0\n        self.vis_dir = os.path.join(self.file_name, \"vis\")\n        os.makedirs(self.vis_dir, exist_ok=True)\n\n    def train_one_iter(self):\n        iter_start_time = time.time()\n\n        inps, targets = self.prefetcher.next()\n        inps = inps.to(self.data_type)\n        targets = targets.to(self.data_type)\n        targets.requires_grad = False\n        inps, targets = self.exp.preprocess(inps, targets, self.input_size)\n        data_end_time = time.time()\n\n        with torch.cuda.amp.autocast(enabled=self.amp_training):\n            path_prefix = os.path.join(self.vis_dir, f\"assign_vis_{self.batch_cnt}_\")\n            self.model.visualize(inps, targets, path_prefix)\n\n        if self.use_model_ema:\n            self.ema_model.update(self.model)\n\n        iter_end_time = time.time()\n        self.meter.update(\n            iter_time=iter_end_time - iter_start_time,\n            data_time=data_end_time - iter_start_time,\n        )\n        self.batch_cnt += 1\n        if self.batch_cnt >= self.args.max_batch:\n            sys.exit(0)\n\n    def after_train(self):\n        logger.info(\"Finish visualize assignment, exit...\")\n\n\ndef assign_vis_parser():\n    parser = make_parser()\n    parser.add_argument(\"--max-batch\", type=int, default=1, help=\"max batch of images to visualize\")\n    return parser\n\n\n@logger.catch\ndef main(exp: Exp, args):\n    if exp.seed is not None:\n        
random.seed(exp.seed)\n        torch.manual_seed(exp.seed)\n        cudnn.deterministic = True\n        warnings.warn(\n            \"You have chosen to seed training. This will turn on the CUDNN deterministic setting, \"\n            \"which can slow down your training considerably! You may see unexpected behavior \"\n            \"when restarting from checkpoints.\"\n        )\n\n    # set environment variables for distributed training\n    configure_omp()\n    cudnn.benchmark = True\n\n    visualizer = AssignVisualizer(exp, args)\n    visualizer.train()\n\n\nif __name__ == \"__main__\":\n    configure_module()\n    args = assign_vis_parser().parse_args()\n    exp = get_exp(args.exp_file, args.name)\n    exp.merge(args.opts)\n\n    if not args.experiment_name:\n        args.experiment_name = exp.exp_name\n\n    main(exp, args)\n"
  },
  {
    "path": "yolox/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n\n__version__ = \"0.3.0\"\n"
  },
  {
    "path": "yolox/core/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nfrom .launch import launch\nfrom .trainer import Trainer\n"
  },
  {
    "path": "yolox/core/launch.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Code are based on\n# https://github.com/facebookresearch/detectron2/blob/master/detectron2/engine/launch.py\n# Copyright (c) Facebook, Inc. and its affiliates.\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport sys\nfrom datetime import timedelta\nfrom loguru import logger\n\nimport torch\nimport torch.distributed as dist\nimport torch.multiprocessing as mp\n\nimport yolox.utils.dist as comm\n\n__all__ = [\"launch\"]\n\n\nDEFAULT_TIMEOUT = timedelta(minutes=30)\n\n\ndef _find_free_port():\n    \"\"\"\n    Find an available port of current machine / node.\n    \"\"\"\n    import socket\n\n    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n    # Binding to port 0 will cause the OS to find an available port for us\n    sock.bind((\"\", 0))\n    port = sock.getsockname()[1]\n    sock.close()\n    # NOTE: there is still a chance the port could be taken by other processes.\n    return port\n\n\ndef launch(\n    main_func,\n    num_gpus_per_machine,\n    num_machines=1,\n    machine_rank=0,\n    backend=\"nccl\",\n    dist_url=None,\n    args=(),\n    timeout=DEFAULT_TIMEOUT,\n):\n    \"\"\"\n    Args:\n        main_func: a function that will be called by `main_func(*args)`\n        num_machines (int): the total number of machines\n        machine_rank (int): the rank of this machine (one per machine)\n        dist_url (str): url to connect to for distributed training, including protocol\n                       e.g. 
\"tcp://127.0.0.1:8686\".\n                       Can be set to auto to automatically select a free port on localhost\n        args (tuple): arguments passed to main_func\n    \"\"\"\n    world_size = num_machines * num_gpus_per_machine\n    if world_size > 1:\n        # https://github.com/pytorch/pytorch/pull/14391\n        # TODO prctl in spawned processes\n\n        if dist_url == \"auto\":\n            assert (\n                num_machines == 1\n            ), \"dist_url=auto cannot work with distributed training.\"\n            port = _find_free_port()\n            dist_url = f\"tcp://127.0.0.1:{port}\"\n\n        start_method = \"spawn\"\n        cache = vars(args[1]).get(\"cache\", False)\n\n        # To use numpy memmap for caching image into RAM, we have to use fork method\n        if cache:\n            assert sys.platform != \"win32\", (\n                \"As Windows platform doesn't support fork method, \"\n                \"do not add --cache in your training command.\"\n            )\n            start_method = \"fork\"\n\n        mp.start_processes(\n            _distributed_worker,\n            nprocs=num_gpus_per_machine,\n            args=(\n                main_func,\n                world_size,\n                num_gpus_per_machine,\n                machine_rank,\n                backend,\n                dist_url,\n                args,\n            ),\n            daemon=False,\n            start_method=start_method,\n        )\n    else:\n        main_func(*args)\n\n\ndef _distributed_worker(\n    local_rank,\n    main_func,\n    world_size,\n    num_gpus_per_machine,\n    machine_rank,\n    backend,\n    dist_url,\n    args,\n    timeout=DEFAULT_TIMEOUT,\n):\n    assert (\n        torch.cuda.is_available()\n    ), \"cuda is not available. 
Please check your installation.\"\n    global_rank = machine_rank * num_gpus_per_machine + local_rank\n    logger.info(\"Rank {} initialization finished.\".format(global_rank))\n    try:\n        dist.init_process_group(\n            backend=backend,\n            init_method=dist_url,\n            world_size=world_size,\n            rank=global_rank,\n            timeout=timeout,\n        )\n    except Exception:\n        logger.error(\"Process group URL: {}\".format(dist_url))\n        raise\n\n    # Setup the local process group (which contains ranks within the same machine)\n    assert comm._LOCAL_PROCESS_GROUP is None\n    num_machines = world_size // num_gpus_per_machine\n    for i in range(num_machines):\n        ranks_on_i = list(\n            range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)\n        )\n        pg = dist.new_group(ranks_on_i)\n        if i == machine_rank:\n            comm._LOCAL_PROCESS_GROUP = pg\n\n    # synchronize is needed here to prevent a possible timeout after calling init_process_group\n    # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172\n    comm.synchronize()\n\n    assert num_gpus_per_machine <= torch.cuda.device_count()\n    torch.cuda.set_device(local_rank)\n\n    main_func(*args)\n"
  },
  {
    "path": "yolox/core/trainer.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport datetime\nimport os\nimport time\nfrom loguru import logger\n\nimport torch\nfrom torch.nn.parallel import DistributedDataParallel as DDP\nfrom torch.utils.tensorboard import SummaryWriter\n\nfrom yolox.data import DataPrefetcher\nfrom yolox.exp import Exp\nfrom yolox.utils import (\n    MeterBuffer,\n    MlflowLogger,\n    ModelEMA,\n    WandbLogger,\n    adjust_status,\n    all_reduce_norm,\n    get_local_rank,\n    get_model_info,\n    get_rank,\n    get_world_size,\n    gpu_mem_usage,\n    is_parallel,\n    load_ckpt,\n    mem_usage,\n    occupy_mem,\n    save_checkpoint,\n    setup_logger,\n    synchronize\n)\n\n\nclass Trainer:\n    def __init__(self, exp: Exp, args):\n        # init function only defines some basic attr, other attrs like model, optimizer are built in\n        # before_train methods.\n        self.exp = exp\n        self.args = args\n\n        # training related attr\n        self.max_epoch = exp.max_epoch\n        self.amp_training = args.fp16\n        self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)\n        self.is_distributed = get_world_size() > 1\n        self.rank = get_rank()\n        self.local_rank = get_local_rank()\n        self.device = \"cuda:{}\".format(self.local_rank)\n        self.use_model_ema = exp.ema\n        self.save_history_ckpt = exp.save_history_ckpt\n\n        # data/dataloader related attr\n        self.data_type = torch.float16 if args.fp16 else torch.float32\n        self.input_size = exp.input_size\n        self.best_ap = 0\n\n        # metric record\n        self.meter = MeterBuffer(window_size=exp.print_interval)\n        self.file_name = os.path.join(exp.output_dir, args.experiment_name)\n\n        if self.rank == 0:\n            os.makedirs(self.file_name, exist_ok=True)\n\n        setup_logger(\n            self.file_name,\n            distributed_rank=self.rank,\n            
filename=\"train_log.txt\",\n            mode=\"a\",\n        )\n\n    def train(self):\n        self.before_train()\n        try:\n            self.train_in_epoch()\n        except Exception as e:\n            logger.error(\"Exception in training: \", e)\n            raise\n        finally:\n            self.after_train()\n\n    def train_in_epoch(self):\n        for self.epoch in range(self.start_epoch, self.max_epoch):\n            self.before_epoch()\n            self.train_in_iter()\n            self.after_epoch()\n\n    def train_in_iter(self):\n        for self.iter in range(self.max_iter):\n            self.before_iter()\n            self.train_one_iter()\n            self.after_iter()\n\n    def train_one_iter(self):\n        iter_start_time = time.time()\n\n        inps, targets = self.prefetcher.next()\n        inps = inps.to(self.data_type)\n        targets = targets.to(self.data_type)\n        targets.requires_grad = False\n        inps, targets = self.exp.preprocess(inps, targets, self.input_size)\n        data_end_time = time.time()\n\n        with torch.cuda.amp.autocast(enabled=self.amp_training):\n            outputs = self.model(inps, targets)\n\n        loss = outputs[\"total_loss\"]\n\n        self.optimizer.zero_grad()\n        self.scaler.scale(loss).backward()\n        self.scaler.step(self.optimizer)\n        self.scaler.update()\n\n        if self.use_model_ema:\n            self.ema_model.update(self.model)\n\n        lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)\n        for param_group in self.optimizer.param_groups:\n            param_group[\"lr\"] = lr\n\n        iter_end_time = time.time()\n        self.meter.update(\n            iter_time=iter_end_time - iter_start_time,\n            data_time=data_end_time - iter_start_time,\n            lr=lr,\n            **outputs,\n        )\n\n    def before_train(self):\n        logger.info(\"args: {}\".format(self.args))\n        logger.info(\"exp 
value:\\n{}\".format(self.exp))\n\n        # model related init\n        torch.cuda.set_device(self.local_rank)\n        model = self.exp.get_model()\n        logger.info(\n            \"Model Summary: {}\".format(get_model_info(model, self.exp.test_size))\n        )\n        model.to(self.device)\n\n        # solver related init\n        self.optimizer = self.exp.get_optimizer(self.args.batch_size)\n\n        # value of epoch will be set in `resume_train`\n        model = self.resume_train(model)\n\n        # data related init\n        self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs\n        self.train_loader = self.exp.get_data_loader(\n            batch_size=self.args.batch_size,\n            is_distributed=self.is_distributed,\n            no_aug=self.no_aug,\n            cache_img=self.args.cache,\n        )\n        logger.info(\"init prefetcher, this might take one minute or less...\")\n        self.prefetcher = DataPrefetcher(self.train_loader)\n        # max_iter means iters per epoch\n        self.max_iter = len(self.train_loader)\n\n        self.lr_scheduler = self.exp.get_lr_scheduler(\n            self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter\n        )\n        if self.args.occupy:\n            occupy_mem(self.local_rank)\n\n        if self.is_distributed:\n            model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)\n\n        if self.use_model_ema:\n            self.ema_model = ModelEMA(model, 0.9998)\n            self.ema_model.updates = self.max_iter * self.start_epoch\n\n        self.model = model\n\n        self.evaluator = self.exp.get_evaluator(\n            batch_size=self.args.batch_size, is_distributed=self.is_distributed\n        )\n        # Tensorboard and Wandb loggers\n        if self.rank == 0:\n            if self.args.logger == \"tensorboard\":\n                self.tblogger = SummaryWriter(os.path.join(self.file_name, \"tensorboard\"))\n            elif 
self.args.logger == \"wandb\":\n                self.wandb_logger = WandbLogger.initialize_wandb_logger(\n                    self.args,\n                    self.exp,\n                    self.evaluator.dataloader.dataset\n                )\n            elif self.args.logger == \"mlflow\":\n                self.mlflow_logger = MlflowLogger()\n                self.mlflow_logger.setup(args=self.args, exp=self.exp)\n            else:\n                raise ValueError(\"logger must be either 'tensorboard', 'mlflow' or 'wandb'\")\n\n        logger.info(\"Training start...\")\n        logger.info(\"\\n{}\".format(model))\n\n    def after_train(self):\n        logger.info(\n            \"Training of experiment is done and the best AP is {:.2f}\".format(self.best_ap * 100)\n        )\n        if self.rank == 0:\n            if self.args.logger == \"wandb\":\n                self.wandb_logger.finish()\n            elif self.args.logger == \"mlflow\":\n                metadata = {\n                    \"epoch\": self.epoch + 1,\n                    \"input_size\": self.input_size,\n                    'start_ckpt': self.args.ckpt,\n                    'exp_file': self.args.exp_file,\n                    \"best_ap\": float(self.best_ap)\n                }\n                self.mlflow_logger.on_train_end(self.args, file_name=self.file_name,\n                                                metadata=metadata)\n\n    def before_epoch(self):\n        logger.info(\"---> start train epoch{}\".format(self.epoch + 1))\n\n        if self.epoch + 1 == self.max_epoch - self.exp.no_aug_epochs or self.no_aug:\n            logger.info(\"--->No mosaic aug now!\")\n            self.train_loader.close_mosaic()\n            logger.info(\"--->Add additional L1 loss now!\")\n            if self.is_distributed:\n                self.model.module.head.use_l1 = True\n            else:\n                self.model.head.use_l1 = True\n            self.exp.eval_interval = 1\n            if not 
self.no_aug:\n                self.save_ckpt(ckpt_name=\"last_mosaic_epoch\")\n\n    def after_epoch(self):\n        self.save_ckpt(ckpt_name=\"latest\")\n\n        if (self.epoch + 1) % self.exp.eval_interval == 0:\n            all_reduce_norm(self.model)\n            self.evaluate_and_save_model()\n\n    def before_iter(self):\n        pass\n\n    def after_iter(self):\n        \"\"\"\n        `after_iter` contains two parts of logic:\n            * log information\n            * reset setting of resize\n        \"\"\"\n        # log needed information\n        if (self.iter + 1) % self.exp.print_interval == 0:\n            # TODO check ETA logic\n            left_iters = self.max_iter * self.max_epoch - (self.progress_in_iter + 1)\n            eta_seconds = self.meter[\"iter_time\"].global_avg * left_iters\n            eta_str = \"ETA: {}\".format(datetime.timedelta(seconds=int(eta_seconds)))\n\n            progress_str = \"epoch: {}/{}, iter: {}/{}\".format(\n                self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter\n            )\n            loss_meter = self.meter.get_filtered_meter(\"loss\")\n            loss_str = \", \".join(\n                [\"{}: {:.1f}\".format(k, v.latest) for k, v in loss_meter.items()]\n            )\n\n            time_meter = self.meter.get_filtered_meter(\"time\")\n            time_str = \", \".join(\n                [\"{}: {:.3f}s\".format(k, v.avg) for k, v in time_meter.items()]\n            )\n\n            mem_str = \"gpu mem: {:.0f}Mb, mem: {:.1f}Gb\".format(gpu_mem_usage(), mem_usage())\n\n            logger.info(\n                \"{}, {}, {}, {}, lr: {:.3e}\".format(\n                    progress_str,\n                    mem_str,\n                    time_str,\n                    loss_str,\n                    self.meter[\"lr\"].latest,\n                )\n                + (\", size: {:d}, {}\".format(self.input_size[0], eta_str))\n            )\n\n            if self.rank == 0:\n                if 
self.args.logger == \"tensorboard\":\n                    self.tblogger.add_scalar(\n                        \"train/lr\", self.meter[\"lr\"].latest, self.progress_in_iter)\n                    for k, v in loss_meter.items():\n                        self.tblogger.add_scalar(\n                            f\"train/{k}\", v.latest, self.progress_in_iter)\n                if self.args.logger == \"wandb\":\n                    metrics = {\"train/\" + k: v.latest for k, v in loss_meter.items()}\n                    metrics.update({\n                        \"train/lr\": self.meter[\"lr\"].latest\n                    })\n                    self.wandb_logger.log_metrics(metrics, step=self.progress_in_iter)\n                if self.args.logger == 'mlflow':\n                    logs = {\"train/\" + k: v.latest for k, v in loss_meter.items()}\n                    logs.update({\"train/lr\": self.meter[\"lr\"].latest})\n                    self.mlflow_logger.on_log(self.args, self.exp, self.epoch+1, logs)\n\n            self.meter.clear_meters()\n\n        # random resizing\n        if (self.progress_in_iter + 1) % 10 == 0:\n            self.input_size = self.exp.random_resize(\n                self.train_loader, self.epoch, self.rank, self.is_distributed\n            )\n\n    @property\n    def progress_in_iter(self):\n        return self.epoch * self.max_iter + self.iter\n\n    def resume_train(self, model):\n        if self.args.resume:\n            logger.info(\"resume training\")\n            if self.args.ckpt is None:\n                ckpt_file = os.path.join(self.file_name, \"latest\" + \"_ckpt.pth\")\n            else:\n                ckpt_file = self.args.ckpt\n\n            ckpt = torch.load(ckpt_file, map_location=self.device)\n            # resume the model/optimizer state dict\n            model.load_state_dict(ckpt[\"model\"])\n            self.optimizer.load_state_dict(ckpt[\"optimizer\"])\n            self.best_ap = ckpt.pop(\"best_ap\", 0)\n            # 
resume the training states variables\n            start_epoch = (\n                self.args.start_epoch - 1\n                if self.args.start_epoch is not None\n                else ckpt[\"start_epoch\"]\n            )\n            self.start_epoch = start_epoch\n            logger.info(\n                \"loaded checkpoint '{}' (epoch {})\".format(\n                    self.args.resume, self.start_epoch\n                )\n            )  # noqa\n        else:\n            if self.args.ckpt is not None:\n                logger.info(\"loading checkpoint for fine tuning\")\n                ckpt_file = self.args.ckpt\n                ckpt = torch.load(ckpt_file, map_location=self.device)[\"model\"]\n                model = load_ckpt(model, ckpt)\n            self.start_epoch = 0\n\n        return model\n\n    def evaluate_and_save_model(self):\n        if self.use_model_ema:\n            evalmodel = self.ema_model.ema\n        else:\n            evalmodel = self.model\n            if is_parallel(evalmodel):\n                evalmodel = evalmodel.module\n\n        with adjust_status(evalmodel, training=False):\n            (ap50_95, ap50, summary), predictions = self.exp.eval(\n                evalmodel, self.evaluator, self.is_distributed, return_outputs=True\n            )\n\n        update_best_ckpt = ap50_95 > self.best_ap\n        self.best_ap = max(self.best_ap, ap50_95)\n\n        if self.rank == 0:\n            if self.args.logger == \"tensorboard\":\n                self.tblogger.add_scalar(\"val/COCOAP50\", ap50, self.epoch + 1)\n                self.tblogger.add_scalar(\"val/COCOAP50_95\", ap50_95, self.epoch + 1)\n            if self.args.logger == \"wandb\":\n                self.wandb_logger.log_metrics({\n                    \"val/COCOAP50\": ap50,\n                    \"val/COCOAP50_95\": ap50_95,\n                    \"train/epoch\": self.epoch + 1,\n                })\n                self.wandb_logger.log_images(predictions)\n            if 
self.args.logger == \"mlflow\":\n                logs = {\n                    \"val/COCOAP50\": ap50,\n                    \"val/COCOAP50_95\": ap50_95,\n                    \"val/best_ap\": round(self.best_ap, 3),\n                    \"train/epoch\": self.epoch + 1,\n                }\n                self.mlflow_logger.on_log(self.args, self.exp, self.epoch+1, logs)\n            logger.info(\"\\n\" + summary)\n        synchronize()\n\n        self.save_ckpt(\"last_epoch\", update_best_ckpt, ap=ap50_95)\n        if self.save_history_ckpt:\n            self.save_ckpt(f\"epoch_{self.epoch + 1}\", ap=ap50_95)\n\n        if self.args.logger == \"mlflow\":\n            metadata = {\n                    \"epoch\": self.epoch + 1,\n                    \"input_size\": self.input_size,\n                    'start_ckpt': self.args.ckpt,\n                    'exp_file': self.args.exp_file,\n                    \"best_ap\": float(self.best_ap)\n                }\n            self.mlflow_logger.save_checkpoints(self.args, self.exp, self.file_name, self.epoch,\n                                                metadata, update_best_ckpt)\n\n    def save_ckpt(self, ckpt_name, update_best_ckpt=False, ap=None):\n        if self.rank == 0:\n            save_model = self.ema_model.ema if self.use_model_ema else self.model\n            logger.info(\"Save weights to {}\".format(self.file_name))\n            ckpt_state = {\n                \"start_epoch\": self.epoch + 1,\n                \"model\": save_model.state_dict(),\n                \"optimizer\": self.optimizer.state_dict(),\n                \"best_ap\": self.best_ap,\n                \"curr_ap\": ap,\n            }\n            save_checkpoint(\n                ckpt_state,\n                update_best_ckpt,\n                self.file_name,\n                ckpt_name,\n            )\n\n            if self.args.logger == \"wandb\":\n                self.wandb_logger.save_checkpoint(\n                    self.file_name,\n        
            ckpt_name,\n                    update_best_ckpt,\n                    metadata={\n                        \"epoch\": self.epoch + 1,\n                        \"optimizer\": self.optimizer.state_dict(),\n                        \"best_ap\": self.best_ap,\n                        \"curr_ap\": ap\n                    }\n                )\n"
  },
  {
    "path": "yolox/data/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nfrom .data_augment import TrainTransform, ValTransform\nfrom .data_prefetcher import DataPrefetcher\nfrom .dataloading import DataLoader, get_yolox_datadir, worker_init_reset_seed\nfrom .datasets import *\nfrom .samplers import InfiniteSampler, YoloBatchSampler\n"
  },
  {
    "path": "yolox/data/data_augment.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\"\"\"\nData augmentation functionality. Passed as callable transformations to\nDataset classes.\n\nThe data augmentation procedures were interpreted from @weiliu89's SSD paper\nhttp://arxiv.org/abs/1512.02325\n\"\"\"\n\nimport math\nimport random\n\nimport cv2\nimport numpy as np\n\nfrom yolox.utils import xyxy2cxcywh\n\n\ndef augment_hsv(img, hgain=5, sgain=30, vgain=30):\n    hsv_augs = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain]  # random gains\n    hsv_augs *= np.random.randint(0, 2, 3)  # random selection of h, s, v\n    hsv_augs = hsv_augs.astype(np.int16)\n    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)\n\n    img_hsv[..., 0] = (img_hsv[..., 0] + hsv_augs[0]) % 180\n    img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_augs[1], 0, 255)\n    img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_augs[2], 0, 255)\n\n    cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)  # no return needed\n\n\ndef get_aug_params(value, center=0):\n    if isinstance(value, float):\n        return random.uniform(center - value, center + value)\n    elif len(value) == 2:\n        return random.uniform(value[0], value[1])\n    else:\n        raise ValueError(\n            \"Affine params should be either a sequence containing two values\\\n             or single float values. 
Got {}\".format(value)\n        )\n\n\ndef get_affine_matrix(\n    target_size,\n    degrees=10,\n    translate=0.1,\n    scales=0.1,\n    shear=10,\n):\n    twidth, theight = target_size\n\n    # Rotation and Scale\n    angle = get_aug_params(degrees)\n    scale = get_aug_params(scales, center=1.0)\n\n    if scale <= 0.0:\n        raise ValueError(\"Argument scale should be positive\")\n\n    R = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=scale)\n\n    M = np.ones([2, 3])\n    # Shear\n    shear_x = math.tan(get_aug_params(shear) * math.pi / 180)\n    shear_y = math.tan(get_aug_params(shear) * math.pi / 180)\n\n    M[0] = R[0] + shear_y * R[1]\n    M[1] = R[1] + shear_x * R[0]\n\n    # Translation\n    translation_x = get_aug_params(translate) * twidth  # x translation (pixels)\n    translation_y = get_aug_params(translate) * theight  # y translation (pixels)\n\n    M[0, 2] = translation_x\n    M[1, 2] = translation_y\n\n    return M, scale\n\n\ndef apply_affine_to_bboxes(targets, target_size, M, scale):\n    num_gts = len(targets)\n\n    # warp corner points\n    twidth, theight = target_size\n    corner_points = np.ones((4 * num_gts, 3))\n    corner_points[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(\n        4 * num_gts, 2\n    )  # x1y1, x2y2, x1y2, x2y1\n    corner_points = corner_points @ M.T  # apply affine transform\n    corner_points = corner_points.reshape(num_gts, 8)\n\n    # create new boxes\n    corner_xs = corner_points[:, 0::2]\n    corner_ys = corner_points[:, 1::2]\n    new_bboxes = (\n        np.concatenate(\n            (corner_xs.min(1), corner_ys.min(1), corner_xs.max(1), corner_ys.max(1))\n        )\n        .reshape(4, num_gts)\n        .T\n    )\n\n    # clip boxes\n    new_bboxes[:, 0::2] = new_bboxes[:, 0::2].clip(0, twidth)\n    new_bboxes[:, 1::2] = new_bboxes[:, 1::2].clip(0, theight)\n\n    targets[:, :4] = new_bboxes\n\n    return targets\n\n\ndef random_affine(\n    img,\n    targets=(),\n    
target_size=(640, 640),\n    degrees=10,\n    translate=0.1,\n    scales=0.1,\n    shear=10,\n):\n    M, scale = get_affine_matrix(target_size, degrees, translate, scales, shear)\n\n    img = cv2.warpAffine(img, M, dsize=target_size, borderValue=(114, 114, 114))\n\n    # Transform label coordinates\n    if len(targets) > 0:\n        targets = apply_affine_to_bboxes(targets, target_size, M, scale)\n\n    return img, targets\n\n\ndef _mirror(image, boxes, prob=0.5):\n    _, width, _ = image.shape\n    if random.random() < prob:\n        image = image[:, ::-1]\n        boxes[:, 0::2] = width - boxes[:, 2::-2]\n    return image, boxes\n\n\ndef preproc(img, input_size, swap=(2, 0, 1)):\n    if len(img.shape) == 3:\n        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114\n    else:\n        padded_img = np.ones(input_size, dtype=np.uint8) * 114\n\n    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])\n    resized_img = cv2.resize(\n        img,\n        (int(img.shape[1] * r), int(img.shape[0] * r)),\n        interpolation=cv2.INTER_LINEAR,\n    ).astype(np.uint8)\n    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img\n\n    padded_img = padded_img.transpose(swap)\n    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)\n    return padded_img, r\n\n\nclass TrainTransform:\n    def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):\n        self.max_labels = max_labels\n        self.flip_prob = flip_prob\n        self.hsv_prob = hsv_prob\n\n    def __call__(self, image, targets, input_dim):\n        boxes = targets[:, :4].copy()\n        labels = targets[:, 4].copy()\n        if len(boxes) == 0:\n            targets = np.zeros((self.max_labels, 5), dtype=np.float32)\n            image, r_o = preproc(image, input_dim)\n            return image, targets\n\n        image_o = image.copy()\n        targets_o = targets.copy()\n        height_o, width_o, _ = image_o.shape\n        
boxes_o = targets_o[:, :4]\n        labels_o = targets_o[:, 4]\n        # bbox_o: [xyxy] to [c_x,c_y,w,h]\n        boxes_o = xyxy2cxcywh(boxes_o)\n\n        if random.random() < self.hsv_prob:\n            augment_hsv(image)\n        image_t, boxes = _mirror(image, boxes, self.flip_prob)\n        height, width, _ = image_t.shape\n        image_t, r_ = preproc(image_t, input_dim)\n        # boxes [xyxy] 2 [cx,cy,w,h]\n        boxes = xyxy2cxcywh(boxes)\n        boxes *= r_\n\n        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1\n        boxes_t = boxes[mask_b]\n        labels_t = labels[mask_b]\n\n        if len(boxes_t) == 0:\n            image_t, r_o = preproc(image_o, input_dim)\n            boxes_o *= r_o\n            boxes_t = boxes_o\n            labels_t = labels_o\n\n        labels_t = np.expand_dims(labels_t, 1)\n\n        targets_t = np.hstack((labels_t, boxes_t))\n        padded_labels = np.zeros((self.max_labels, 5))\n        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[\n            : self.max_labels\n        ]\n        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)\n        return image_t, padded_labels\n\n\nclass ValTransform:\n    \"\"\"\n    Defines the transformations that should be applied to test PIL image\n    for input into the network\n\n    dimension -> tensorize -> color adj\n\n    Arguments:\n        resize (int): input dimension to SSD\n        rgb_means ((int,int,int)): average RGB of the dataset\n            (104,117,123)\n        swap ((int,int,int)): final order of channels\n\n    Returns:\n        transform (transform) : callable transform to be applied to test/val\n        data\n    \"\"\"\n\n    def __init__(self, swap=(2, 0, 1), legacy=False):\n        self.swap = swap\n        self.legacy = legacy\n\n    # assume input is cv2 img for now\n    def __call__(self, img, res, input_size):\n        img, _ = preproc(img, input_size, self.swap)\n        if self.legacy:\n            img = 
img[::-1, :, :].copy()\n            img /= 255.0\n            img -= np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1)\n            img /= np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1)\n        return img, np.zeros((1, 5))\n"
  },
  {
    "path": "yolox/data/data_prefetcher.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport torch\n\n\nclass DataPrefetcher:\n    \"\"\"\n    DataPrefetcher is inspired by code of following file:\n    https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py\n    It could speedup your pytorch dataloader. For more information, please check\n    https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789.\n    \"\"\"\n\n    def __init__(self, loader):\n        self.loader = iter(loader)\n        self.stream = torch.cuda.Stream()\n        self.input_cuda = self._input_cuda_for_image\n        self.record_stream = DataPrefetcher._record_stream_for_image\n        self.preload()\n\n    def preload(self):\n        try:\n            self.next_input, self.next_target, _, _ = next(self.loader)\n        except StopIteration:\n            self.next_input = None\n            self.next_target = None\n            return\n\n        with torch.cuda.stream(self.stream):\n            self.input_cuda()\n            self.next_target = self.next_target.cuda(non_blocking=True)\n\n    def next(self):\n        torch.cuda.current_stream().wait_stream(self.stream)\n        input = self.next_input\n        target = self.next_target\n        if input is not None:\n            self.record_stream(input)\n        if target is not None:\n            target.record_stream(torch.cuda.current_stream())\n        self.preload()\n        return input, target\n\n    def _input_cuda_for_image(self):\n        self.next_input = self.next_input.cuda(non_blocking=True)\n\n    @staticmethod\n    def _record_stream_for_image(input):\n        input.record_stream(torch.cuda.current_stream())\n"
  },
  {
    "path": "yolox/data/dataloading.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\nimport random\nimport uuid\n\nimport numpy as np\n\nimport torch\nfrom torch.utils.data.dataloader import DataLoader as torchDataLoader\nfrom torch.utils.data.dataloader import default_collate\n\nfrom .samplers import YoloBatchSampler\n\n\ndef get_yolox_datadir():\n    \"\"\"\n    get dataset dir of YOLOX. If environment variable named `YOLOX_DATADIR` is set,\n    this function will return value of the environment variable. Otherwise, use data\n    \"\"\"\n    yolox_datadir = os.getenv(\"YOLOX_DATADIR\", None)\n    if yolox_datadir is None:\n        import yolox\n\n        yolox_path = os.path.dirname(os.path.dirname(yolox.__file__))\n        yolox_datadir = os.path.join(yolox_path, \"datasets\")\n    return yolox_datadir\n\n\nclass DataLoader(torchDataLoader):\n    \"\"\"\n    Lightnet dataloader that enables on the fly resizing of the images.\n    See :class:`torch.utils.data.DataLoader` for more information on the arguments.\n    Check more on the following website:\n    https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py\n    \"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.__initialized = False\n        shuffle = False\n        batch_sampler = None\n        if len(args) > 5:\n            shuffle = args[2]\n            sampler = args[3]\n            batch_sampler = args[4]\n        elif len(args) > 4:\n            shuffle = args[2]\n            sampler = args[3]\n            if \"batch_sampler\" in kwargs:\n                batch_sampler = kwargs[\"batch_sampler\"]\n        elif len(args) > 3:\n            shuffle = args[2]\n            if \"sampler\" in kwargs:\n                sampler = kwargs[\"sampler\"]\n            if \"batch_sampler\" in kwargs:\n                batch_sampler = kwargs[\"batch_sampler\"]\n        else:\n            if \"shuffle\" 
in kwargs:\n                shuffle = kwargs[\"shuffle\"]\n            if \"sampler\" in kwargs:\n                sampler = kwargs[\"sampler\"]\n            if \"batch_sampler\" in kwargs:\n                batch_sampler = kwargs[\"batch_sampler\"]\n\n        # Use custom BatchSampler\n        if batch_sampler is None:\n            if sampler is None:\n                if shuffle:\n                    sampler = torch.utils.data.sampler.RandomSampler(self.dataset)\n                    # sampler = torch.utils.data.DistributedSampler(self.dataset)\n                else:\n                    sampler = torch.utils.data.sampler.SequentialSampler(self.dataset)\n            batch_sampler = YoloBatchSampler(\n                sampler,\n                self.batch_size,\n                self.drop_last,\n                input_dimension=self.dataset.input_dim,\n            )\n            # batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations =\n\n        self.batch_sampler = batch_sampler\n\n        self.__initialized = True\n\n    def close_mosaic(self):\n        self.batch_sampler.mosaic = False\n\n\ndef list_collate(batch):\n    \"\"\"\n    Function that collates lists or tuples together into one list (of lists/tuples).\n    Use this as the collate function in a Dataloader, if you want to have a list of\n    items as an output, as opposed to tensors (eg. Brambox.boxes).\n    \"\"\"\n    items = list(zip(*batch))\n\n    for i in range(len(items)):\n        if isinstance(items[i][0], (list, tuple)):\n            items[i] = list(items[i])\n        else:\n            items[i] = default_collate(items[i])\n\n    return items\n\n\ndef worker_init_reset_seed(worker_id):\n    seed = uuid.uuid4().int % 2**32\n    random.seed(seed)\n    torch.set_rng_state(torch.manual_seed(seed).get_state())\n    np.random.seed(seed)\n"
  },
  {
    "path": "yolox/data/datasets/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nfrom .coco import COCODataset\nfrom .coco_classes import COCO_CLASSES\nfrom .datasets_wrapper import CacheDataset, ConcatDataset, Dataset, MixConcatDataset\nfrom .mosaicdetection import MosaicDetection\nfrom .voc import VOCDetection\n"
  },
  {
    "path": "yolox/data/datasets/coco.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\nimport copy\nimport os\n\nimport cv2\nimport numpy as np\nfrom pycocotools.coco import COCO\n\nfrom ..dataloading import get_yolox_datadir\nfrom .datasets_wrapper import CacheDataset, cache_read_img\n\n\ndef remove_useless_info(coco):\n    \"\"\"\n    Remove useless info in coco dataset. COCO object is modified inplace.\n    This function is mainly used for saving memory (save about 30% mem).\n    \"\"\"\n    if isinstance(coco, COCO):\n        dataset = coco.dataset\n        dataset.pop(\"info\", None)\n        dataset.pop(\"licenses\", None)\n        for img in dataset[\"images\"]:\n            img.pop(\"license\", None)\n            img.pop(\"coco_url\", None)\n            img.pop(\"date_captured\", None)\n            img.pop(\"flickr_url\", None)\n        if \"annotations\" in coco.dataset:\n            for anno in coco.dataset[\"annotations\"]:\n                anno.pop(\"segmentation\", None)\n\n\nclass COCODataset(CacheDataset):\n    \"\"\"\n    COCO dataset class.\n    \"\"\"\n\n    def __init__(\n        self,\n        data_dir=None,\n        json_file=\"instances_train2017.json\",\n        name=\"train2017\",\n        img_size=(416, 416),\n        preproc=None,\n        cache=False,\n        cache_type=\"ram\",\n    ):\n        \"\"\"\n        COCO dataset initialization. Annotation data are read into memory by COCO API.\n        Args:\n            data_dir (str): dataset root directory\n            json_file (str): COCO json file name\n            name (str): COCO data name (e.g. 
'train2017' or 'val2017')\n            img_size (int): target image size after pre-processing\n            preproc: data augmentation strategy\n        \"\"\"\n        if data_dir is None:\n            data_dir = os.path.join(get_yolox_datadir(), \"COCO\")\n        self.data_dir = data_dir\n        self.json_file = json_file\n\n        self.coco = COCO(os.path.join(self.data_dir, \"annotations\", self.json_file))\n        remove_useless_info(self.coco)\n        self.ids = self.coco.getImgIds()\n        self.num_imgs = len(self.ids)\n        self.class_ids = sorted(self.coco.getCatIds())\n        self.cats = self.coco.loadCats(self.coco.getCatIds())\n        self._classes = tuple([c[\"name\"] for c in self.cats])\n        self.name = name\n        self.img_size = img_size\n        self.preproc = preproc\n        self.annotations = self._load_coco_annotations()\n\n        path_filename = [os.path.join(name, anno[3]) for anno in self.annotations]\n        super().__init__(\n            input_dimension=img_size,\n            num_imgs=self.num_imgs,\n            data_dir=data_dir,\n            cache_dir_name=f\"cache_{name}\",\n            path_filename=path_filename,\n            cache=cache,\n            cache_type=cache_type\n        )\n\n    def __len__(self):\n        return self.num_imgs\n\n    def _load_coco_annotations(self):\n        return [self.load_anno_from_ids(_ids) for _ids in self.ids]\n\n    def load_anno_from_ids(self, id_):\n        im_ann = self.coco.loadImgs(id_)[0]\n        width = im_ann[\"width\"]\n        height = im_ann[\"height\"]\n        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)\n        annotations = self.coco.loadAnns(anno_ids)\n        objs = []\n        for obj in annotations:\n            x1 = np.max((0, obj[\"bbox\"][0]))\n            y1 = np.max((0, obj[\"bbox\"][1]))\n            x2 = np.min((width, x1 + np.max((0, obj[\"bbox\"][2]))))\n            y2 = np.min((height, y1 + np.max((0, obj[\"bbox\"][3]))))\n     
       if obj[\"area\"] > 0 and x2 >= x1 and y2 >= y1:\n                obj[\"clean_bbox\"] = [x1, y1, x2, y2]\n                objs.append(obj)\n\n        num_objs = len(objs)\n\n        res = np.zeros((num_objs, 5))\n        for ix, obj in enumerate(objs):\n            cls = self.class_ids.index(obj[\"category_id\"])\n            res[ix, 0:4] = obj[\"clean_bbox\"]\n            res[ix, 4] = cls\n\n        r = min(self.img_size[0] / height, self.img_size[1] / width)\n        res[:, :4] *= r\n\n        img_info = (height, width)\n        resized_info = (int(height * r), int(width * r))\n\n        file_name = (\n            im_ann[\"file_name\"]\n            if \"file_name\" in im_ann\n            else \"{:012}\".format(id_) + \".jpg\"\n        )\n\n        return (res, img_info, resized_info, file_name)\n\n    def load_anno(self, index):\n        return self.annotations[index][0]\n\n    def load_resized_img(self, index):\n        img = self.load_image(index)\n        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])\n        resized_img = cv2.resize(\n            img,\n            (int(img.shape[1] * r), int(img.shape[0] * r)),\n            interpolation=cv2.INTER_LINEAR,\n        ).astype(np.uint8)\n        return resized_img\n\n    def load_image(self, index):\n        file_name = self.annotations[index][3]\n\n        img_file = os.path.join(self.data_dir, self.name, file_name)\n\n        img = cv2.imread(img_file)\n        assert img is not None, f\"file named {img_file} not found\"\n\n        return img\n\n    @cache_read_img(use_cache=True)\n    def read_img(self, index):\n        return self.load_resized_img(index)\n\n    def pull_item(self, index):\n        id_ = self.ids[index]\n        label, origin_image_size, _, _ = self.annotations[index]\n        img = self.read_img(index)\n\n        return img, copy.deepcopy(label), origin_image_size, np.array([id_])\n\n    @CacheDataset.mosaic_getitem\n    def __getitem__(self, index):\n        
\"\"\"\n        One image / label pair for the given index is picked up and pre-processed.\n\n        Args:\n            index (int): data index\n\n        Returns:\n            img (numpy.ndarray): pre-processed image\n            padded_labels (torch.Tensor): pre-processed label data.\n                The shape is :math:`[max_labels, 5]`.\n                each label consists of [class, xc, yc, w, h]:\n                    class (float): class index.\n                    xc, yc (float) : center of bbox whose values range from 0 to 1.\n                    w, h (float) : size of bbox whose values range from 0 to 1.\n            info_img : tuple of h, w.\n                h, w (int): original shape of the image\n            img_id (int): same as the input index. Used for evaluation.\n        \"\"\"\n        img, target, img_info, img_id = self.pull_item(index)\n\n        if self.preproc is not None:\n            img, target = self.preproc(img, target, self.input_dim)\n        return img, target, img_info, img_id\n"
  },
  {
    "path": "yolox/data/datasets/coco_classes.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nCOCO_CLASSES = (\n    \"person\",\n    \"bicycle\",\n    \"car\",\n    \"motorcycle\",\n    \"airplane\",\n    \"bus\",\n    \"train\",\n    \"truck\",\n    \"boat\",\n    \"traffic light\",\n    \"fire hydrant\",\n    \"stop sign\",\n    \"parking meter\",\n    \"bench\",\n    \"bird\",\n    \"cat\",\n    \"dog\",\n    \"horse\",\n    \"sheep\",\n    \"cow\",\n    \"elephant\",\n    \"bear\",\n    \"zebra\",\n    \"giraffe\",\n    \"backpack\",\n    \"umbrella\",\n    \"handbag\",\n    \"tie\",\n    \"suitcase\",\n    \"frisbee\",\n    \"skis\",\n    \"snowboard\",\n    \"sports ball\",\n    \"kite\",\n    \"baseball bat\",\n    \"baseball glove\",\n    \"skateboard\",\n    \"surfboard\",\n    \"tennis racket\",\n    \"bottle\",\n    \"wine glass\",\n    \"cup\",\n    \"fork\",\n    \"knife\",\n    \"spoon\",\n    \"bowl\",\n    \"banana\",\n    \"apple\",\n    \"sandwich\",\n    \"orange\",\n    \"broccoli\",\n    \"carrot\",\n    \"hot dog\",\n    \"pizza\",\n    \"donut\",\n    \"cake\",\n    \"chair\",\n    \"couch\",\n    \"potted plant\",\n    \"bed\",\n    \"dining table\",\n    \"toilet\",\n    \"tv\",\n    \"laptop\",\n    \"mouse\",\n    \"remote\",\n    \"keyboard\",\n    \"cell phone\",\n    \"microwave\",\n    \"oven\",\n    \"toaster\",\n    \"sink\",\n    \"refrigerator\",\n    \"book\",\n    \"clock\",\n    \"vase\",\n    \"scissors\",\n    \"teddy bear\",\n    \"hair drier\",\n    \"toothbrush\",\n)\n"
  },
  {
    "path": "yolox/data/datasets/datasets_wrapper.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport bisect\nimport copy\nimport os\nimport random\nfrom abc import ABCMeta, abstractmethod\nfrom functools import partial, wraps\nfrom multiprocessing.pool import ThreadPool\nimport psutil\nfrom loguru import logger\nfrom tqdm import tqdm\n\nimport numpy as np\n\nfrom torch.utils.data.dataset import ConcatDataset as torchConcatDataset\nfrom torch.utils.data.dataset import Dataset as torchDataset\n\n\nclass ConcatDataset(torchConcatDataset):\n    def __init__(self, datasets):\n        super(ConcatDataset, self).__init__(datasets)\n        if hasattr(self.datasets[0], \"input_dim\"):\n            self._input_dim = self.datasets[0].input_dim\n            self.input_dim = self.datasets[0].input_dim\n\n    def pull_item(self, idx):\n        if idx < 0:\n            if -idx > len(self):\n                raise ValueError(\n                    \"absolute value of index should not exceed dataset length\"\n                )\n            idx = len(self) + idx\n        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)\n        if dataset_idx == 0:\n            sample_idx = idx\n        else:\n            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]\n        return self.datasets[dataset_idx].pull_item(sample_idx)\n\n\nclass MixConcatDataset(torchConcatDataset):\n    def __init__(self, datasets):\n        super(MixConcatDataset, self).__init__(datasets)\n        if hasattr(self.datasets[0], \"input_dim\"):\n            self._input_dim = self.datasets[0].input_dim\n            self.input_dim = self.datasets[0].input_dim\n\n    def __getitem__(self, index):\n\n        if not isinstance(index, int):\n            idx = index[1]\n        if idx < 0:\n            if -idx > len(self):\n                raise ValueError(\n                    \"absolute value of index should not exceed dataset length\"\n                )\n            idx = 
len(self) + idx\n        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)\n        if dataset_idx == 0:\n            sample_idx = idx\n        else:\n            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]\n        if not isinstance(index, int):\n            index = (index[0], sample_idx, index[2])\n\n        return self.datasets[dataset_idx][index]\n\n\nclass Dataset(torchDataset):\n    \"\"\" This class is a subclass of the base :class:`torch.utils.data.Dataset`,\n    that enables on the fly resizing of the ``input_dim``.\n\n    Args:\n        input_dimension (tuple): (width,height) tuple with default dimensions of the network\n    \"\"\"\n\n    def __init__(self, input_dimension, mosaic=True):\n        super().__init__()\n        self.__input_dim = input_dimension[:2]\n        self.enable_mosaic = mosaic\n\n    @property\n    def input_dim(self):\n        \"\"\"\n        Dimension that can be used by transforms to set the correct image size, etc.\n        This allows transforms to have a single source of truth\n        for the input dimension of the network.\n\n        Return:\n            list: Tuple containing the current width,height\n        \"\"\"\n        if hasattr(self, \"_input_dim\"):\n            return self._input_dim\n        return self.__input_dim\n\n    @staticmethod\n    def mosaic_getitem(getitem_fn):\n        \"\"\"\n        Decorator method that needs to be used around the ``__getitem__`` method. |br|\n        This decorator enables the closing mosaic\n\n        Example:\n            >>> class CustomSet(ln.data.Dataset):\n            ...     def __len__(self):\n            ...         return 10\n            ...     @ln.data.Dataset.mosaic_getitem\n            ...     def __getitem__(self, index):\n            ...         
return self.enable_mosaic\n        \"\"\"\n\n        @wraps(getitem_fn)\n        def wrapper(self, index):\n            if not isinstance(index, int):\n                self.enable_mosaic = index[0]\n                index = index[1]\n\n            ret_val = getitem_fn(self, index)\n\n            return ret_val\n\n        return wrapper\n\n\nclass CacheDataset(Dataset, metaclass=ABCMeta):\n    \"\"\" This class is a subclass of the base :class:`yolox.data.datasets.Dataset`,\n    that enables cache images to ram or disk.\n\n    Args:\n        input_dimension (tuple): (width,height) tuple with default dimensions of the network\n        num_imgs (int): datset size\n        data_dir (str): the root directory of the dataset, e.g. `/path/to/COCO`.\n        cache_dir_name (str): the name of the directory to cache to disk,\n            e.g. `\"custom_cache\"`. The files cached to disk will be saved\n            under `/path/to/COCO/custom_cache`.\n        path_filename (str): a list of paths to the data relative to the `data_dir`,\n            e.g. 
if you have data `/path/to/COCO/train/1.jpg`, `/path/to/COCO/train/2.jpg`,\n            then `path_filename = ['train/1.jpg', ' train/2.jpg']`.\n        cache (bool): whether to cache the images to ram or disk.\n        cache_type (str): the type of cache,\n            \"ram\" : Caching imgs to ram for fast training.\n            \"disk\": Caching imgs to disk for fast training.\n    \"\"\"\n\n    def __init__(\n        self,\n        input_dimension,\n        num_imgs=None,\n        data_dir=None,\n        cache_dir_name=None,\n        path_filename=None,\n        cache=False,\n        cache_type=\"ram\",\n    ):\n        super().__init__(input_dimension)\n        self.cache = cache\n        self.cache_type = cache_type\n\n        if self.cache and self.cache_type == \"disk\":\n            self.cache_dir = os.path.join(data_dir, cache_dir_name)\n            self.path_filename = path_filename\n\n        if self.cache and self.cache_type == \"ram\":\n            self.imgs = None\n\n        if self.cache:\n            self.cache_images(\n                num_imgs=num_imgs,\n                data_dir=data_dir,\n                cache_dir_name=cache_dir_name,\n                path_filename=path_filename,\n            )\n\n    def __del__(self):\n        if self.cache and self.cache_type == \"ram\":\n            del self.imgs\n\n    @abstractmethod\n    def read_img(self, index):\n        \"\"\"\n        Given index, return the corresponding image\n\n        Args:\n            index (int): image index\n        \"\"\"\n        raise NotImplementedError\n\n    def cache_images(\n        self,\n        num_imgs=None,\n        data_dir=None,\n        cache_dir_name=None,\n        path_filename=None,\n    ):\n        assert num_imgs is not None, \"num_imgs must be specified as the size of the dataset\"\n        if self.cache_type == \"disk\":\n            assert (data_dir and cache_dir_name and path_filename) is not None, \\\n                \"data_dir, cache_name and 
path_filename must be specified if cache_type is disk\"\n            self.path_filename = path_filename\n\n        mem = psutil.virtual_memory()\n        mem_required = self.cal_cache_occupy(num_imgs)\n        gb = 1 << 30\n\n        if self.cache_type == \"ram\":\n            if mem_required > mem.available:\n                self.cache = False\n            else:\n                logger.info(\n                    f\"{mem_required / gb:.1f}GB RAM required, \"\n                    f\"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB RAM available, \"\n                    f\"Since the first thing we do is cache, \"\n                    f\"there is no guarantee that the remaining memory space is sufficient\"\n                )\n\n        if self.cache and self.imgs is None:\n            if self.cache_type == 'ram':\n                self.imgs = [None] * num_imgs\n                logger.info(\"You are using cached images in RAM to accelerate training!\")\n            else:   # 'disk'\n                if not os.path.exists(self.cache_dir):\n                    os.mkdir(self.cache_dir)\n                    logger.warning(\n                        f\"\\n*******************************************************************\\n\"\n                        f\"You are using cached images in DISK to accelerate training.\\n\"\n                        f\"This requires large DISK space.\\n\"\n                        f\"Make sure you have {mem_required / gb:.1f} \"\n                        f\"available DISK space for training your dataset.\\n\"\n                        f\"*******************************************************************\\\\n\"\n                    )\n                else:\n                    logger.info(f\"Found disk cache at {self.cache_dir}\")\n                    return\n\n            logger.info(\n                \"Caching images...\\n\"\n                \"This might take some time for your dataset\"\n            )\n\n            num_threads = min(8, max(1, 
os.cpu_count() - 1))\n            b = 0\n            load_imgs = ThreadPool(num_threads).imap(\n                partial(self.read_img, use_cache=False),\n                range(num_imgs)\n            )\n            pbar = tqdm(enumerate(load_imgs), total=num_imgs)\n            for i, x in pbar:   # x = self.read_img(self, i, use_cache=False)\n                if self.cache_type == 'ram':\n                    self.imgs[i] = x\n                else:   # 'disk'\n                    cache_filename = f'{self.path_filename[i].split(\".\")[0]}.npy'\n                    cache_path_filename = os.path.join(self.cache_dir, cache_filename)\n                    os.makedirs(os.path.dirname(cache_path_filename), exist_ok=True)\n                    np.save(cache_path_filename, x)\n                b += x.nbytes\n                pbar.desc = \\\n                    f'Caching images ({b / gb:.1f}/{mem_required / gb:.1f}GB {self.cache_type})'\n            pbar.close()\n\n    def cal_cache_occupy(self, num_imgs):\n        cache_bytes = 0\n        num_samples = min(num_imgs, 32)\n        for _ in range(num_samples):\n            img = self.read_img(index=random.randint(0, num_imgs - 1), use_cache=False)\n            cache_bytes += img.nbytes\n        mem_required = cache_bytes * num_imgs / num_samples\n        return mem_required\n\n\ndef cache_read_img(use_cache=True):\n    def decorator(read_img_fn):\n        \"\"\"\n        Decorate the read_img function to cache the image\n\n        Args:\n            read_img_fn: read_img function\n            use_cache (bool, optional): For the decorated read_img function,\n                whether to read the image from cache.\n                Defaults to True.\n        \"\"\"\n        @wraps(read_img_fn)\n        def wrapper(self, index, use_cache=use_cache):\n            cache = self.cache and use_cache\n            if cache:\n                if self.cache_type == \"ram\":\n                    img = self.imgs[index]\n                    img = 
copy.deepcopy(img)\n                elif self.cache_type == \"disk\":\n                    img = np.load(\n                        os.path.join(\n                            self.cache_dir, f\"{self.path_filename[index].split('.')[0]}.npy\"))\n                else:\n                    raise ValueError(f\"Unknown cache type: {self.cache_type}\")\n            else:\n                img = read_img_fn(self, index)\n            return img\n        return wrapper\n    return decorator\n"
  },
  {
    "path": "yolox/data/datasets/mosaicdetection.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport random\n\nimport cv2\nimport numpy as np\n\nfrom yolox.utils import adjust_box_anns, get_local_rank\n\nfrom ..data_augment import random_affine\nfrom .datasets_wrapper import Dataset\n\n\ndef get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w):\n    # TODO update doc\n    # index0 to top left part of image\n    if mosaic_index == 0:\n        x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc\n        small_coord = w - (x2 - x1), h - (y2 - y1), w, h\n    # index1 to top right part of image\n    elif mosaic_index == 1:\n        x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc\n        small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h\n    # index2 to bottom left part of image\n    elif mosaic_index == 2:\n        x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h)\n        small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h)\n    # index2 to bottom right part of image\n    elif mosaic_index == 3:\n        x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h)  # noqa\n        small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h)\n    return (x1, y1, x2, y2), small_coord\n\n\nclass MosaicDetection(Dataset):\n    \"\"\"Detection dataset wrapper that performs mixup for normal dataset.\"\"\"\n\n    def __init__(\n        self, dataset, img_size, mosaic=True, preproc=None,\n        degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),\n        mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,\n        mosaic_prob=1.0, mixup_prob=1.0, *args\n    ):\n        \"\"\"\n\n        Args:\n            dataset(Dataset) : Pytorch dataset object.\n            img_size (tuple):\n            mosaic (bool): enable mosaic augmentation or not.\n            preproc (func):\n            degrees (float):\n            translate (float):\n            mosaic_scale (tuple):\n            
mixup_scale (tuple):\n            shear (float):\n            enable_mixup (bool):\n            *args(tuple) : Additional arguments for mixup random sampler.\n        \"\"\"\n        super().__init__(img_size, mosaic=mosaic)\n        self._dataset = dataset\n        self.preproc = preproc\n        self.degrees = degrees\n        self.translate = translate\n        self.scale = mosaic_scale\n        self.shear = shear\n        self.mixup_scale = mixup_scale\n        self.enable_mosaic = mosaic\n        self.enable_mixup = enable_mixup\n        self.mosaic_prob = mosaic_prob\n        self.mixup_prob = mixup_prob\n        self.local_rank = get_local_rank()\n\n    def __len__(self):\n        return len(self._dataset)\n\n    @Dataset.mosaic_getitem\n    def __getitem__(self, idx):\n        if self.enable_mosaic and random.random() < self.mosaic_prob:\n            mosaic_labels = []\n            input_dim = self._dataset.input_dim\n            input_h, input_w = input_dim[0], input_dim[1]\n\n            # yc, xc = s, s  # mosaic center x, y\n            yc = int(random.uniform(0.5 * input_h, 1.5 * input_h))\n            xc = int(random.uniform(0.5 * input_w, 1.5 * input_w))\n\n            # 3 additional image indices\n            indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)]\n\n            for i_mosaic, index in enumerate(indices):\n                img, _labels, _, img_id = self._dataset.pull_item(index)\n                h0, w0 = img.shape[:2]  # orig hw\n                scale = min(1. * input_h / h0, 1. 
* input_w / w0)\n                img = cv2.resize(\n                    img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR\n                )\n                # generate output mosaic image\n                (h, w, c) = img.shape[:3]\n                if i_mosaic == 0:\n                    mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8)\n\n                # suffix l means large image, while s means small image in mosaic aug.\n                (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate(\n                    mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w\n                )\n\n                mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2]\n                padw, padh = l_x1 - s_x1, l_y1 - s_y1\n\n                labels = _labels.copy()\n                # Normalized xywh to pixel xyxy format\n                if _labels.size > 0:\n                    labels[:, 0] = scale * _labels[:, 0] + padw\n                    labels[:, 1] = scale * _labels[:, 1] + padh\n                    labels[:, 2] = scale * _labels[:, 2] + padw\n                    labels[:, 3] = scale * _labels[:, 3] + padh\n                mosaic_labels.append(labels)\n\n            if len(mosaic_labels):\n                mosaic_labels = np.concatenate(mosaic_labels, 0)\n                np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0])\n                np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1])\n                np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2])\n                np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3])\n\n            mosaic_img, mosaic_labels = random_affine(\n                mosaic_img,\n                mosaic_labels,\n                target_size=(input_w, input_h),\n                degrees=self.degrees,\n                translate=self.translate,\n                scales=self.scale,\n                
shear=self.shear,\n            )\n\n            # -----------------------------------------------------------------\n            # CopyPaste: https://arxiv.org/abs/2012.07177\n            # -----------------------------------------------------------------\n            if (\n                self.enable_mixup\n                and not len(mosaic_labels) == 0\n                and random.random() < self.mixup_prob\n            ):\n                mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim)\n            mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim)\n            img_info = (mix_img.shape[1], mix_img.shape[0])\n\n            # -----------------------------------------------------------------\n            # img_info and img_id are not used for training.\n            # They are also hard to be specified on a mosaic image.\n            # -----------------------------------------------------------------\n            return mix_img, padded_labels, img_info, img_id\n\n        else:\n            self._dataset._input_dim = self.input_dim\n            img, label, img_info, img_id = self._dataset.pull_item(idx)\n            img, label = self.preproc(img, label, self.input_dim)\n            return img, label, img_info, img_id\n\n    def mixup(self, origin_img, origin_labels, input_dim):\n        jit_factor = random.uniform(*self.mixup_scale)\n        FLIP = random.uniform(0, 1) > 0.5\n        cp_labels = []\n        while len(cp_labels) == 0:\n            cp_index = random.randint(0, self.__len__() - 1)\n            cp_labels = self._dataset.load_anno(cp_index)\n        img, cp_labels, _, _ = self._dataset.pull_item(cp_index)\n\n        if len(img.shape) == 3:\n            cp_img = np.ones((input_dim[0], input_dim[1], 3), dtype=np.uint8) * 114\n        else:\n            cp_img = np.ones(input_dim, dtype=np.uint8) * 114\n\n        cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1])\n   
     resized_img = cv2.resize(\n            img,\n            (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)),\n            interpolation=cv2.INTER_LINEAR,\n        )\n\n        cp_img[\n            : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio)\n        ] = resized_img\n\n        cp_img = cv2.resize(\n            cp_img,\n            (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)),\n        )\n        cp_scale_ratio *= jit_factor\n\n        if FLIP:\n            cp_img = cp_img[:, ::-1, :]\n\n        origin_h, origin_w = cp_img.shape[:2]\n        target_h, target_w = origin_img.shape[:2]\n        padded_img = np.zeros(\n            (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8\n        )\n        padded_img[:origin_h, :origin_w] = cp_img\n\n        x_offset, y_offset = 0, 0\n        if padded_img.shape[0] > target_h:\n            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)\n        if padded_img.shape[1] > target_w:\n            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)\n        padded_cropped_img = padded_img[\n            y_offset: y_offset + target_h, x_offset: x_offset + target_w\n        ]\n\n        cp_bboxes_origin_np = adjust_box_anns(\n            cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h\n        )\n        if FLIP:\n            cp_bboxes_origin_np[:, 0::2] = (\n                origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1]\n            )\n        cp_bboxes_transformed_np = cp_bboxes_origin_np.copy()\n        cp_bboxes_transformed_np[:, 0::2] = np.clip(\n            cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w\n        )\n        cp_bboxes_transformed_np[:, 1::2] = np.clip(\n            cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h\n        )\n\n        cls_labels = cp_labels[:, 4:5].copy()\n        box_labels = cp_bboxes_transformed_np\n        labels = 
np.hstack((box_labels, cls_labels))\n        origin_labels = np.vstack((origin_labels, labels))\n        origin_img = origin_img.astype(np.float32)\n        origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)\n\n        return origin_img.astype(np.uint8), origin_labels\n"
  },
  {
    "path": "yolox/data/datasets/voc.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Code are based on\n# https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py\n# Copyright (c) Francisco Massa.\n# Copyright (c) Ellis Brown, Max deGroot.\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\nimport os.path\nimport pickle\nimport xml.etree.ElementTree as ET\n\nimport cv2\nimport numpy as np\n\nfrom yolox.evaluators.voc_eval import voc_eval\n\nfrom .datasets_wrapper import CacheDataset, cache_read_img\nfrom .voc_classes import VOC_CLASSES\n\n\nclass AnnotationTransform(object):\n\n    \"\"\"Transforms a VOC annotation into a Tensor of bbox coords and label index\n    Initilized with a dictionary lookup of classnames to indexes\n\n    Arguments:\n        class_to_ind (dict, optional): dictionary lookup of classnames -> indexes\n            (default: alphabetic indexing of VOC's 20 classes)\n        keep_difficult (bool, optional): keep difficult instances or not\n            (default: False)\n        height (int): height\n        width (int): width\n    \"\"\"\n\n    def __init__(self, class_to_ind=None, keep_difficult=True):\n        self.class_to_ind = class_to_ind or dict(\n            zip(VOC_CLASSES, range(len(VOC_CLASSES)))\n        )\n        self.keep_difficult = keep_difficult\n\n    def __call__(self, target):\n        \"\"\"\n        Arguments:\n            target (annotation) : the target annotation to be made usable\n                will be an ET.Element\n        Returns:\n            a list containing lists of bounding boxes  [bbox coords, class name]\n        \"\"\"\n        res = np.empty((0, 5))\n        for obj in target.iter(\"object\"):\n            difficult = obj.find(\"difficult\")\n            if difficult is not None:\n                difficult = int(difficult.text) == 1\n            else:\n                difficult = False\n            if not self.keep_difficult and difficult:\n                continue\n            name = 
obj.find(\"name\").text.strip()\n            bbox = obj.find(\"bndbox\")\n\n            pts = [\"xmin\", \"ymin\", \"xmax\", \"ymax\"]\n            bndbox = []\n            for i, pt in enumerate(pts):\n                cur_pt = int(float(bbox.find(pt).text)) - 1\n                # scale height or width\n                # cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height\n                bndbox.append(cur_pt)\n            label_idx = self.class_to_ind[name]\n            bndbox.append(label_idx)\n            res = np.vstack((res, bndbox))  # [xmin, ymin, xmax, ymax, label_ind]\n            # img_id = target.find('filename').text[:-4]\n\n        width = int(target.find(\"size\").find(\"width\").text)\n        height = int(target.find(\"size\").find(\"height\").text)\n        img_info = (height, width)\n\n        return res, img_info\n\n\nclass VOCDetection(CacheDataset):\n\n    \"\"\"\n    VOC Detection Dataset Object\n\n    input is image, target is annotation\n\n    Args:\n        root (string): filepath to VOCdevkit folder.\n        image_set (string): imageset to use (eg. 
'train', 'val', 'test')\n        transform (callable, optional): transformation to perform on the\n            input image\n        target_transform (callable, optional): transformation to perform on the\n            target `annotation`\n            (eg: take in caption string, return tensor of word indices)\n        dataset_name (string, optional): which dataset to load\n            (default: 'VOC2007')\n    \"\"\"\n\n    def __init__(\n        self,\n        data_dir,\n        image_sets=[(\"2007\", \"trainval\"), (\"2012\", \"trainval\")],\n        img_size=(416, 416),\n        preproc=None,\n        target_transform=AnnotationTransform(),\n        dataset_name=\"VOC0712\",\n        cache=False,\n        cache_type=\"ram\",\n    ):\n        self.root = data_dir\n        self.image_set = image_sets\n        self.img_size = img_size\n        self.preproc = preproc\n        self.target_transform = target_transform\n        self.name = dataset_name\n        self._annopath = os.path.join(\"%s\", \"Annotations\", \"%s.xml\")\n        self._imgpath = os.path.join(\"%s\", \"JPEGImages\", \"%s.jpg\")\n        self._classes = VOC_CLASSES\n        self.cats = [\n            {\"id\": idx, \"name\": val} for idx, val in enumerate(VOC_CLASSES)\n        ]\n        self.class_ids = list(range(len(VOC_CLASSES)))\n        self.ids = list()\n        for (year, name) in image_sets:\n            self._year = year\n            rootpath = os.path.join(self.root, \"VOC\" + year)\n            for line in open(\n                os.path.join(rootpath, \"ImageSets\", \"Main\", name + \".txt\")\n            ):\n                self.ids.append((rootpath, line.strip()))\n        self.num_imgs = len(self.ids)\n\n        self.annotations = self._load_coco_annotations()\n\n        path_filename = [\n            (self._imgpath % self.ids[i]).split(self.root + \"/\")[1]\n            for i in range(self.num_imgs)\n        ]\n        super().__init__(\n            input_dimension=img_size,\n         
   num_imgs=self.num_imgs,\n            data_dir=self.root,\n            cache_dir_name=f\"cache_{self.name}\",\n            path_filename=path_filename,\n            cache=cache,\n            cache_type=cache_type\n        )\n\n    def __len__(self):\n        return self.num_imgs\n\n    def _load_coco_annotations(self):\n        return [self.load_anno_from_ids(_ids) for _ids in range(self.num_imgs)]\n\n    def load_anno_from_ids(self, index):\n        img_id = self.ids[index]\n        target = ET.parse(self._annopath % img_id).getroot()\n\n        assert self.target_transform is not None\n        res, img_info = self.target_transform(target)\n        height, width = img_info\n\n        r = min(self.img_size[0] / height, self.img_size[1] / width)\n        res[:, :4] *= r\n        resized_info = (int(height * r), int(width * r))\n\n        return (res, img_info, resized_info)\n\n    def load_anno(self, index):\n        return self.annotations[index][0]\n\n    def load_resized_img(self, index):\n        img = self.load_image(index)\n        r = min(self.img_size[0] / img.shape[0], self.img_size[1] / img.shape[1])\n        resized_img = cv2.resize(\n            img,\n            (int(img.shape[1] * r), int(img.shape[0] * r)),\n            interpolation=cv2.INTER_LINEAR,\n        ).astype(np.uint8)\n\n        return resized_img\n\n    def load_image(self, index):\n        img_id = self.ids[index]\n        img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)\n        assert img is not None, f\"file named {self._imgpath % img_id} not found\"\n\n        return img\n\n    @cache_read_img(use_cache=True)\n    def read_img(self, index):\n        return self.load_resized_img(index)\n\n    def pull_item(self, index):\n        \"\"\"Returns the original image and target at an index for mixup\n\n        Note: not using self.__getitem__(), as any transformations passed in\n        could mess up this functionality.\n\n        Argument:\n            index (int): index of img 
to show\n        Return:\n            img, target\n        \"\"\"\n        target, img_info, _ = self.annotations[index]\n        img = self.read_img(index)\n\n        return img, target, img_info, index\n\n    @CacheDataset.mosaic_getitem\n    def __getitem__(self, index):\n        img, target, img_info, img_id = self.pull_item(index)\n\n        if self.preproc is not None:\n            img, target = self.preproc(img, target, self.input_dim)\n\n        return img, target, img_info, img_id\n\n    def evaluate_detections(self, all_boxes, output_dir=None):\n        \"\"\"\n        all_boxes is a list of length number-of-classes.\n        Each list element is a list of length number-of-images.\n        Each of those list elements is either an empty list []\n        or a numpy array of detection.\n\n        all_boxes[class][image] = [] or np.array of shape #dets x 5\n        \"\"\"\n        self._write_voc_results_file(all_boxes)\n        IouTh = np.linspace(\n            0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True\n        )\n        mAPs = []\n        for iou in IouTh:\n            mAP = self._do_python_eval(output_dir, iou)\n            mAPs.append(mAP)\n\n        print(\"--------------------------------------------------------------\")\n        print(\"map_5095:\", np.mean(mAPs))\n        print(\"map_50:\", mAPs[0])\n        print(\"--------------------------------------------------------------\")\n        return np.mean(mAPs), mAPs[0]\n\n    def _get_voc_results_file_template(self):\n        filename = \"comp4_det_test\" + \"_{:s}.txt\"\n        filedir = os.path.join(self.root, \"results\", \"VOC\" + self._year, \"Main\")\n        if not os.path.exists(filedir):\n            os.makedirs(filedir)\n        path = os.path.join(filedir, filename)\n        return path\n\n    def _write_voc_results_file(self, all_boxes):\n        for cls_ind, cls in enumerate(VOC_CLASSES):\n            cls_ind = cls_ind\n            if cls == \"__background__\":\n  
              continue\n            print(\"Writing {} VOC results file\".format(cls))\n            filename = self._get_voc_results_file_template().format(cls)\n            with open(filename, \"wt\") as f:\n                for im_ind, index in enumerate(self.ids):\n                    index = index[1]\n                    dets = all_boxes[cls_ind][im_ind]\n                    if dets == []:\n                        continue\n                    for k in range(dets.shape[0]):\n                        f.write(\n                            \"{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\\n\".format(\n                                index,\n                                dets[k, -1],\n                                dets[k, 0] + 1,\n                                dets[k, 1] + 1,\n                                dets[k, 2] + 1,\n                                dets[k, 3] + 1,\n                            )\n                        )\n\n    def _do_python_eval(self, output_dir=\"output\", iou=0.5):\n        rootpath = os.path.join(self.root, \"VOC\" + self._year)\n        name = self.image_set[0][1]\n        annopath = os.path.join(rootpath, \"Annotations\", \"{:s}.xml\")\n        imagesetfile = os.path.join(rootpath, \"ImageSets\", \"Main\", name + \".txt\")\n        cachedir = os.path.join(\n            self.root, \"annotations_cache\", \"VOC\" + self._year, name\n        )\n        if not os.path.exists(cachedir):\n            os.makedirs(cachedir)\n        aps = []\n        # The PASCAL VOC metric changed in 2010\n        use_07_metric = True if int(self._year) < 2010 else False\n        print(\"Eval IoU : {:.2f}\".format(iou))\n        if output_dir is not None and not os.path.isdir(output_dir):\n            os.mkdir(output_dir)\n        for i, cls in enumerate(VOC_CLASSES):\n\n            if cls == \"__background__\":\n                continue\n\n            filename = self._get_voc_results_file_template().format(cls)\n            rec, prec, ap = voc_eval(\n          
      filename,\n                annopath,\n                imagesetfile,\n                cls,\n                cachedir,\n                ovthresh=iou,\n                use_07_metric=use_07_metric,\n            )\n            aps += [ap]\n            if iou == 0.5:\n                print(\"AP for {} = {:.4f}\".format(cls, ap))\n            if output_dir is not None:\n                with open(os.path.join(output_dir, cls + \"_pr.pkl\"), \"wb\") as f:\n                    pickle.dump({\"rec\": rec, \"prec\": prec, \"ap\": ap}, f)\n        if iou == 0.5:\n            print(\"Mean AP = {:.4f}\".format(np.mean(aps)))\n            print(\"~~~~~~~~\")\n            print(\"Results:\")\n            for ap in aps:\n                print(\"{:.3f}\".format(ap))\n            print(\"{:.3f}\".format(np.mean(aps)))\n            print(\"~~~~~~~~\")\n            print(\"\")\n            print(\"--------------------------------------------------------------\")\n            print(\"Results computed with the **unofficial** Python eval code.\")\n            print(\"Results should be very close to the official MATLAB eval code.\")\n            print(\"Recompute with `./tools/reval.py --matlab ...` for your paper.\")\n            print(\"-- Thanks, The Management\")\n            print(\"--------------------------------------------------------------\")\n\n        return np.mean(aps)\n"
  },
  {
    "path": "yolox/data/datasets/voc_classes.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\n# VOC_CLASSES = ( '__background__', # always index 0\nVOC_CLASSES = (\n    \"aeroplane\",\n    \"bicycle\",\n    \"bird\",\n    \"boat\",\n    \"bottle\",\n    \"bus\",\n    \"car\",\n    \"cat\",\n    \"chair\",\n    \"cow\",\n    \"diningtable\",\n    \"dog\",\n    \"horse\",\n    \"motorbike\",\n    \"person\",\n    \"pottedplant\",\n    \"sheep\",\n    \"sofa\",\n    \"train\",\n    \"tvmonitor\",\n)\n"
  },
  {
    "path": "yolox/data/samplers.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport itertools\nfrom typing import Optional\n\nimport torch\nimport torch.distributed as dist\nfrom torch.utils.data.sampler import BatchSampler as torchBatchSampler\nfrom torch.utils.data.sampler import Sampler\n\n\nclass YoloBatchSampler(torchBatchSampler):\n    \"\"\"\n    This batch sampler will generate mini-batches of (mosaic, index) tuples from another sampler.\n    It works just like the :class:`torch.utils.data.sampler.BatchSampler`,\n    but it will turn on/off the mosaic aug.\n    \"\"\"\n\n    def __init__(self, *args, mosaic=True, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.mosaic = mosaic\n\n    def __iter__(self):\n        for batch in super().__iter__():\n            yield [(self.mosaic, idx) for idx in batch]\n\n\nclass InfiniteSampler(Sampler):\n    \"\"\"\n    In training, we only care about the \"infinite stream\" of training data.\n    So this sampler produces an infinite stream of indices and\n    all workers cooperate to correctly shuffle the indices and sample different indices.\n    The samplers in each worker effectively produces `indices[worker_id::num_workers]`\n    where `indices` is an infinite stream of indices consisting of\n    `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True)\n    or `range(size) + range(size) + ...` (if shuffle is False)\n    \"\"\"\n\n    def __init__(\n        self,\n        size: int,\n        shuffle: bool = True,\n        seed: Optional[int] = 0,\n        rank=0,\n        world_size=1,\n    ):\n        \"\"\"\n        Args:\n            size (int): the total number of data of the underlying dataset to sample from\n            shuffle (bool): whether to shuffle the indices or not\n            seed (int): the initial seed of the shuffle. Must be the same\n                across all workers. 
If None, will use a random seed shared\n                among workers (require synchronization among all workers).\n        \"\"\"\n        self._size = size\n        assert size > 0\n        self._shuffle = shuffle\n        self._seed = int(seed)\n\n        if dist.is_available() and dist.is_initialized():\n            self._rank = dist.get_rank()\n            self._world_size = dist.get_world_size()\n        else:\n            self._rank = rank\n            self._world_size = world_size\n\n    def __iter__(self):\n        start = self._rank\n        yield from itertools.islice(\n            self._infinite_indices(), start, None, self._world_size\n        )\n\n    def _infinite_indices(self):\n        g = torch.Generator()\n        g.manual_seed(self._seed)\n        while True:\n            if self._shuffle:\n                yield from torch.randperm(self._size, generator=g)\n            else:\n                yield from torch.arange(self._size)\n\n    def __len__(self):\n        return self._size // self._world_size\n"
  },
  {
    "path": "yolox/evaluators/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nfrom .coco_evaluator import COCOEvaluator\nfrom .voc_evaluator import VOCEvaluator\n"
  },
  {
    "path": "yolox/evaluators/coco_evaluator.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport contextlib\nimport io\nimport itertools\nimport json\nimport tempfile\nimport time\nfrom collections import ChainMap, defaultdict\nfrom loguru import logger\nfrom tabulate import tabulate\nfrom tqdm import tqdm\n\nimport numpy as np\n\nimport torch\n\nfrom yolox.data.datasets import COCO_CLASSES\nfrom yolox.utils import (\n    gather,\n    is_main_process,\n    postprocess,\n    synchronize,\n    time_synchronized,\n    xyxy2xywh\n)\n\n\ndef per_class_AR_table(coco_eval, class_names=COCO_CLASSES, headers=[\"class\", \"AR\"], colums=6):\n    per_class_AR = {}\n    recalls = coco_eval.eval[\"recall\"]\n    # dimension of recalls: [TxKxAxM]\n    # recall has dims (iou, cls, area range, max dets)\n    assert len(class_names) == recalls.shape[1]\n\n    for idx, name in enumerate(class_names):\n        recall = recalls[:, idx, 0, -1]\n        recall = recall[recall > -1]\n        ar = np.mean(recall) if recall.size else float(\"nan\")\n        per_class_AR[name] = float(ar * 100)\n\n    num_cols = min(colums, len(per_class_AR) * len(headers))\n    result_pair = [x for pair in per_class_AR.items() for x in pair]\n    row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])\n    table_headers = headers * (num_cols // len(headers))\n    table = tabulate(\n        row_pair, tablefmt=\"pipe\", floatfmt=\".3f\", headers=table_headers, numalign=\"left\",\n    )\n    return table\n\n\ndef per_class_AP_table(coco_eval, class_names=COCO_CLASSES, headers=[\"class\", \"AP\"], colums=6):\n    per_class_AP = {}\n    precisions = coco_eval.eval[\"precision\"]\n    # dimension of precisions: [TxRxKxAxM]\n    # precision has dims (iou, recall, cls, area range, max dets)\n    assert len(class_names) == precisions.shape[2]\n\n    for idx, name in enumerate(class_names):\n        # area range index 0: all area ranges\n        # max dets 
index -1: typically 100 per image\n        precision = precisions[:, :, idx, 0, -1]\n        precision = precision[precision > -1]\n        ap = np.mean(precision) if precision.size else float(\"nan\")\n        per_class_AP[name] = float(ap * 100)\n\n    num_cols = min(colums, len(per_class_AP) * len(headers))\n    result_pair = [x for pair in per_class_AP.items() for x in pair]\n    row_pair = itertools.zip_longest(*[result_pair[i::num_cols] for i in range(num_cols)])\n    table_headers = headers * (num_cols // len(headers))\n    table = tabulate(\n        row_pair, tablefmt=\"pipe\", floatfmt=\".3f\", headers=table_headers, numalign=\"left\",\n    )\n    return table\n\n\nclass COCOEvaluator:\n    \"\"\"\n    COCO AP Evaluation class.  All the data in the val2017 dataset are processed\n    and evaluated by COCO API.\n    \"\"\"\n\n    def __init__(\n        self,\n        dataloader,\n        img_size: int,\n        confthre: float,\n        nmsthre: float,\n        num_classes: int,\n        testdev: bool = False,\n        per_class_AP: bool = True,\n        per_class_AR: bool = True,\n    ):\n        \"\"\"\n        Args:\n            dataloader (Dataloader): evaluate dataloader.\n            img_size: image size after preprocess. images are resized\n                to squares whose shape is (img_size, img_size).\n            confthre: confidence threshold ranging from 0 to 1, which\n                is defined in the config file.\n            nmsthre: IoU threshold of non-max supression ranging from 0 to 1.\n            per_class_AP: Show per class AP during evalution or not. Default to True.\n            per_class_AR: Show per class AR during evalution or not. 
Default to True.\n        \"\"\"\n        self.dataloader = dataloader\n        self.img_size = img_size\n        self.confthre = confthre\n        self.nmsthre = nmsthre\n        self.num_classes = num_classes\n        self.testdev = testdev\n        self.per_class_AP = per_class_AP\n        self.per_class_AR = per_class_AR\n\n    def evaluate(\n        self, model, distributed=False, half=False, trt_file=None,\n        decoder=None, test_size=None, return_outputs=False\n    ):\n        \"\"\"\n        COCO average precision (AP) Evaluation. Iterate inference on the test dataset\n        and the results are evaluated by COCO API.\n\n        NOTE: This function will change training mode to False, please save states if needed.\n\n        Args:\n            model : model to evaluate.\n\n        Returns:\n            ap50_95 (float) : COCO AP of IoU=50:95\n            ap50 (float) : COCO AP of IoU=50\n            summary (sr): summary info of evaluation.\n        \"\"\"\n        # TODO half to amp_test\n        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor\n        model = model.eval()\n        if half:\n            model = model.half()\n        ids = []\n        data_list = []\n        output_data = defaultdict()\n        progress_bar = tqdm if is_main_process() else iter\n\n        inference_time = 0\n        nms_time = 0\n        n_samples = max(len(self.dataloader) - 1, 1)\n\n        if trt_file is not None:\n            from torch2trt import TRTModule\n\n            model_trt = TRTModule()\n            model_trt.load_state_dict(torch.load(trt_file))\n\n            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()\n            model(x)\n            model = model_trt\n\n        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(\n            progress_bar(self.dataloader)\n        ):\n            with torch.no_grad():\n                imgs = imgs.type(tensor_type)\n\n                # skip the last iters since batchsize might be not 
enough for batch inference\n                is_time_record = cur_iter < len(self.dataloader) - 1\n                if is_time_record:\n                    start = time.time()\n\n                outputs = model(imgs)\n                if decoder is not None:\n                    outputs = decoder(outputs, dtype=outputs.type())\n\n                if is_time_record:\n                    infer_end = time_synchronized()\n                    inference_time += infer_end - start\n\n                outputs = postprocess(\n                    outputs, self.num_classes, self.confthre, self.nmsthre\n                )\n                if is_time_record:\n                    nms_end = time_synchronized()\n                    nms_time += nms_end - infer_end\n\n            data_list_elem, image_wise_data = self.convert_to_coco_format(\n                outputs, info_imgs, ids, return_outputs=True)\n            data_list.extend(data_list_elem)\n            output_data.update(image_wise_data)\n\n        statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])\n        if distributed:\n            # different process/device might have different speed,\n            # to make sure the process will not be stucked, sync func is used here.\n            synchronize()\n            data_list = gather(data_list, dst=0)\n            output_data = gather(output_data, dst=0)\n            data_list = list(itertools.chain(*data_list))\n            output_data = dict(ChainMap(*output_data))\n            torch.distributed.reduce(statistics, dst=0)\n\n        eval_results = self.evaluate_prediction(data_list, statistics)\n        synchronize()\n\n        if return_outputs:\n            return eval_results, output_data\n        return eval_results\n\n    def convert_to_coco_format(self, outputs, info_imgs, ids, return_outputs=False):\n        data_list = []\n        image_wise_data = defaultdict(dict)\n        for (output, img_h, img_w, img_id) in zip(\n            outputs, 
info_imgs[0], info_imgs[1], ids\n        ):\n            if output is None:\n                continue\n            output = output.cpu()\n\n            bboxes = output[:, 0:4]\n\n            # preprocessing: resize\n            scale = min(\n                self.img_size[0] / float(img_h), self.img_size[1] / float(img_w)\n            )\n            bboxes /= scale\n            cls = output[:, 6]\n            scores = output[:, 4] * output[:, 5]\n\n            image_wise_data.update({\n                int(img_id): {\n                    \"bboxes\": [box.numpy().tolist() for box in bboxes],\n                    \"scores\": [score.numpy().item() for score in scores],\n                    \"categories\": [\n                        self.dataloader.dataset.class_ids[int(cls[ind])]\n                        for ind in range(bboxes.shape[0])\n                    ],\n                }\n            })\n\n            bboxes = xyxy2xywh(bboxes)\n\n            for ind in range(bboxes.shape[0]):\n                label = self.dataloader.dataset.class_ids[int(cls[ind])]\n                pred_data = {\n                    \"image_id\": int(img_id),\n                    \"category_id\": label,\n                    \"bbox\": bboxes[ind].numpy().tolist(),\n                    \"score\": scores[ind].numpy().item(),\n                    \"segmentation\": [],\n                }  # COCO json format\n                data_list.append(pred_data)\n\n        if return_outputs:\n            return data_list, image_wise_data\n        return data_list\n\n    def evaluate_prediction(self, data_dict, statistics):\n        if not is_main_process():\n            return 0, 0, None\n\n        logger.info(\"Evaluate in main process...\")\n\n        annType = [\"segm\", \"bbox\", \"keypoints\"]\n\n        inference_time = statistics[0].item()\n        nms_time = statistics[1].item()\n        n_samples = statistics[2].item()\n\n        a_infer_time = 1000 * inference_time / (n_samples * 
self.dataloader.batch_size)\n        a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)\n\n        time_info = \", \".join(\n            [\n                \"Average {} time: {:.2f} ms\".format(k, v)\n                for k, v in zip(\n                    [\"forward\", \"NMS\", \"inference\"],\n                    [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],\n                )\n            ]\n        )\n\n        info = time_info + \"\\n\"\n\n        # Evaluate the Dt (detection) json comparing with the ground truth\n        if len(data_dict) > 0:\n            cocoGt = self.dataloader.dataset.coco\n            # TODO: since pycocotools can't process dict in py36, write data to json file.\n            if self.testdev:\n                json.dump(data_dict, open(\"./yolox_testdev_2017.json\", \"w\"))\n                cocoDt = cocoGt.loadRes(\"./yolox_testdev_2017.json\")\n            else:\n                _, tmp = tempfile.mkstemp()\n                json.dump(data_dict, open(tmp, \"w\"))\n                cocoDt = cocoGt.loadRes(tmp)\n            try:\n                from yolox.layers import COCOeval_opt as COCOeval\n            except ImportError:\n                from pycocotools.cocoeval import COCOeval\n\n                logger.warning(\"Use standard COCOeval.\")\n\n            cocoEval = COCOeval(cocoGt, cocoDt, annType[1])\n            cocoEval.evaluate()\n            cocoEval.accumulate()\n            redirect_string = io.StringIO()\n            with contextlib.redirect_stdout(redirect_string):\n                cocoEval.summarize()\n            info += redirect_string.getvalue()\n            cat_ids = list(cocoGt.cats.keys())\n            cat_names = [cocoGt.cats[catId]['name'] for catId in sorted(cat_ids)]\n            if self.per_class_AP:\n                AP_table = per_class_AP_table(cocoEval, class_names=cat_names)\n                info += \"per class AP:\\n\" + AP_table + \"\\n\"\n            if self.per_class_AR:\n        
        AR_table = per_class_AR_table(cocoEval, class_names=cat_names)\n                info += \"per class AR:\\n\" + AR_table + \"\\n\"\n            return cocoEval.stats[0], cocoEval.stats[1], info\n        else:\n            return 0, 0, info\n"
  },
  {
    "path": "yolox/evaluators/voc_eval.py",
    "content": "#!/usr/bin/env python3\n# Code are based on\n# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py\n# Copyright (c) Bharath Hariharan.\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport os\nimport pickle\nimport xml.etree.ElementTree as ET\n\nimport numpy as np\n\n\ndef parse_rec(filename):\n    \"\"\"Parse a PASCAL VOC xml file\"\"\"\n    tree = ET.parse(filename)\n    objects = []\n    for obj in tree.findall(\"object\"):\n        obj_struct = {}\n        obj_struct[\"name\"] = obj.find(\"name\").text\n        obj_struct[\"pose\"] = obj.find(\"pose\").text\n        obj_struct[\"truncated\"] = int(obj.find(\"truncated\").text)\n        obj_struct[\"difficult\"] = int(obj.find(\"difficult\").text)\n        bbox = obj.find(\"bndbox\")\n        obj_struct[\"bbox\"] = [\n            int(bbox.find(\"xmin\").text),\n            int(bbox.find(\"ymin\").text),\n            int(bbox.find(\"xmax\").text),\n            int(bbox.find(\"ymax\").text),\n        ]\n        objects.append(obj_struct)\n\n    return objects\n\n\ndef voc_ap(rec, prec, use_07_metric=False):\n    \"\"\"\n    Compute VOC AP given precision and recall.\n    If use_07_metric is true, uses the\n    VOC 07 11 point method (default:False).\n    \"\"\"\n    if use_07_metric:\n        # 11 point metric\n        ap = 0.0\n        for t in np.arange(0.0, 1.1, 0.1):\n            if np.sum(rec >= t) == 0:\n                p = 0\n            else:\n                p = np.max(prec[rec >= t])\n            ap = ap + p / 11.0\n    else:\n        # correct AP calculation\n        # first append sentinel values at the end\n        mrec = np.concatenate(([0.0], rec, [1.0]))\n        mpre = np.concatenate(([0.0], prec, [0.0]))\n\n        # compute the precision envelope\n        for i in range(mpre.size - 1, 0, -1):\n            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])\n\n        # to calculate area under PR curve, look for points\n        # where X axis 
(recall) changes value\n        i = np.where(mrec[1:] != mrec[:-1])[0]\n\n        # and sum (\\Delta recall) * prec\n        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])\n    return ap\n\n\ndef voc_eval(\n    detpath,\n    annopath,\n    imagesetfile,\n    classname,\n    cachedir,\n    ovthresh=0.5,\n    use_07_metric=False,\n):\n    # first load gt\n    if not os.path.isdir(cachedir):\n        os.mkdir(cachedir)\n    cachefile = os.path.join(cachedir, \"annots.pkl\")\n    # read list of images\n    with open(imagesetfile, \"r\") as f:\n        lines = f.readlines()\n    imagenames = [x.strip() for x in lines]\n\n    if not os.path.isfile(cachefile):\n        # load annots\n        recs = {}\n        for i, imagename in enumerate(imagenames):\n            recs[imagename] = parse_rec(annopath.format(imagename))\n            if i % 100 == 0:\n                print(f\"Reading annotation for {i + 1}/{len(imagenames)}\")\n        # save\n        print(f\"Saving cached annotations to {cachefile}\")\n        with open(cachefile, \"wb\") as f:\n            pickle.dump(recs, f)\n    else:\n        # load\n        with open(cachefile, \"rb\") as f:\n            recs = pickle.load(f)\n\n    # extract gt objects for this class\n    class_recs = {}\n    npos = 0\n    for imagename in imagenames:\n        R = [obj for obj in recs[imagename] if obj[\"name\"] == classname]\n        bbox = np.array([x[\"bbox\"] for x in R])\n        difficult = np.array([x[\"difficult\"] for x in R]).astype(bool)\n        det = [False] * len(R)\n        npos = npos + sum(~difficult)\n        class_recs[imagename] = {\"bbox\": bbox, \"difficult\": difficult, \"det\": det}\n\n    # read dets\n    detfile = detpath.format(classname)\n    with open(detfile, \"r\") as f:\n        lines = f.readlines()\n\n    if len(lines) == 0:\n        return 0, 0, 0\n\n    splitlines = [x.strip().split(\" \") for x in lines]\n    image_ids = [x[0] for x in splitlines]\n    confidence = np.array([float(x[1]) for 
x in splitlines])\n    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])\n\n    # sort by confidence\n    sorted_ind = np.argsort(-confidence)\n    BB = BB[sorted_ind, :]\n    image_ids = [image_ids[x] for x in sorted_ind]\n\n    # go down dets and mark TPs and FPs\n    nd = len(image_ids)\n    tp = np.zeros(nd)\n    fp = np.zeros(nd)\n    for d in range(nd):\n        R = class_recs[image_ids[d]]\n        bb = BB[d, :].astype(float)\n        ovmax = -np.inf\n        BBGT = R[\"bbox\"].astype(float)\n\n        if BBGT.size > 0:\n            # compute overlaps\n            # intersection\n            ixmin = np.maximum(BBGT[:, 0], bb[0])\n            iymin = np.maximum(BBGT[:, 1], bb[1])\n            ixmax = np.minimum(BBGT[:, 2], bb[2])\n            iymax = np.minimum(BBGT[:, 3], bb[3])\n            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)\n            ih = np.maximum(iymax - iymin + 1.0, 0.0)\n            inters = iw * ih\n\n            # union\n            uni = (\n                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)\n                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) - inters\n            )\n\n            overlaps = inters / uni\n            ovmax = np.max(overlaps)\n            jmax = np.argmax(overlaps)\n\n        if ovmax > ovthresh:\n            if not R[\"difficult\"][jmax]:\n                if not R[\"det\"][jmax]:\n                    tp[d] = 1.0\n                    R[\"det\"][jmax] = 1\n                else:\n                    fp[d] = 1.0\n        else:\n            fp[d] = 1.0\n\n        # compute precision recall\n    fp = np.cumsum(fp)\n    tp = np.cumsum(tp)\n    rec = tp / float(npos)\n    # avoid divide by zero in case the first detection matches a difficult\n    # ground truth\n    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)\n    ap = voc_ap(rec, prec, use_07_metric)\n\n    return rec, prec, ap\n"
  },
  {
    "path": "yolox/evaluators/voc_evaluator.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii, Inc. and its affiliates.\n\nimport sys\nimport tempfile\nimport time\nfrom collections import ChainMap\nfrom loguru import logger\nfrom tqdm import tqdm\n\nimport numpy as np\n\nimport torch\n\nfrom yolox.utils import gather, is_main_process, postprocess, synchronize, time_synchronized\n\n\nclass VOCEvaluator:\n    \"\"\"\n    VOC AP Evaluation class.\n    \"\"\"\n\n    def __init__(self, dataloader, img_size, confthre, nmsthre, num_classes):\n        \"\"\"\n        Args:\n            dataloader (Dataloader): evaluate dataloader.\n            img_size (int): image size after preprocess. images are resized\n                to squares whose shape is (img_size, img_size).\n            confthre (float): confidence threshold ranging from 0 to 1, which\n                is defined in the config file.\n            nmsthre (float): IoU threshold of non-max supression ranging from 0 to 1.\n        \"\"\"\n        self.dataloader = dataloader\n        self.img_size = img_size\n        self.confthre = confthre\n        self.nmsthre = nmsthre\n        self.num_classes = num_classes\n        self.num_images = len(dataloader.dataset)\n\n    def evaluate(\n        self, model, distributed=False, half=False, trt_file=None,\n        decoder=None, test_size=None, return_outputs=False,\n    ):\n        \"\"\"\n        VOC average precision (AP) Evaluation. 
Iterate inference on the test dataset\n        and the results are evaluated by COCO API.\n\n        NOTE: This function will change training mode to False, please save states if needed.\n\n        Args:\n            model : model to evaluate.\n\n        Returns:\n            ap50_95 (float) : COCO style AP of IoU=50:95\n            ap50 (float) : VOC 2007 metric AP of IoU=50\n            summary (sr): summary info of evaluation.\n        \"\"\"\n        # TODO half to amp_test\n        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor\n        model = model.eval()\n        if half:\n            model = model.half()\n        ids = []\n        data_list = {}\n        progress_bar = tqdm if is_main_process() else iter\n\n        inference_time = 0\n        nms_time = 0\n        n_samples = max(len(self.dataloader) - 1, 1)\n\n        if trt_file is not None:\n            from torch2trt import TRTModule\n\n            model_trt = TRTModule()\n            model_trt.load_state_dict(torch.load(trt_file))\n\n            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()\n            model(x)\n            model = model_trt\n\n        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(progress_bar(self.dataloader)):\n            with torch.no_grad():\n                imgs = imgs.type(tensor_type)\n\n                # skip the last iters since batchsize might be not enough for batch inference\n                is_time_record = cur_iter < len(self.dataloader) - 1\n                if is_time_record:\n                    start = time.time()\n\n                outputs = model(imgs)\n                if decoder is not None:\n                    outputs = decoder(outputs, dtype=outputs.type())\n\n                if is_time_record:\n                    infer_end = time_synchronized()\n                    inference_time += infer_end - start\n\n                outputs = postprocess(\n                    outputs, self.num_classes, self.confthre, 
self.nmsthre\n                )\n                if is_time_record:\n                    nms_end = time_synchronized()\n                    nms_time += nms_end - infer_end\n\n            data_list.update(self.convert_to_voc_format(outputs, info_imgs, ids))\n\n        statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])\n        if distributed:\n            data_list = gather(data_list, dst=0)\n            data_list = ChainMap(*data_list)\n            torch.distributed.reduce(statistics, dst=0)\n\n        eval_results = self.evaluate_prediction(data_list, statistics)\n        synchronize()\n        if return_outputs:\n            return eval_results, data_list\n        return eval_results\n\n    def convert_to_voc_format(self, outputs, info_imgs, ids):\n        predictions = {}\n        for output, img_h, img_w, img_id in zip(outputs, info_imgs[0], info_imgs[1], ids):\n            if output is None:\n                predictions[int(img_id)] = (None, None, None)\n                continue\n            output = output.cpu()\n\n            bboxes = output[:, 0:4]\n\n            # preprocessing: resize\n            scale = min(self.img_size[0] / float(img_h), self.img_size[1] / float(img_w))\n            bboxes /= scale\n\n            cls = output[:, 6]\n            scores = output[:, 4] * output[:, 5]\n\n            predictions[int(img_id)] = (bboxes, cls, scores)\n        return predictions\n\n    def evaluate_prediction(self, data_dict, statistics):\n        if not is_main_process():\n            return 0, 0, None\n\n        logger.info(\"Evaluate in main process...\")\n\n        inference_time = statistics[0].item()\n        nms_time = statistics[1].item()\n        n_samples = statistics[2].item()\n\n        a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)\n        a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)\n\n        time_info = \", \".join(\n            [\n                \"Average {} 
time: {:.2f} ms\".format(k, v)\n                for k, v in zip(\n                    [\"forward\", \"NMS\", \"inference\"],\n                    [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],\n                )\n            ]\n        )\n        info = time_info + \"\\n\"\n\n        all_boxes = [\n            [[] for _ in range(self.num_images)] for _ in range(self.num_classes)\n        ]\n        for img_num in range(self.num_images):\n            bboxes, cls, scores = data_dict[img_num]\n            if bboxes is None:\n                for j in range(self.num_classes):\n                    all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)\n                continue\n            for j in range(self.num_classes):\n                mask_c = cls == j\n                if sum(mask_c) == 0:\n                    all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)\n                    continue\n\n                c_dets = torch.cat((bboxes, scores.unsqueeze(1)), dim=1)\n                all_boxes[j][img_num] = c_dets[mask_c].numpy()\n\n            sys.stdout.write(f\"im_eval: {img_num + 1}/{self.num_images} \\r\")\n            sys.stdout.flush()\n\n        with tempfile.TemporaryDirectory() as tempdir:\n            mAP50, mAP70 = self.dataloader.dataset.evaluate_detections(all_boxes, tempdir)\n            return mAP50, mAP70, info\n"
  },
  {
    "path": "yolox/exp/__init__.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nfrom .base_exp import BaseExp\nfrom .build import get_exp\nfrom .yolox_base import Exp, check_exp_value\n"
  },
  {
    "path": "yolox/exp/base_exp.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport ast\nimport pprint\nfrom abc import ABCMeta, abstractmethod\nfrom typing import Dict, List, Tuple\nfrom tabulate import tabulate\n\nimport torch\nfrom torch.nn import Module\n\nfrom yolox.utils import LRScheduler\n\n\nclass BaseExp(metaclass=ABCMeta):\n    \"\"\"Basic class for any experiment.\"\"\"\n\n    def __init__(self):\n        self.seed = None\n        self.output_dir = \"./YOLOX_outputs\"\n        self.print_interval = 100\n        self.eval_interval = 10\n        self.dataset = None\n\n    @abstractmethod\n    def get_model(self) -> Module:\n        pass\n\n    @abstractmethod\n    def get_dataset(self, cache: bool = False, cache_type: str = \"ram\"):\n        pass\n\n    @abstractmethod\n    def get_data_loader(\n        self, batch_size: int, is_distributed: bool\n    ) -> Dict[str, torch.utils.data.DataLoader]:\n        pass\n\n    @abstractmethod\n    def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer:\n        pass\n\n    @abstractmethod\n    def get_lr_scheduler(\n        self, lr: float, iters_per_epoch: int, **kwargs\n    ) -> LRScheduler:\n        pass\n\n    @abstractmethod\n    def get_evaluator(self):\n        pass\n\n    @abstractmethod\n    def eval(self, model, evaluator, weights):\n        pass\n\n    def __repr__(self):\n        table_header = [\"keys\", \"values\"]\n        exp_table = [\n            (str(k), pprint.pformat(v))\n            for k, v in vars(self).items()\n            if not k.startswith(\"_\")\n        ]\n        return tabulate(exp_table, headers=table_header, tablefmt=\"fancy_grid\")\n\n    def merge(self, cfg_list):\n        assert len(cfg_list) % 2 == 0, f\"length must be even, check value here: {cfg_list}\"\n        for k, v in zip(cfg_list[0::2], cfg_list[1::2]):\n            # only update value with same key\n            if hasattr(self, k):\n                src_value = getattr(self, k)\n               
 src_type = type(src_value)\n\n                # pre-process input if source type is list or tuple\n                if isinstance(src_value, (List, Tuple)):\n                    v = v.strip(\"[]()\")\n                    v = [t.strip() for t in v.split(\",\")]\n\n                    # find type of tuple\n                    if len(src_value) > 0:\n                        src_item_type = type(src_value[0])\n                        v = [src_item_type(t) for t in v]\n\n                if src_value is not None and src_type != type(v):\n                    try:\n                        v = src_type(v)\n                    except Exception:\n                        v = ast.literal_eval(v)\n                setattr(self, k, v)\n"
  },
  {
    "path": "yolox/exp/build.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport importlib\nimport os\nimport sys\n\n\ndef get_exp_by_file(exp_file):\n    try:\n        sys.path.append(os.path.dirname(exp_file))\n        current_exp = importlib.import_module(os.path.basename(exp_file).split(\".\")[0])\n        exp = current_exp.Exp()\n    except Exception:\n        raise ImportError(\"{} doesn't contains class named 'Exp'\".format(exp_file))\n    return exp\n\n\ndef get_exp_by_name(exp_name):\n    exp = exp_name.replace(\"-\", \"_\")  # convert string like \"yolox-s\" to \"yolox_s\"\n    module_name = \".\".join([\"yolox\", \"exp\", \"default\", exp])\n    exp_object = importlib.import_module(module_name).Exp()\n    return exp_object\n\n\ndef get_exp(exp_file=None, exp_name=None):\n    \"\"\"\n    get Exp object by file or name. If exp_file and exp_name\n    are both provided, get Exp by exp_file.\n\n    Args:\n        exp_file (str): file path of experiment.\n        exp_name (str): name of experiment, e.g. \"yolox-s\".\n    \"\"\"\n    assert (\n        exp_file is not None or exp_name is not None\n    ), \"plz provide exp file or exp name.\"\n    if exp_file is not None:\n        return get_exp_by_file(exp_file)\n    else:\n        return get_exp_by_name(exp_name)\n"
  },
  {
    "path": "yolox/exp/default/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\n# This file is used for package installation and to find the default exp files\n\nimport sys\nfrom importlib import abc, util\nfrom pathlib import Path\n\n_EXP_PATH = Path(__file__).resolve().parent.parent.parent.parent / \"exps\" / \"default\"\n\nif _EXP_PATH.is_dir():\n    # This is true only for in-place installation (pip install -e, setup.py develop),\n    # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230\n\n    class _ExpFinder(abc.MetaPathFinder):\n        \n        def find_spec(self, name, path, target=None):\n            if not name.startswith(\"yolox.exp.default\"):\n                return\n            project_name = name.split(\".\")[-1] + \".py\"\n            target_file = _EXP_PATH / project_name\n            if not target_file.is_file():\n                return\n            return util.spec_from_file_location(name, target_file)\n\n    sys.meta_path.append(_ExpFinder())\n"
  },
  {
    "path": "yolox/exp/yolox_base.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport os\nimport random\n\nimport torch\nimport torch.distributed as dist\nimport torch.nn as nn\n\nfrom .base_exp import BaseExp\n\n__all__ = [\"Exp\", \"check_exp_value\"]\n\n\nclass Exp(BaseExp):\n    def __init__(self):\n        super().__init__()\n\n        # ---------------- model config ---------------- #\n        # detect classes number of model\n        self.num_classes = 80\n        # factor of model depth\n        self.depth = 1.00\n        # factor of model width\n        self.width = 1.00\n        # activation name. For example, if using \"relu\", then \"silu\" will be replaced to \"relu\".\n        self.act = \"silu\"\n\n        # ---------------- dataloader config ---------------- #\n        # set worker to 4 for shorter dataloader init time\n        # If your training process cost many memory, reduce this value.\n        self.data_num_workers = 4\n        self.input_size = (640, 640)  # (height, width)\n        # Actual multiscale ranges: [640 - 5 * 32, 640 + 5 * 32].\n        # To disable multiscale training, set the value to 0.\n        self.multiscale_range = 5\n        # You can uncomment this line to specify a multiscale range\n        # self.random_size = (14, 26)\n        # dir of dataset images, if data_dir is None, this project will use `datasets` dir\n        self.data_dir = None\n        # name of annotation file for training\n        self.train_ann = \"instances_train2017.json\"\n        # name of annotation file for evaluation\n        self.val_ann = \"instances_val2017.json\"\n        # name of annotation file for testing\n        self.test_ann = \"instances_test2017.json\"\n\n        # --------------- transform config ----------------- #\n        # prob of applying mosaic aug\n        self.mosaic_prob = 1.0\n        # prob of applying mixup aug\n        self.mixup_prob = 1.0\n        # prob of applying hsv aug\n        self.hsv_prob = 1.0\n    
    # prob of applying flip aug\n        self.flip_prob = 0.5\n        # rotation angle range, for example, if set to 2, the true range is (-2, 2)\n        self.degrees = 10.0\n        # translate range, for example, if set to 0.1, the true range is (-0.1, 0.1)\n        self.translate = 0.1\n        self.mosaic_scale = (0.1, 2)\n        # apply mixup aug or not\n        self.enable_mixup = True\n        self.mixup_scale = (0.5, 1.5)\n        # shear angle range, for example, if set to 2, the true range is (-2, 2)\n        self.shear = 2.0\n\n        # --------------  training config --------------------- #\n        # epoch number used for warmup\n        self.warmup_epochs = 5\n        # max training epoch\n        self.max_epoch = 300\n        # minimum learning rate during warmup\n        self.warmup_lr = 0\n        self.min_lr_ratio = 0.05\n        # learning rate for one image. During training, lr will multiply batchsize.\n        self.basic_lr_per_img = 0.01 / 64.0\n        # name of LRScheduler\n        self.scheduler = \"yoloxwarmcos\"\n        # last #epoch to close augmention like mosaic\n        self.no_aug_epochs = 15\n        # apply EMA during training\n        self.ema = True\n\n        # weight decay of optimizer\n        self.weight_decay = 5e-4\n        # momentum of optimizer\n        self.momentum = 0.9\n        # log period in iter, for example,\n        # if set to 1, user could see log every iteration.\n        self.print_interval = 10\n        # eval period in epoch, for example,\n        # if set to 1, model will be evaluate after every epoch.\n        self.eval_interval = 10\n        # save history checkpoint or not.\n        # If set to False, yolox will only save latest and best ckpt.\n        self.save_history_ckpt = True\n        # name of experiment\n        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(\".\")[0]\n\n        # -----------------  testing config ------------------ #\n        # output image size during 
evaluation/test\n        self.test_size = (640, 640)\n        # confidence threshold during evaluation/test,\n        # boxes whose scores are less than test_conf will be filtered\n        self.test_conf = 0.01\n        # nms threshold\n        self.nmsthre = 0.65\n\n    def get_model(self):\n        from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead\n\n        def init_yolo(M):\n            for m in M.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eps = 1e-3\n                    m.momentum = 0.03\n\n        if getattr(self, \"model\", None) is None:\n            in_channels = [256, 512, 1024]\n            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)\n            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)\n            self.model = YOLOX(backbone, head)\n\n        self.model.apply(init_yolo)\n        self.model.head.initialize_biases(1e-2)\n        self.model.train()\n        return self.model\n\n    def get_dataset(self, cache: bool = False, cache_type: str = \"ram\"):\n        \"\"\"\n        Get dataset according to cache and cache_type parameters.\n        Args:\n            cache (bool): Whether to cache imgs to ram or disk.\n            cache_type (str, optional): Defaults to \"ram\".\n                \"ram\" : Caching imgs to ram for fast training.\n                \"disk\": Caching imgs to disk for fast training.\n        \"\"\"\n        from yolox.data import COCODataset, TrainTransform\n\n        return COCODataset(\n            data_dir=self.data_dir,\n            json_file=self.train_ann,\n            img_size=self.input_size,\n            preproc=TrainTransform(\n                max_labels=50,\n                flip_prob=self.flip_prob,\n                hsv_prob=self.hsv_prob\n            ),\n            cache=cache,\n            cache_type=cache_type,\n        )\n\n    def get_data_loader(self, batch_size, is_distributed, 
no_aug=False, cache_img: str = None):\n        \"\"\"\n        Get dataloader according to cache_img parameter.\n        Args:\n            no_aug (bool, optional): Whether to turn off mosaic data enhancement. Defaults to False.\n            cache_img (str, optional): cache_img is equivalent to cache_type. Defaults to None.\n                \"ram\" : Caching imgs to ram for fast training.\n                \"disk\": Caching imgs to disk for fast training.\n                None: Do not use cache, in this case cache_data is also None.\n        \"\"\"\n        from yolox.data import (\n            TrainTransform,\n            YoloBatchSampler,\n            DataLoader,\n            InfiniteSampler,\n            MosaicDetection,\n            worker_init_reset_seed,\n        )\n        from yolox.utils import wait_for_the_master\n\n        # if cache is True, we will create self.dataset before launch\n        # else we will create self.dataset after launch\n        if self.dataset is None:\n            with wait_for_the_master():\n                assert cache_img is None, \\\n                    \"cache_img must be None if you didn't create self.dataset before launch\"\n                self.dataset = self.get_dataset(cache=False, cache_type=cache_img)\n\n        self.dataset = MosaicDetection(\n            dataset=self.dataset,\n            mosaic=not no_aug,\n            img_size=self.input_size,\n            preproc=TrainTransform(\n                max_labels=120,\n                flip_prob=self.flip_prob,\n                hsv_prob=self.hsv_prob),\n            degrees=self.degrees,\n            translate=self.translate,\n            mosaic_scale=self.mosaic_scale,\n            mixup_scale=self.mixup_scale,\n            shear=self.shear,\n            enable_mixup=self.enable_mixup,\n            mosaic_prob=self.mosaic_prob,\n            mixup_prob=self.mixup_prob,\n        )\n\n        if is_distributed:\n            batch_size = batch_size // dist.get_world_size()\n\n   
     sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)\n\n        batch_sampler = YoloBatchSampler(\n            sampler=sampler,\n            batch_size=batch_size,\n            drop_last=False,\n            mosaic=not no_aug,\n        )\n\n        dataloader_kwargs = {\"num_workers\": self.data_num_workers, \"pin_memory\": True}\n        dataloader_kwargs[\"batch_sampler\"] = batch_sampler\n\n        # Make sure each process has different random seed, especially for 'fork' method.\n        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.\n        dataloader_kwargs[\"worker_init_fn\"] = worker_init_reset_seed\n\n        train_loader = DataLoader(self.dataset, **dataloader_kwargs)\n\n        return train_loader\n\n    def random_resize(self, data_loader, epoch, rank, is_distributed):\n        tensor = torch.LongTensor(2).cuda()\n\n        if rank == 0:\n            size_factor = self.input_size[1] * 1.0 / self.input_size[0]\n            if not hasattr(self, 'random_size'):\n                min_size = int(self.input_size[0] / 32) - self.multiscale_range\n                max_size = int(self.input_size[0] / 32) + self.multiscale_range\n                self.random_size = (min_size, max_size)\n            size = random.randint(*self.random_size)\n            size = (int(32 * size), 32 * int(size * size_factor))\n            tensor[0] = size[0]\n            tensor[1] = size[1]\n\n        if is_distributed:\n            dist.barrier()\n            dist.broadcast(tensor, 0)\n\n        input_size = (tensor[0].item(), tensor[1].item())\n        return input_size\n\n    def preprocess(self, inputs, targets, tsize):\n        scale_y = tsize[0] / self.input_size[0]\n        scale_x = tsize[1] / self.input_size[1]\n        if scale_x != 1 or scale_y != 1:\n            inputs = nn.functional.interpolate(\n                inputs, size=tsize, mode=\"bilinear\", align_corners=False\n            )\n            targets[..., 1::2] 
= targets[..., 1::2] * scale_x\n            targets[..., 2::2] = targets[..., 2::2] * scale_y\n        return inputs, targets\n\n    def get_optimizer(self, batch_size):\n        if \"optimizer\" not in self.__dict__:\n            if self.warmup_epochs > 0:\n                lr = self.warmup_lr\n            else:\n                lr = self.basic_lr_per_img * batch_size\n\n            pg0, pg1, pg2 = [], [], []  # optimizer parameter groups\n\n            for k, v in self.model.named_modules():\n                if hasattr(v, \"bias\") and isinstance(v.bias, nn.Parameter):\n                    pg2.append(v.bias)  # biases\n                if isinstance(v, nn.BatchNorm2d) or \"bn\" in k:\n                    pg0.append(v.weight)  # no decay\n                elif hasattr(v, \"weight\") and isinstance(v.weight, nn.Parameter):\n                    pg1.append(v.weight)  # apply decay\n\n            optimizer = torch.optim.SGD(\n                pg0, lr=lr, momentum=self.momentum, nesterov=True\n            )\n            optimizer.add_param_group(\n                {\"params\": pg1, \"weight_decay\": self.weight_decay}\n            )  # add pg1 with weight_decay\n            optimizer.add_param_group({\"params\": pg2})\n            self.optimizer = optimizer\n\n        return self.optimizer\n\n    def get_lr_scheduler(self, lr, iters_per_epoch):\n        from yolox.utils import LRScheduler\n\n        scheduler = LRScheduler(\n            self.scheduler,\n            lr,\n            iters_per_epoch,\n            self.max_epoch,\n            warmup_epochs=self.warmup_epochs,\n            warmup_lr_start=self.warmup_lr,\n            no_aug_epochs=self.no_aug_epochs,\n            min_lr_ratio=self.min_lr_ratio,\n        )\n        return scheduler\n\n    def get_eval_dataset(self, **kwargs):\n        from yolox.data import COCODataset, ValTransform\n        testdev = kwargs.get(\"testdev\", False)\n        legacy = kwargs.get(\"legacy\", False)\n\n        return COCODataset(\n  
          data_dir=self.data_dir,\n            json_file=self.val_ann if not testdev else self.test_ann,\n            name=\"val2017\" if not testdev else \"test2017\",\n            img_size=self.test_size,\n            preproc=ValTransform(legacy=legacy),\n        )\n\n    def get_eval_loader(self, batch_size, is_distributed, **kwargs):\n        valdataset = self.get_eval_dataset(**kwargs)\n\n        if is_distributed:\n            batch_size = batch_size // dist.get_world_size()\n            sampler = torch.utils.data.distributed.DistributedSampler(\n                valdataset, shuffle=False\n            )\n        else:\n            sampler = torch.utils.data.SequentialSampler(valdataset)\n\n        dataloader_kwargs = {\n            \"num_workers\": self.data_num_workers,\n            \"pin_memory\": True,\n            \"sampler\": sampler,\n        }\n        dataloader_kwargs[\"batch_size\"] = batch_size\n        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)\n\n        return val_loader\n\n    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):\n        from yolox.evaluators import COCOEvaluator\n\n        return COCOEvaluator(\n            dataloader=self.get_eval_loader(batch_size, is_distributed,\n                                            testdev=testdev, legacy=legacy),\n            img_size=self.test_size,\n            confthre=self.test_conf,\n            nmsthre=self.nmsthre,\n            num_classes=self.num_classes,\n            testdev=testdev,\n        )\n\n    def get_trainer(self, args):\n        from yolox.core import Trainer\n        trainer = Trainer(self, args)\n        # NOTE: trainer shouldn't be an attribute of exp object\n        return trainer\n\n    def eval(self, model, evaluator, is_distributed, half=False, return_outputs=False):\n        return evaluator.evaluate(model, is_distributed, half, return_outputs=return_outputs)\n\n\ndef check_exp_value(exp: Exp):\n    h, w = 
exp.input_size\n    assert h % 32 == 0 and w % 32 == 0, \"input size must be multiples of 32\"\n"
  },
  {
    "path": "yolox/layers/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\n# import torch first to make jit op work without `ImportError of libc10.so`\nimport torch  # noqa\n\nfrom .jit_ops import FastCOCOEvalOp, JitOp\n\ntry:\n    from .fast_coco_eval_api import COCOeval_opt\nexcept ImportError:  # exception will be raised when users build yolox from source\n    pass\n"
  },
  {
    "path": "yolox/layers/cocoeval/cocoeval.cpp",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n#include \"cocoeval.h\"\n#include <time.h>\n#include <algorithm>\n#include <cstdint>\n#include <numeric>\n\nusing namespace pybind11::literals;\n\nnamespace COCOeval {\n\n// Sort detections from highest score to lowest, such that\n// detection_instances[detection_sorted_indices[t]] >=\n// detection_instances[detection_sorted_indices[t+1]].  Use stable_sort to match\n// original COCO API\nvoid SortInstancesByDetectionScore(\n    const std::vector<InstanceAnnotation>& detection_instances,\n    std::vector<uint64_t>* detection_sorted_indices) {\n  detection_sorted_indices->resize(detection_instances.size());\n  std::iota(\n      detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);\n  std::stable_sort(\n      detection_sorted_indices->begin(),\n      detection_sorted_indices->end(),\n      [&detection_instances](size_t j1, size_t j2) {\n        return detection_instances[j1].score > detection_instances[j2].score;\n      });\n}\n\n// Partition the ground truth objects based on whether or not to ignore them\n// based on area\nvoid SortInstancesByIgnore(\n    const std::array<double, 2>& area_range,\n    const std::vector<InstanceAnnotation>& ground_truth_instances,\n    std::vector<uint64_t>* ground_truth_sorted_indices,\n    std::vector<bool>* ignores) {\n  ignores->clear();\n  ignores->reserve(ground_truth_instances.size());\n  for (auto o : ground_truth_instances) {\n    ignores->push_back(\n        o.ignore || o.area < area_range[0] || o.area > area_range[1]);\n  }\n\n  ground_truth_sorted_indices->resize(ground_truth_instances.size());\n  std::iota(\n      ground_truth_sorted_indices->begin(),\n      ground_truth_sorted_indices->end(),\n      0);\n  std::stable_sort(\n      ground_truth_sorted_indices->begin(),\n      ground_truth_sorted_indices->end(),\n      [&ignores](size_t j1, size_t j2) {\n        return (int)(*ignores)[j1] < (int)(*ignores)[j2];\n      
});\n}\n\n// For each IOU threshold, greedily match each detected instance to a ground\n// truth instance (if possible) and store the results\nvoid MatchDetectionsToGroundTruth(\n    const std::vector<InstanceAnnotation>& detection_instances,\n    const std::vector<uint64_t>& detection_sorted_indices,\n    const std::vector<InstanceAnnotation>& ground_truth_instances,\n    const std::vector<uint64_t>& ground_truth_sorted_indices,\n    const std::vector<bool>& ignores,\n    const std::vector<std::vector<double>>& ious,\n    const std::vector<double>& iou_thresholds,\n    const std::array<double, 2>& area_range,\n    ImageEvaluation* results) {\n  // Initialize memory to store return data matches and ignore\n  const int num_iou_thresholds = iou_thresholds.size();\n  const int num_ground_truth = ground_truth_sorted_indices.size();\n  const int num_detections = detection_sorted_indices.size();\n  std::vector<uint64_t> ground_truth_matches(\n      num_iou_thresholds * num_ground_truth, 0);\n  std::vector<uint64_t>& detection_matches = results->detection_matches;\n  std::vector<bool>& detection_ignores = results->detection_ignores;\n  std::vector<bool>& ground_truth_ignores = results->ground_truth_ignores;\n  detection_matches.resize(num_iou_thresholds * num_detections, 0);\n  detection_ignores.resize(num_iou_thresholds * num_detections, false);\n  ground_truth_ignores.resize(num_ground_truth);\n  for (auto g = 0; g < num_ground_truth; ++g) {\n    ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]];\n  }\n\n  for (auto t = 0; t < num_iou_thresholds; ++t) {\n    for (auto d = 0; d < num_detections; ++d) {\n      // information about best match so far (match=-1 -> unmatched)\n      double best_iou = std::min(iou_thresholds[t], 1 - 1e-10);\n      int match = -1;\n      for (auto g = 0; g < num_ground_truth; ++g) {\n        // if this ground truth instance is already matched and not a\n        // crowd, it cannot be matched to another detection\n        if 
(ground_truth_matches[t * num_ground_truth + g] > 0 &&\n            !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) {\n          continue;\n        }\n\n        // if detected instance matched to a regular ground truth\n        // instance, we can break on the first ground truth instance\n        // tagged as ignore (because they are sorted by the ignore tag)\n        if (match >= 0 && !ground_truth_ignores[match] &&\n            ground_truth_ignores[g]) {\n          break;\n        }\n\n        // if IOU overlap is the best so far, store the match appropriately\n        if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) {\n          best_iou = ious[d][ground_truth_sorted_indices[g]];\n          match = g;\n        }\n      }\n      // if match was made, store id of match for both detection and\n      // ground truth\n      if (match >= 0) {\n        detection_ignores[t * num_detections + d] = ground_truth_ignores[match];\n        detection_matches[t * num_detections + d] =\n            ground_truth_instances[ground_truth_sorted_indices[match]].id;\n        ground_truth_matches[t * num_ground_truth + match] =\n            detection_instances[detection_sorted_indices[d]].id;\n      }\n\n      // set unmatched detections outside of area range to ignore\n      const InstanceAnnotation& detection =\n          detection_instances[detection_sorted_indices[d]];\n      detection_ignores[t * num_detections + d] =\n          detection_ignores[t * num_detections + d] ||\n          (detection_matches[t * num_detections + d] == 0 &&\n           (detection.area < area_range[0] || detection.area > area_range[1]));\n    }\n  }\n\n  // store detection score results\n  results->detection_scores.resize(detection_sorted_indices.size());\n  for (size_t d = 0; d < detection_sorted_indices.size(); ++d) {\n    results->detection_scores[d] =\n        detection_instances[detection_sorted_indices[d]].score;\n  }\n}\n\nstd::vector<ImageEvaluation> EvaluateImages(\n    
const std::vector<std::array<double, 2>>& area_ranges,\n    int max_detections,\n    const std::vector<double>& iou_thresholds,\n    const ImageCategoryInstances<std::vector<double>>& image_category_ious,\n    const ImageCategoryInstances<InstanceAnnotation>&\n        image_category_ground_truth_instances,\n    const ImageCategoryInstances<InstanceAnnotation>&\n        image_category_detection_instances) {\n  const int num_area_ranges = area_ranges.size();\n  const int num_images = image_category_ground_truth_instances.size();\n  const int num_categories =\n      image_category_ious.size() > 0 ? image_category_ious[0].size() : 0;\n  std::vector<uint64_t> detection_sorted_indices;\n  std::vector<uint64_t> ground_truth_sorted_indices;\n  std::vector<bool> ignores;\n  std::vector<ImageEvaluation> results_all(\n      num_images * num_area_ranges * num_categories);\n\n  // Store results for each image, category, and area range combination. Results\n  // for each IOU threshold are packed into the same ImageEvaluation object\n  for (auto i = 0; i < num_images; ++i) {\n    for (auto c = 0; c < num_categories; ++c) {\n      const std::vector<InstanceAnnotation>& ground_truth_instances =\n          image_category_ground_truth_instances[i][c];\n      const std::vector<InstanceAnnotation>& detection_instances =\n          image_category_detection_instances[i][c];\n\n      SortInstancesByDetectionScore(\n          detection_instances, &detection_sorted_indices);\n      if ((int)detection_sorted_indices.size() > max_detections) {\n        detection_sorted_indices.resize(max_detections);\n      }\n\n      for (size_t a = 0; a < area_ranges.size(); ++a) {\n        SortInstancesByIgnore(\n            area_ranges[a],\n            ground_truth_instances,\n            &ground_truth_sorted_indices,\n            &ignores);\n\n        MatchDetectionsToGroundTruth(\n            detection_instances,\n            detection_sorted_indices,\n            ground_truth_instances,\n            
ground_truth_sorted_indices,\n            ignores,\n            image_category_ious[i][c],\n            iou_thresholds,\n            area_ranges[a],\n            &results_all\n                [c * num_area_ranges * num_images + a * num_images + i]);\n      }\n    }\n  }\n\n  return results_all;\n}\n\n// Convert a python list to a vector\ntemplate <typename T>\nstd::vector<T> list_to_vec(const py::list& l) {\n  std::vector<T> v(py::len(l));\n  for (int i = 0; i < (int)py::len(l); ++i) {\n    v[i] = l[i].cast<T>();\n  }\n  return v;\n}\n\n// Helper function to Accumulate()\n// Considers the evaluation results applicable to a particular category, area\n// range, and max_detections parameter setting, which begin at\n// evaluations[evaluation_index].  Extracts a sorted list of length n of all\n// applicable detection instances concatenated across all images in the dataset,\n// which are represented by the outputs evaluation_indices, detection_scores,\n// image_detection_indices, and detection_sorted_indices--all of which are\n// length n. evaluation_indices[i] stores the applicable index into\n// evaluations[] for instance i, which has detection score detection_score[i],\n// and is the image_detection_indices[i]'th of the list of detections\n// for the image containing i.  
detection_sorted_indices[] defines a sorted\n// permutation of the 3 other outputs\nint BuildSortedDetectionList(\n    const std::vector<ImageEvaluation>& evaluations,\n    const int64_t evaluation_index,\n    const int64_t num_images,\n    const int max_detections,\n    std::vector<uint64_t>* evaluation_indices,\n    std::vector<double>* detection_scores,\n    std::vector<uint64_t>* detection_sorted_indices,\n    std::vector<uint64_t>* image_detection_indices) {\n  assert(evaluations.size() >= evaluation_index + num_images);\n\n  // Extract a list of object instances of the applicable category, area\n  // range, and max detections requirements such that they can be sorted\n  image_detection_indices->clear();\n  evaluation_indices->clear();\n  detection_scores->clear();\n  image_detection_indices->reserve(num_images * max_detections);\n  evaluation_indices->reserve(num_images * max_detections);\n  detection_scores->reserve(num_images * max_detections);\n  int num_valid_ground_truth = 0;\n  for (auto i = 0; i < num_images; ++i) {\n    const ImageEvaluation& evaluation = evaluations[evaluation_index + i];\n\n    for (int d = 0;\n         d < (int)evaluation.detection_scores.size() && d < max_detections;\n         ++d) { // detected instances\n      evaluation_indices->push_back(evaluation_index + i);\n      image_detection_indices->push_back(d);\n      detection_scores->push_back(evaluation.detection_scores[d]);\n    }\n    for (auto ground_truth_ignore : evaluation.ground_truth_ignores) {\n      if (!ground_truth_ignore) {\n        ++num_valid_ground_truth;\n      }\n    }\n  }\n\n  // Sort detections by decreasing score, using stable sort to match\n  // python implementation\n  detection_sorted_indices->resize(detection_scores->size());\n  std::iota(\n      detection_sorted_indices->begin(), detection_sorted_indices->end(), 0);\n  std::stable_sort(\n      detection_sorted_indices->begin(),\n      detection_sorted_indices->end(),\n      [&detection_scores](size_t 
j1, size_t j2) {\n        return (*detection_scores)[j1] > (*detection_scores)[j2];\n      });\n\n  return num_valid_ground_truth;\n}\n\n// Helper function to Accumulate()\n// Compute a precision recall curve given a sorted list of detected instances\n// encoded in evaluations, evaluation_indices, detection_scores,\n// detection_sorted_indices, image_detection_indices (see\n// BuildSortedDetectionList()). Using vectors precisions and recalls\n// and temporary storage, output the results into precisions_out, recalls_out,\n// and scores_out, which are large buffers containing many precion/recall curves\n// for all possible parameter settings, with precisions_out_index and\n// recalls_out_index defining the applicable indices to store results.\nvoid ComputePrecisionRecallCurve(\n    const int64_t precisions_out_index,\n    const int64_t precisions_out_stride,\n    const int64_t recalls_out_index,\n    const std::vector<double>& recall_thresholds,\n    const int iou_threshold_index,\n    const int num_iou_thresholds,\n    const int num_valid_ground_truth,\n    const std::vector<ImageEvaluation>& evaluations,\n    const std::vector<uint64_t>& evaluation_indices,\n    const std::vector<double>& detection_scores,\n    const std::vector<uint64_t>& detection_sorted_indices,\n    const std::vector<uint64_t>& image_detection_indices,\n    std::vector<double>* precisions,\n    std::vector<double>* recalls,\n    std::vector<double>* precisions_out,\n    std::vector<double>* scores_out,\n    std::vector<double>* recalls_out) {\n  assert(recalls_out->size() > recalls_out_index);\n\n  // Compute precision/recall for each instance in the sorted list of detections\n  int64_t true_positives_sum = 0, false_positives_sum = 0;\n  precisions->clear();\n  recalls->clear();\n  precisions->reserve(detection_sorted_indices.size());\n  recalls->reserve(detection_sorted_indices.size());\n  assert(!evaluations.empty() || detection_sorted_indices.empty());\n  for (auto detection_sorted_index : 
detection_sorted_indices) {\n    const ImageEvaluation& evaluation =\n        evaluations[evaluation_indices[detection_sorted_index]];\n    const auto num_detections =\n        evaluation.detection_matches.size() / num_iou_thresholds;\n    const auto detection_index = iou_threshold_index * num_detections +\n        image_detection_indices[detection_sorted_index];\n    assert(evaluation.detection_matches.size() > detection_index);\n    assert(evaluation.detection_ignores.size() > detection_index);\n    const int64_t detection_match =\n        evaluation.detection_matches[detection_index];\n    const bool detection_ignores =\n        evaluation.detection_ignores[detection_index];\n    const auto true_positive = detection_match > 0 && !detection_ignores;\n    const auto false_positive = detection_match == 0 && !detection_ignores;\n    if (true_positive) {\n      ++true_positives_sum;\n    }\n    if (false_positive) {\n      ++false_positives_sum;\n    }\n\n    const double recall =\n        static_cast<double>(true_positives_sum) / num_valid_ground_truth;\n    recalls->push_back(recall);\n    const int64_t num_valid_detections =\n        true_positives_sum + false_positives_sum;\n    const double precision = num_valid_detections > 0\n        ? static_cast<double>(true_positives_sum) / num_valid_detections\n        : 0.0;\n    precisions->push_back(precision);\n  }\n\n  (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0;\n\n  for (int64_t i = static_cast<int64_t>(precisions->size()) - 1; i > 0; --i) {\n    if ((*precisions)[i] > (*precisions)[i - 1]) {\n      (*precisions)[i - 1] = (*precisions)[i];\n    }\n  }\n\n  // Sample the per instance precision/recall list at each recall threshold\n  for (size_t r = 0; r < recall_thresholds.size(); ++r) {\n    // first index in recalls >= recall_thresholds[r]\n    std::vector<double>::iterator low = std::lower_bound(\n        recalls->begin(), recalls->end(), recall_thresholds[r]);\n    size_t precisions_index = low - recalls->begin();\n\n    const auto results_ind = precisions_out_index + r * precisions_out_stride;\n    assert(results_ind < precisions_out->size());\n    assert(results_ind < scores_out->size());\n    if (precisions_index < precisions->size()) {\n      (*precisions_out)[results_ind] = (*precisions)[precisions_index];\n      (*scores_out)[results_ind] =\n          detection_scores[detection_sorted_indices[precisions_index]];\n    } else {\n      (*precisions_out)[results_ind] = 0;\n      (*scores_out)[results_ind] = 0;\n    }\n  }\n}\npy::dict Accumulate(\n    const py::object& params,\n    const std::vector<ImageEvaluation>& evaluations) {\n  const std::vector<double> recall_thresholds =\n      list_to_vec<double>(params.attr(\"recThrs\"));\n  const std::vector<int> max_detections =\n      list_to_vec<int>(params.attr(\"maxDets\"));\n  const int num_iou_thresholds = py::len(params.attr(\"iouThrs\"));\n  const int num_recall_thresholds = py::len(params.attr(\"recThrs\"));\n  const int num_categories = params.attr(\"useCats\").cast<int>() == 1\n      ? 
py::len(params.attr(\"catIds\"))\n      : 1;\n  const int num_area_ranges = py::len(params.attr(\"areaRng\"));\n  const int num_max_detections = py::len(params.attr(\"maxDets\"));\n  const int num_images = py::len(params.attr(\"imgIds\"));\n\n  std::vector<double> precisions_out(\n      num_iou_thresholds * num_recall_thresholds * num_categories *\n          num_area_ranges * num_max_detections,\n      -1);\n  std::vector<double> recalls_out(\n      num_iou_thresholds * num_categories * num_area_ranges *\n          num_max_detections,\n      -1);\n  std::vector<double> scores_out(\n      num_iou_thresholds * num_recall_thresholds * num_categories *\n          num_area_ranges * num_max_detections,\n      -1);\n\n  // Consider the list of all detected instances in the entire dataset in one\n  // large list.  evaluation_indices, detection_scores,\n  // image_detection_indices, and detection_sorted_indices all have the same\n  // length as this list, such that each entry corresponds to one detected\n  // instance\n  std::vector<uint64_t> evaluation_indices; // indices into evaluations[]\n  std::vector<double> detection_scores; // detection scores of each instance\n  std::vector<uint64_t> detection_sorted_indices; // sorted indices of all\n                                                  // instances in the dataset\n  std::vector<uint64_t>\n      image_detection_indices; // indices into the list of detected instances in\n                               // the same image as each instance\n  std::vector<double> precisions, recalls;\n\n  for (auto c = 0; c < num_categories; ++c) {\n    for (auto a = 0; a < num_area_ranges; ++a) {\n      for (auto m = 0; m < num_max_detections; ++m) {\n        // The COCO PythonAPI assumes evaluations[] (the return value of\n        // COCOeval::EvaluateImages() is one long list storing results for each\n        // combination of category, area range, and image id, with categories in\n        // the outermost loop and images in the 
innermost loop.\n        const int64_t evaluations_index =\n            c * num_area_ranges * num_images + a * num_images;\n        int num_valid_ground_truth = BuildSortedDetectionList(\n            evaluations,\n            evaluations_index,\n            num_images,\n            max_detections[m],\n            &evaluation_indices,\n            &detection_scores,\n            &detection_sorted_indices,\n            &image_detection_indices);\n\n        if (num_valid_ground_truth == 0) {\n          continue;\n        }\n\n        for (auto t = 0; t < num_iou_thresholds; ++t) {\n          // recalls_out is a flattened vectors representing a\n          // num_iou_thresholds X num_categories X num_area_ranges X\n          // num_max_detections matrix\n          const int64_t recalls_out_index =\n              t * num_categories * num_area_ranges * num_max_detections +\n              c * num_area_ranges * num_max_detections +\n              a * num_max_detections + m;\n\n          // precisions_out and scores_out are flattened vectors\n          // representing a num_iou_thresholds X num_recall_thresholds X\n          // num_categories X num_area_ranges X num_max_detections matrix\n          const int64_t precisions_out_stride =\n              num_categories * num_area_ranges * num_max_detections;\n          const int64_t precisions_out_index = t * num_recall_thresholds *\n                  num_categories * num_area_ranges * num_max_detections +\n              c * num_area_ranges * num_max_detections +\n              a * num_max_detections + m;\n\n          ComputePrecisionRecallCurve(\n              precisions_out_index,\n              precisions_out_stride,\n              recalls_out_index,\n              recall_thresholds,\n              t,\n              num_iou_thresholds,\n              num_valid_ground_truth,\n              evaluations,\n              evaluation_indices,\n              detection_scores,\n              detection_sorted_indices,\n              
image_detection_indices,\n              &precisions,\n              &recalls,\n              &precisions_out,\n              &scores_out,\n              &recalls_out);\n        }\n      }\n    }\n  }\n\n  time_t rawtime;\n  struct tm local_time;\n  std::array<char, 200> buffer;\n  time(&rawtime);\n#ifdef _WIN32\n  localtime_s(&local_time, &rawtime);\n#else\n  localtime_r(&rawtime, &local_time);\n#endif\n  strftime(\n      buffer.data(), 200, \"%Y-%m-%d %H:%M:%S\", &local_time);\n  return py::dict(\n      \"params\"_a = params,\n      \"counts\"_a = std::vector<int64_t>({num_iou_thresholds,\n                                         num_recall_thresholds,\n                                         num_categories,\n                                         num_area_ranges,\n                                         num_max_detections}),\n      \"date\"_a = buffer,\n      \"precision\"_a = precisions_out,\n      \"recall\"_a = recalls_out,\n      \"scores\"_a = scores_out);\n}\n\n} // namespace COCOeval\n"
  },
  {
    "path": "yolox/layers/cocoeval/cocoeval.h",
    "content": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n#pragma once\n\n#include <pybind11/numpy.h>\n#include <pybind11/pybind11.h>\n#include <pybind11/stl.h>\n#include <pybind11/stl_bind.h>\n#include <vector>\n\nnamespace py = pybind11;\n\nnamespace COCOeval {\n\n// Annotation data for a single object instance in an image\nstruct InstanceAnnotation {\n  InstanceAnnotation(\n      uint64_t id,\n      double score,\n      double area,\n      bool is_crowd,\n      bool ignore)\n      : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {}\n  uint64_t id;\n  double score = 0.;\n  double area = 0.;\n  bool is_crowd = false;\n  bool ignore = false;\n};\n\n// Stores intermediate results for evaluating detection results for a single\n// image that has D detected instances and G ground truth instances. This stores\n// matches between detected and ground truth instances\nstruct ImageEvaluation {\n  // For each of the D detected instances, the id of the matched ground truth\n  // instance, or 0 if unmatched\n  std::vector<uint64_t> detection_matches;\n\n  // The detection score of each of the D detected instances\n  std::vector<double> detection_scores;\n\n  // Marks whether or not each of G instances was ignored from evaluation (e.g.,\n  // because it's outside area_range)\n  std::vector<bool> ground_truth_ignores;\n\n  // Marks whether or not each of D instances was ignored from evaluation (e.g.,\n  // because it's outside aRng)\n  std::vector<bool> detection_ignores;\n};\n\ntemplate <class T>\nusing ImageCategoryInstances = std::vector<std::vector<std::vector<T>>>;\n\n// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg().  
For each\n// combination of image, category, area range settings, and IOU thresholds to\n// evaluate, it matches detected instances to ground truth instances and stores\n// the results into a vector of ImageEvaluation results, which will be\n// interpreted by the COCOeval::Accumulate() function to produce precion-recall\n// curves.  The parameters of nested vectors have the following semantics:\n//   image_category_ious[i][c][d][g] is the intersection over union of the d'th\n//     detected instance and g'th ground truth instance of\n//     category category_ids[c] in image image_ids[i]\n//   image_category_ground_truth_instances[i][c] is a vector of ground truth\n//     instances in image image_ids[i] of category category_ids[c]\n//   image_category_detection_instances[i][c] is a vector of detected\n//     instances in image image_ids[i] of category category_ids[c]\nstd::vector<ImageEvaluation> EvaluateImages(\n    const std::vector<std::array<double, 2>>& area_ranges, // vector of 2-tuples\n    int max_detections,\n    const std::vector<double>& iou_thresholds,\n    const ImageCategoryInstances<std::vector<double>>& image_category_ious,\n    const ImageCategoryInstances<InstanceAnnotation>&\n        image_category_ground_truth_instances,\n    const ImageCategoryInstances<InstanceAnnotation>&\n        image_category_detection_instances);\n\n// C++ implementation of COCOeval.accumulate(), which generates precision\n// recall curves for each set of category, IOU threshold, detection area range,\n// and max number of detections parameters.  
It is assumed that the parameter\n// evaluations is the return value of the function COCOeval::EvaluateImages(),\n// which was called with the same parameter settings params\npy::dict Accumulate(\n    const py::object& params,\n    const std::vector<ImageEvaluation>& evaluations);\n\n} // namespace COCOeval\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m)\n{\n    m.def(\"COCOevalAccumulate\", &COCOeval::Accumulate, \"COCOeval::Accumulate\");\n    m.def(\n        \"COCOevalEvaluateImages\",\n        &COCOeval::EvaluateImages,\n        \"COCOeval::EvaluateImages\");\n    pybind11::class_<COCOeval::InstanceAnnotation>(m, \"InstanceAnnotation\")\n        .def(pybind11::init<uint64_t, double, double, bool, bool>());\n    pybind11::class_<COCOeval::ImageEvaluation>(m, \"ImageEvaluation\")\n        .def(pybind11::init<>());\n}\n"
  },
  {
    "path": "yolox/layers/fast_coco_eval_api.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# This file comes from\n# https://github.com/facebookresearch/detectron2/blob/master/detectron2/evaluation/fast_eval_api.py\n# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport copy\nimport time\n\nimport numpy as np\nfrom pycocotools.cocoeval import COCOeval\n\nfrom .jit_ops import FastCOCOEvalOp\n\n\nclass COCOeval_opt(COCOeval):\n    \"\"\"\n    This is a slightly modified version of the original COCO API, where the functions evaluateImg()\n    and accumulate() are implemented in C++ to speedup evaluation\n    \"\"\"\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.module = FastCOCOEvalOp().load()\n\n    def evaluate(self):\n        \"\"\"\n        Run per image evaluation on given images and store results in self.evalImgs_cpp, a\n        datastructure that isn't readable from Python but is used by a c++ implementation of\n        accumulate().  Unlike the original COCO PythonAPI, we don't populate the datastructure\n        self.evalImgs because this datastructure is a computational bottleneck.\n        :return: None\n        \"\"\"\n        tic = time.time()\n\n        print(\"Running per image evaluation...\")\n        p = self.params\n        # add backward compatibility if useSegm is specified in params\n        if p.useSegm is not None:\n            p.iouType = \"segm\" if p.useSegm == 1 else \"bbox\"\n            print(\n                \"useSegm (deprecated) is not None. 
Running {} evaluation\".format(\n                    p.iouType\n                )\n            )\n        print(\"Evaluate annotation type *{}*\".format(p.iouType))\n        p.imgIds = list(np.unique(p.imgIds))\n        if p.useCats:\n            p.catIds = list(np.unique(p.catIds))\n        p.maxDets = sorted(p.maxDets)\n        self.params = p\n\n        self._prepare()\n\n        # loop through images, area range, max detection number\n        catIds = p.catIds if p.useCats else [-1]\n\n        if p.iouType == \"segm\" or p.iouType == \"bbox\":\n            computeIoU = self.computeIoU\n        elif p.iouType == \"keypoints\":\n            computeIoU = self.computeOks\n        self.ious = {\n            (imgId, catId): computeIoU(imgId, catId)\n            for imgId in p.imgIds\n            for catId in catIds\n        }\n\n        maxDet = p.maxDets[-1]\n\n        # <<<< Beginning of code differences with original COCO API\n        def convert_instances_to_cpp(instances, is_det=False):\n            # Convert annotations for a list of instances in an image to a format that's fast\n            # to access in C++\n            instances_cpp = []\n            for instance in instances:\n                instance_cpp = self.module.InstanceAnnotation(\n                    int(instance[\"id\"]),\n                    instance[\"score\"] if is_det else instance.get(\"score\", 0.0),\n                    instance[\"area\"],\n                    bool(instance.get(\"iscrowd\", 0)),\n                    bool(instance.get(\"ignore\", 0)),\n                )\n                instances_cpp.append(instance_cpp)\n            return instances_cpp\n\n        # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++\n        ground_truth_instances = [\n            [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]\n            for imgId in p.imgIds\n        ]\n        detected_instances = [\n            [\n                
convert_instances_to_cpp(self._dts[imgId, catId], is_det=True)\n                for catId in p.catIds\n            ]\n            for imgId in p.imgIds\n        ]\n        ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]\n\n        if not p.useCats:\n            # For each image, flatten per-category lists into a single list\n            ground_truth_instances = [\n                [[o for c in i for o in c]] for i in ground_truth_instances\n            ]\n            detected_instances = [\n                [[o for c in i for o in c]] for i in detected_instances\n            ]\n\n        # Call C++ implementation of self.evaluateImgs()\n        self._evalImgs_cpp = self.module.COCOevalEvaluateImages(\n            p.areaRng,\n            maxDet,\n            p.iouThrs,\n            ious,\n            ground_truth_instances,\n            detected_instances,\n        )\n        self._evalImgs = None\n\n        self._paramsEval = copy.deepcopy(self.params)\n        toc = time.time()\n        print(\"COCOeval_opt.evaluate() finished in {:0.2f} seconds.\".format(toc - tic))\n        # >>>> End of code differences with original COCO API\n\n    def accumulate(self):\n        \"\"\"\n        Accumulate per image evaluation results and store the result in self.eval.  
Does not\n        support changing parameter settings from those used by self.evaluate()\n        \"\"\"\n        print(\"Accumulating evaluation results...\")\n        tic = time.time()\n        if not hasattr(self, \"_evalImgs_cpp\"):\n            print(\"Please run evaluate() first\")\n\n        self.eval = self.module.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)\n\n        # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections\n        self.eval[\"recall\"] = np.array(self.eval[\"recall\"]).reshape(\n            self.eval[\"counts\"][:1] + self.eval[\"counts\"][2:]\n        )\n\n        # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X\n        # num_area_ranges X num_max_detections\n        self.eval[\"precision\"] = np.array(self.eval[\"precision\"]).reshape(\n            self.eval[\"counts\"]\n        )\n        self.eval[\"scores\"] = np.array(self.eval[\"scores\"]).reshape(self.eval[\"counts\"])\n        toc = time.time()\n        print(\n            \"COCOeval_opt.accumulate() finished in {:0.2f} seconds.\".format(toc - tic)\n        )\n"
  },
  {
    "path": "yolox/layers/jit_ops.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii, Inc. and its affiliates. All Rights Reserved\n\nimport glob\nimport importlib\nimport os\nimport sys\nimport time\nfrom typing import List\n\n__all__ = [\"JitOp\", \"FastCOCOEvalOp\"]\n\n\nclass JitOp:\n    \"\"\"\n    Just-in-time compilation of ops.\n\n    Some code of `JitOp` is inspired by `deepspeed.op_builder`,\n    check the following link for more details:\n    https://github.com/microsoft/DeepSpeed/blob/master/op_builder/builder.py\n    \"\"\"\n\n    def __init__(self, name):\n        self.name = name\n\n    def absolute_name(self) -> str:\n        \"\"\"Get absolute build path for cases where the op is pre-installed.\"\"\"\n        pass\n\n    def sources(self) -> List:\n        \"\"\"Get path list of source files of op.\n\n        NOTE: the path should be elative to root of package during building,\n            Otherwise, exception will be raised when building package.\n            However, for runtime building, path will be absolute.\n        \"\"\"\n        pass\n\n    def include_dirs(self) -> List:\n        \"\"\"\n        Get list of include paths, relative to root of package.\n\n        NOTE: the path should be elative to root of package.\n            Otherwise, exception will be raised when building package.\n        \"\"\"\n        return []\n\n    def define_macros(self) -> List:\n        \"\"\"Get list of macros to define for op\"\"\"\n        return []\n\n    def cxx_args(self) -> List:\n        \"\"\"Get optional list of compiler flags to forward\"\"\"\n        args = [\"-O2\"] if sys.platform == \"win32\" else [\"-O3\", \"-std=c++14\", \"-g\", \"-Wno-reorder\"]\n        return args\n\n    def nvcc_args(self) -> List:\n        \"\"\"Get optional list of compiler flags to forward to nvcc when building CUDA sources\"\"\"\n        args = [\n            \"-O3\", \"--use_fast_math\",\n            \"-std=c++17\" if sys.platform == \"win32\" else \"-std=c++14\",\n            
\"-U__CUDA_NO_HALF_OPERATORS__\",\n            \"-U__CUDA_NO_HALF_CONVERSIONS__\",\n            \"-U__CUDA_NO_HALF2_OPERATORS__\",\n        ]\n        return args\n\n    def build_op(self):\n        from torch.utils.cpp_extension import CppExtension\n        return CppExtension(\n            name=self.absolute_name(),\n            sources=self.sources(),\n            include_dirs=self.include_dirs(),\n            define_macros=self.define_macros(),\n            extra_compile_args={\n                \"cxx\": self.cxx_args(),\n            },\n        )\n\n    def load(self, verbose=True):\n        try:\n            # try to import op from pre-installed package\n            return importlib.import_module(self.absolute_name())\n        except Exception:  # op not compiled, jit load\n            from yolox.utils import wait_for_the_master\n            with wait_for_the_master():  # to avoid race condition\n                return self.jit_load(verbose)\n\n    def jit_load(self, verbose=True):\n        from torch.utils.cpp_extension import load\n        from loguru import logger\n        try:\n            import ninja  # noqa\n        except ImportError:\n            if verbose:\n                logger.warning(\n                    f\"Ninja is not installed, fall back to normal installation for {self.name}.\"\n                )\n\n        build_tik = time.time()\n        # build op and load\n        op_module = load(\n            name=self.name,\n            sources=self.sources(),\n            extra_cflags=self.cxx_args(),\n            extra_cuda_cflags=self.nvcc_args(),\n            verbose=verbose,\n        )\n        build_duration = time.time() - build_tik\n        if verbose:\n            logger.info(f\"Load {self.name} op in {build_duration:.3f}s.\")\n        return op_module\n\n    def clear_dynamic_library(self):\n        \"\"\"Remove dynamic libraray files generated by JIT compilation.\"\"\"\n        module = self.load()\n        
os.remove(module.__file__)\n\n\nclass FastCOCOEvalOp(JitOp):\n\n    def __init__(self, name=\"fast_cocoeval\"):\n        super().__init__(name=name)\n\n    def absolute_name(self):\n        return f'yolox.layers.{self.name}'\n\n    def sources(self):\n        sources = glob.glob(os.path.join(\"yolox\", \"layers\", \"cocoeval\", \"*.cpp\"))\n        if not sources:  # source will be empty list if the so file is removed after install\n            # use absolute path to compile\n            import yolox\n            code_path = os.path.join(yolox.__path__[0], \"layers\", \"cocoeval\", \"*.cpp\")\n            sources = glob.glob(code_path)\n        return sources\n\n    def include_dirs(self):\n        return [os.path.join(\"yolox\", \"layers\", \"cocoeval\")]\n"
  },
  {
    "path": "yolox/models/__init__.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nfrom .build import *\nfrom .darknet import CSPDarknet, Darknet\nfrom .losses import IOUloss\nfrom .yolo_fpn import YOLOFPN\nfrom .yolo_head import YOLOXHead\nfrom .yolo_pafpn import YOLOPAFPN\nfrom .yolox import YOLOX\n"
  },
  {
    "path": "yolox/models/build.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n\nimport torch\nfrom torch import nn\nfrom torch.hub import load_state_dict_from_url\n\n__all__ = [\n    \"create_yolox_model\",\n    \"yolox_nano\",\n    \"yolox_tiny\",\n    \"yolox_s\",\n    \"yolox_m\",\n    \"yolox_l\",\n    \"yolox_x\",\n    \"yolov3\",\n    \"yolox_custom\"\n]\n\n_CKPT_ROOT_URL = \"https://github.com/Megvii-BaseDetection/YOLOX/releases/download\"\n_CKPT_FULL_PATH = {\n    \"yolox-nano\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_nano.pth\",\n    \"yolox-tiny\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_tiny.pth\",\n    \"yolox-s\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_s.pth\",\n    \"yolox-m\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_m.pth\",\n    \"yolox-l\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_l.pth\",\n    \"yolox-x\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_x.pth\",\n    \"yolov3\": f\"{_CKPT_ROOT_URL}/0.1.1rc0/yolox_darknet.pth\",\n}\n\n\ndef create_yolox_model(name: str, pretrained: bool = True, num_classes: int = 80, device=None,\n                       exp_path: str = None, ckpt_path: str = None) -> nn.Module:\n    \"\"\"creates and loads a YOLOX model\n\n    Args:\n        name (str): name of model. for example, \"yolox-s\", \"yolox-tiny\" or \"yolox_custom\"\n        if you want to load your own model.\n        pretrained (bool): load pretrained weights into the model. Default to True.\n        device (str): default device to for model. Default to None.\n        num_classes (int): number of model classes. Default to 80.\n        exp_path (str): path to your own experiment file. Required if name=\"yolox_custom\"\n        ckpt_path (str): path to your own ckpt. 
Required if name=\"yolox_custom\" and you want to\n            load a pretrained model\n\n\n    Returns:\n        YOLOX model (nn.Module)\n    \"\"\"\n    from yolox.exp import get_exp, Exp\n\n    if device is None:\n        device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n    device = torch.device(device)\n\n    assert name in _CKPT_FULL_PATH or name == \"yolox_custom\", \\\n        f\"user should use one of value in {_CKPT_FULL_PATH.keys()} or \\\"yolox_custom\\\"\"\n    if name in _CKPT_FULL_PATH:\n        exp: Exp = get_exp(exp_name=name)\n        exp.num_classes = num_classes\n        yolox_model = exp.get_model()\n        if pretrained and num_classes == 80:\n            weights_url = _CKPT_FULL_PATH[name]\n            ckpt = load_state_dict_from_url(weights_url, map_location=\"cpu\")\n            if \"model\" in ckpt:\n                ckpt = ckpt[\"model\"]\n            yolox_model.load_state_dict(ckpt)\n    else:\n        assert exp_path is not None, \"for a \\\"yolox_custom\\\" model exp_path must be provided\"\n        exp: Exp = get_exp(exp_file=exp_path)\n        yolox_model = exp.get_model()\n        if ckpt_path:\n            ckpt = torch.load(ckpt_path, map_location=\"cpu\")\n            if \"model\" in ckpt:\n                ckpt = ckpt[\"model\"]\n            yolox_model.load_state_dict(ckpt)\n\n    yolox_model.to(device)\n    return yolox_model\n\n\ndef yolox_nano(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-nano\", pretrained, num_classes, device)\n\n\ndef yolox_tiny(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-tiny\", pretrained, num_classes, device)\n\n\ndef yolox_s(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-s\", pretrained, num_classes, device)\n\n\ndef yolox_m(pretrained: bool = True, num_classes: int = 
80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-m\", pretrained, num_classes, device)\n\n\ndef yolox_l(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-l\", pretrained, num_classes, device)\n\n\ndef yolox_x(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox-x\", pretrained, num_classes, device)\n\n\ndef yolov3(pretrained: bool = True, num_classes: int = 80, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolov3\", pretrained, num_classes, device)\n\n\ndef yolox_custom(ckpt_path: str = None, exp_path: str = None, device: str = None) -> nn.Module:\n    return create_yolox_model(\"yolox_custom\", ckpt_path=ckpt_path, exp_path=exp_path, device=device)\n"
  },
  {
    "path": "yolox/models/darknet.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nfrom torch import nn\n\nfrom .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck\n\n\nclass Darknet(nn.Module):\n    # number of blocks from dark2 to dark5.\n    depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]}\n\n    def __init__(\n        self,\n        depth,\n        in_channels=3,\n        stem_out_channels=32,\n        out_features=(\"dark3\", \"dark4\", \"dark5\"),\n    ):\n        \"\"\"\n        Args:\n            depth (int): depth of darknet used in model, usually use [21, 53] for this param.\n            in_channels (int): number of input channels, for example, use 3 for RGB image.\n            stem_out_channels (int): number of output channels of darknet stem.\n                It decides channels of darknet layer2 to layer5.\n            out_features (Tuple[str]): desired output layer name.\n        \"\"\"\n        super().__init__()\n        assert out_features, \"please provide output features of Darknet\"\n        self.out_features = out_features\n        self.stem = nn.Sequential(\n            BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act=\"lrelu\"),\n            *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2),\n        )\n        in_channels = stem_out_channels * 2  # 64\n\n        num_blocks = Darknet.depth2blocks[depth]\n        # create darknet with `stem_out_channels` and `num_blocks` layers.\n        # to make model structure more clear, we don't use `for` statement in python.\n        self.dark2 = nn.Sequential(\n            *self.make_group_layer(in_channels, num_blocks[0], stride=2)\n        )\n        in_channels *= 2  # 128\n        self.dark3 = nn.Sequential(\n            *self.make_group_layer(in_channels, num_blocks[1], stride=2)\n        )\n        in_channels *= 2  # 256\n        self.dark4 = nn.Sequential(\n            
*self.make_group_layer(in_channels, num_blocks[2], stride=2)\n        )\n        in_channels *= 2  # 512\n\n        self.dark5 = nn.Sequential(\n            *self.make_group_layer(in_channels, num_blocks[3], stride=2),\n            *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2),\n        )\n\n    def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1):\n        \"starts with conv layer then has `num_blocks` `ResLayer`\"\n        return [\n            BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act=\"lrelu\"),\n            *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)],\n        ]\n\n    def make_spp_block(self, filters_list, in_filters):\n        m = nn.Sequential(\n            *[\n                BaseConv(in_filters, filters_list[0], 1, stride=1, act=\"lrelu\"),\n                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act=\"lrelu\"),\n                SPPBottleneck(\n                    in_channels=filters_list[1],\n                    out_channels=filters_list[0],\n                    activation=\"lrelu\",\n                ),\n                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act=\"lrelu\"),\n                BaseConv(filters_list[1], filters_list[0], 1, stride=1, act=\"lrelu\"),\n            ]\n        )\n        return m\n\n    def forward(self, x):\n        outputs = {}\n        x = self.stem(x)\n        outputs[\"stem\"] = x\n        x = self.dark2(x)\n        outputs[\"dark2\"] = x\n        x = self.dark3(x)\n        outputs[\"dark3\"] = x\n        x = self.dark4(x)\n        outputs[\"dark4\"] = x\n        x = self.dark5(x)\n        outputs[\"dark5\"] = x\n        return {k: v for k, v in outputs.items() if k in self.out_features}\n\n\nclass CSPDarknet(nn.Module):\n    def __init__(\n        self,\n        dep_mul,\n        wid_mul,\n        out_features=(\"dark3\", \"dark4\", \"dark5\"),\n        depthwise=False,\n        act=\"silu\",\n    ):\n   
     super().__init__()\n        assert out_features, \"please provide output features of Darknet\"\n        self.out_features = out_features\n        Conv = DWConv if depthwise else BaseConv\n\n        base_channels = int(wid_mul * 64)  # 64\n        base_depth = max(round(dep_mul * 3), 1)  # 3\n\n        # stem\n        self.stem = Focus(3, base_channels, ksize=3, act=act)\n\n        # dark2\n        self.dark2 = nn.Sequential(\n            Conv(base_channels, base_channels * 2, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 2,\n                base_channels * 2,\n                n=base_depth,\n                depthwise=depthwise,\n                act=act,\n            ),\n        )\n\n        # dark3\n        self.dark3 = nn.Sequential(\n            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 4,\n                base_channels * 4,\n                n=base_depth * 3,\n                depthwise=depthwise,\n                act=act,\n            ),\n        )\n\n        # dark4\n        self.dark4 = nn.Sequential(\n            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),\n            CSPLayer(\n                base_channels * 8,\n                base_channels * 8,\n                n=base_depth * 3,\n                depthwise=depthwise,\n                act=act,\n            ),\n        )\n\n        # dark5\n        self.dark5 = nn.Sequential(\n            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),\n            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),\n            CSPLayer(\n                base_channels * 16,\n                base_channels * 16,\n                n=base_depth,\n                shortcut=False,\n                depthwise=depthwise,\n                act=act,\n            ),\n        )\n\n    def forward(self, x):\n        outputs = {}\n        x = self.stem(x)\n        outputs[\"stem\"] = x\n        
x = self.dark2(x)\n        outputs[\"dark2\"] = x\n        x = self.dark3(x)\n        outputs[\"dark3\"] = x\n        x = self.dark4(x)\n        outputs[\"dark4\"] = x\n        x = self.dark5(x)\n        outputs[\"dark5\"] = x\n        return {k: v for k, v in outputs.items() if k in self.out_features}\n"
  },
  {
    "path": "yolox/models/losses.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport torch\nimport torch.nn as nn\n\n\nclass IOUloss(nn.Module):\n    def __init__(self, reduction=\"none\", loss_type=\"iou\"):\n        super(IOUloss, self).__init__()\n        self.reduction = reduction\n        self.loss_type = loss_type\n\n    def forward(self, pred, target):\n        assert pred.shape[0] == target.shape[0]\n\n        pred = pred.view(-1, 4)\n        target = target.view(-1, 4)\n        tl = torch.max(\n            (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2)\n        )\n        br = torch.min(\n            (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2)\n        )\n\n        area_p = torch.prod(pred[:, 2:], 1)\n        area_g = torch.prod(target[:, 2:], 1)\n\n        en = (tl < br).type(tl.type()).prod(dim=1)\n        area_i = torch.prod(br - tl, 1) * en\n        area_u = area_p + area_g - area_i\n        iou = (area_i) / (area_u + 1e-16)\n\n        if self.loss_type == \"iou\":\n            loss = 1 - iou ** 2\n        elif self.loss_type == \"giou\":\n            c_tl = torch.min(\n                (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2)\n            )\n            c_br = torch.max(\n                (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2)\n            )\n            area_c = torch.prod(c_br - c_tl, 1)\n            giou = iou - (area_c - area_u) / area_c.clamp(1e-16)\n            loss = 1 - giou.clamp(min=-1.0, max=1.0)\n\n        if self.reduction == \"mean\":\n            loss = loss.mean()\n        elif self.reduction == \"sum\":\n            loss = loss.sum()\n\n        return loss\n"
  },
  {
    "path": "yolox/models/network_blocks.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport torch\nimport torch.nn as nn\n\n\nclass SiLU(nn.Module):\n    \"\"\"export-friendly version of nn.SiLU()\"\"\"\n\n    @staticmethod\n    def forward(x):\n        return x * torch.sigmoid(x)\n\n\ndef get_activation(name=\"silu\", inplace=True):\n    if name == \"silu\":\n        module = nn.SiLU(inplace=inplace)\n    elif name == \"relu\":\n        module = nn.ReLU(inplace=inplace)\n    elif name == \"lrelu\":\n        module = nn.LeakyReLU(0.1, inplace=inplace)\n    else:\n        raise AttributeError(\"Unsupported act type: {}\".format(name))\n    return module\n\n\nclass BaseConv(nn.Module):\n    \"\"\"A Conv2d -> Batchnorm -> silu/leaky relu block\"\"\"\n\n    def __init__(\n        self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act=\"silu\"\n    ):\n        super().__init__()\n        # same padding\n        pad = (ksize - 1) // 2\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            kernel_size=ksize,\n            stride=stride,\n            padding=pad,\n            groups=groups,\n            bias=bias,\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.act = get_activation(act, inplace=True)\n\n    def forward(self, x):\n        return self.act(self.bn(self.conv(x)))\n\n    def fuseforward(self, x):\n        return self.act(self.conv(x))\n\n\nclass DWConv(nn.Module):\n    \"\"\"Depthwise Conv + Conv\"\"\"\n\n    def __init__(self, in_channels, out_channels, ksize, stride=1, act=\"silu\"):\n        super().__init__()\n        self.dconv = BaseConv(\n            in_channels,\n            in_channels,\n            ksize=ksize,\n            stride=stride,\n            groups=in_channels,\n            act=act,\n        )\n        self.pconv = BaseConv(\n            in_channels, out_channels, ksize=1, stride=1, groups=1, act=act\n        )\n\n    def 
forward(self, x):\n        x = self.dconv(x)\n        return self.pconv(x)\n\n\nclass Bottleneck(nn.Module):\n    # Standard bottleneck\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        shortcut=True,\n        expansion=0.5,\n        depthwise=False,\n        act=\"silu\",\n    ):\n        super().__init__()\n        hidden_channels = int(out_channels * expansion)\n        Conv = DWConv if depthwise else BaseConv\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)\n        self.use_add = shortcut and in_channels == out_channels\n\n    def forward(self, x):\n        y = self.conv2(self.conv1(x))\n        if self.use_add:\n            y = y + x\n        return y\n\n\nclass ResLayer(nn.Module):\n    \"Residual layer with `in_channels` inputs.\"\n\n    def __init__(self, in_channels: int):\n        super().__init__()\n        mid_channels = in_channels // 2\n        self.layer1 = BaseConv(\n            in_channels, mid_channels, ksize=1, stride=1, act=\"lrelu\"\n        )\n        self.layer2 = BaseConv(\n            mid_channels, in_channels, ksize=3, stride=1, act=\"lrelu\"\n        )\n\n    def forward(self, x):\n        out = self.layer2(self.layer1(x))\n        return x + out\n\n\nclass SPPBottleneck(nn.Module):\n    \"\"\"Spatial pyramid pooling layer used in YOLOv3-SPP\"\"\"\n\n    def __init__(\n        self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation=\"silu\"\n    ):\n        super().__init__()\n        hidden_channels = in_channels // 2\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation)\n        self.m = nn.ModuleList(\n            [\n                nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2)\n                for ks in kernel_sizes\n            ]\n        )\n        conv2_channels = hidden_channels * (len(kernel_sizes) + 1)\n        
self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = torch.cat([x] + [m(x) for m in self.m], dim=1)\n        x = self.conv2(x)\n        return x\n\n\nclass CSPLayer(nn.Module):\n    \"\"\"C3 in yolov5, CSP Bottleneck with 3 convolutions\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        n=1,\n        shortcut=True,\n        expansion=0.5,\n        depthwise=False,\n        act=\"silu\",\n    ):\n        \"\"\"\n        Args:\n            in_channels (int): input channels.\n            out_channels (int): output channels.\n            n (int): number of Bottlenecks. Default value: 1.\n        \"\"\"\n        # ch_in, ch_out, number, shortcut, groups, expansion\n        super().__init__()\n        hidden_channels = int(out_channels * expansion)  # hidden channels\n        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)\n        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)\n        module_list = [\n            Bottleneck(\n                hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act\n            )\n            for _ in range(n)\n        ]\n        self.m = nn.Sequential(*module_list)\n\n    def forward(self, x):\n        x_1 = self.conv1(x)\n        x_2 = self.conv2(x)\n        x_1 = self.m(x_1)\n        x = torch.cat((x_1, x_2), dim=1)\n        return self.conv3(x)\n\n\nclass Focus(nn.Module):\n    \"\"\"Focus width and height information into channel space.\"\"\"\n\n    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act=\"silu\"):\n        super().__init__()\n        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)\n\n    def forward(self, x):\n        # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)\n        patch_top_left = 
x[..., ::2, ::2]\n        patch_top_right = x[..., ::2, 1::2]\n        patch_bot_left = x[..., 1::2, ::2]\n        patch_bot_right = x[..., 1::2, 1::2]\n        x = torch.cat(\n            (\n                patch_top_left,\n                patch_bot_left,\n                patch_top_right,\n                patch_bot_right,\n            ),\n            dim=1,\n        )\n        return self.conv(x)\n"
  },
  {
    "path": "yolox/models/yolo_fpn.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport torch\nimport torch.nn as nn\n\nfrom .darknet import Darknet\nfrom .network_blocks import BaseConv\n\n\nclass YOLOFPN(nn.Module):\n    \"\"\"\n    YOLOFPN module. Darknet 53 is the default backbone of this model.\n    \"\"\"\n\n    def __init__(\n        self,\n        depth=53,\n        in_features=[\"dark3\", \"dark4\", \"dark5\"],\n    ):\n        super().__init__()\n\n        self.backbone = Darknet(depth)\n        self.in_features = in_features\n\n        # out 1\n        self.out1_cbl = self._make_cbl(512, 256, 1)\n        self.out1 = self._make_embedding([256, 512], 512 + 256)\n\n        # out 2\n        self.out2_cbl = self._make_cbl(256, 128, 1)\n        self.out2 = self._make_embedding([128, 256], 256 + 128)\n\n        # upsample\n        self.upsample = nn.Upsample(scale_factor=2, mode=\"nearest\")\n\n    def _make_cbl(self, _in, _out, ks):\n        return BaseConv(_in, _out, ks, stride=1, act=\"lrelu\")\n\n    def _make_embedding(self, filters_list, in_filters):\n        m = nn.Sequential(\n            *[\n                self._make_cbl(in_filters, filters_list[0], 1),\n                self._make_cbl(filters_list[0], filters_list[1], 3),\n                self._make_cbl(filters_list[1], filters_list[0], 1),\n                self._make_cbl(filters_list[0], filters_list[1], 3),\n                self._make_cbl(filters_list[1], filters_list[0], 1),\n            ]\n        )\n        return m\n\n    def load_pretrained_model(self, filename=\"./weights/darknet53.mix.pth\"):\n        with open(filename, \"rb\") as f:\n            state_dict = torch.load(f, map_location=\"cpu\")\n        print(\"loading pretrained weights...\")\n        self.backbone.load_state_dict(state_dict)\n\n    def forward(self, inputs):\n        \"\"\"\n        Args:\n            inputs (Tensor): input image.\n\n        Returns:\n            Tuple[Tensor]: FPN 
output features.\n        \"\"\"\n        #  backbone\n        out_features = self.backbone(inputs)\n        x2, x1, x0 = [out_features[f] for f in self.in_features]\n\n        #  yolo branch 1\n        x1_in = self.out1_cbl(x0)\n        x1_in = self.upsample(x1_in)\n        x1_in = torch.cat([x1_in, x1], 1)\n        out_dark4 = self.out1(x1_in)\n\n        #  yolo branch 2\n        x2_in = self.out2_cbl(out_dark4)\n        x2_in = self.upsample(x2_in)\n        x2_in = torch.cat([x2_in, x2], 1)\n        out_dark3 = self.out2(x2_in)\n\n        outputs = (out_dark3, out_dark4, x0)\n        return outputs\n"
  },
  {
    "path": "yolox/models/yolo_head.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport math\nfrom loguru import logger\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom yolox.utils import bboxes_iou, cxcywh2xyxy, meshgrid, visualize_assign\n\nfrom .losses import IOUloss\nfrom .network_blocks import BaseConv, DWConv\n\n\nclass YOLOXHead(nn.Module):\n    def __init__(\n        self,\n        num_classes,\n        width=1.0,\n        strides=[8, 16, 32],\n        in_channels=[256, 512, 1024],\n        act=\"silu\",\n        depthwise=False,\n    ):\n        \"\"\"\n        Args:\n            act (str): activation type of conv. Defalut value: \"silu\".\n            depthwise (bool): whether apply depthwise conv in conv branch. Defalut value: False.\n        \"\"\"\n        super().__init__()\n\n        self.num_classes = num_classes\n        self.decode_in_inference = True  # for deploy, set to False\n\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        self.cls_preds = nn.ModuleList()\n        self.reg_preds = nn.ModuleList()\n        self.obj_preds = nn.ModuleList()\n        self.stems = nn.ModuleList()\n        Conv = DWConv if depthwise else BaseConv\n\n        for i in range(len(in_channels)):\n            self.stems.append(\n                BaseConv(\n                    in_channels=int(in_channels[i] * width),\n                    out_channels=int(256 * width),\n                    ksize=1,\n                    stride=1,\n                    act=act,\n                )\n            )\n            self.cls_convs.append(\n                nn.Sequential(\n                    *[\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                       
 ),\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                    ]\n                )\n            )\n            self.reg_convs.append(\n                nn.Sequential(\n                    *[\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                        Conv(\n                            in_channels=int(256 * width),\n                            out_channels=int(256 * width),\n                            ksize=3,\n                            stride=1,\n                            act=act,\n                        ),\n                    ]\n                )\n            )\n            self.cls_preds.append(\n                nn.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=self.num_classes,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                )\n            )\n            self.reg_preds.append(\n                nn.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=4,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                )\n            )\n            self.obj_preds.append(\n                nn.Conv2d(\n                    in_channels=int(256 * width),\n                    out_channels=1,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                )\n            )\n\n        self.use_l1 = False\n        
self.l1_loss = nn.L1Loss(reduction=\"none\")\n        self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction=\"none\")\n        self.iou_loss = IOUloss(reduction=\"none\")\n        self.strides = strides\n        self.grids = [torch.zeros(1)] * len(in_channels)\n\n    def initialize_biases(self, prior_prob):\n        for conv in self.cls_preds:\n            b = conv.bias.view(1, -1)\n            b.data.fill_(-math.log((1 - prior_prob) / prior_prob))\n            conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)\n\n        for conv in self.obj_preds:\n            b = conv.bias.view(1, -1)\n            b.data.fill_(-math.log((1 - prior_prob) / prior_prob))\n            conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)\n\n    def forward(self, xin, labels=None, imgs=None):\n        outputs = []\n        origin_preds = []\n        x_shifts = []\n        y_shifts = []\n        expanded_strides = []\n\n        for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(\n            zip(self.cls_convs, self.reg_convs, self.strides, xin)\n        ):\n            x = self.stems[k](x)\n            cls_x = x\n            reg_x = x\n\n            cls_feat = cls_conv(cls_x)\n            cls_output = self.cls_preds[k](cls_feat)\n\n            reg_feat = reg_conv(reg_x)\n            reg_output = self.reg_preds[k](reg_feat)\n            obj_output = self.obj_preds[k](reg_feat)\n\n            if self.training:\n                output = torch.cat([reg_output, obj_output, cls_output], 1)\n                output, grid = self.get_output_and_grid(\n                    output, k, stride_this_level, xin[0].type()\n                )\n                x_shifts.append(grid[:, :, 0])\n                y_shifts.append(grid[:, :, 1])\n                expanded_strides.append(\n                    torch.zeros(1, grid.shape[1])\n                    .fill_(stride_this_level)\n                    .type_as(xin[0])\n                )\n                if self.use_l1:\n        
            batch_size = reg_output.shape[0]\n                    hsize, wsize = reg_output.shape[-2:]\n                    reg_output = reg_output.view(\n                        batch_size, 1, 4, hsize, wsize\n                    )\n                    reg_output = reg_output.permute(0, 1, 3, 4, 2).reshape(\n                        batch_size, -1, 4\n                    )\n                    origin_preds.append(reg_output.clone())\n\n            else:\n                output = torch.cat(\n                    [reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1\n                )\n\n            outputs.append(output)\n\n        if self.training:\n            return self.get_losses(\n                imgs,\n                x_shifts,\n                y_shifts,\n                expanded_strides,\n                labels,\n                torch.cat(outputs, 1),\n                origin_preds,\n                dtype=xin[0].dtype,\n            )\n        else:\n            self.hw = [x.shape[-2:] for x in outputs]\n            # [batch, n_anchors_all, 85]\n            outputs = torch.cat(\n                [x.flatten(start_dim=2) for x in outputs], dim=2\n            ).permute(0, 2, 1)\n            if self.decode_in_inference:\n                return self.decode_outputs(outputs, dtype=xin[0].type())\n            else:\n                return outputs\n\n    def get_output_and_grid(self, output, k, stride, dtype):\n        grid = self.grids[k]\n\n        batch_size = output.shape[0]\n        n_ch = 5 + self.num_classes\n        hsize, wsize = output.shape[-2:]\n        if grid.shape[2:4] != output.shape[2:4]:\n            yv, xv = meshgrid([torch.arange(hsize), torch.arange(wsize)])\n            grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype)\n            self.grids[k] = grid\n\n        output = output.view(batch_size, 1, n_ch, hsize, wsize)\n        output = output.permute(0, 1, 3, 4, 2).reshape(\n            batch_size, hsize * wsize, -1\n     
   )\n        grid = grid.view(1, -1, 2)\n        output[..., :2] = (output[..., :2] + grid) * stride\n        output[..., 2:4] = torch.exp(output[..., 2:4]) * stride\n        return output, grid\n\n    def decode_outputs(self, outputs, dtype):\n        grids = []\n        strides = []\n        for (hsize, wsize), stride in zip(self.hw, self.strides):\n            yv, xv = meshgrid([torch.arange(hsize), torch.arange(wsize)])\n            grid = torch.stack((xv, yv), 2).view(1, -1, 2)\n            grids.append(grid)\n            shape = grid.shape[:2]\n            strides.append(torch.full((*shape, 1), stride))\n\n        grids = torch.cat(grids, dim=1).type(dtype)\n        strides = torch.cat(strides, dim=1).type(dtype)\n\n        outputs = torch.cat([\n            (outputs[..., 0:2] + grids) * strides,\n            torch.exp(outputs[..., 2:4]) * strides,\n            outputs[..., 4:]\n        ], dim=-1)\n        return outputs\n\n    def get_losses(\n        self,\n        imgs,\n        x_shifts,\n        y_shifts,\n        expanded_strides,\n        labels,\n        outputs,\n        origin_preds,\n        dtype,\n    ):\n        bbox_preds = outputs[:, :, :4]  # [batch, n_anchors_all, 4]\n        obj_preds = outputs[:, :, 4:5]  # [batch, n_anchors_all, 1]\n        cls_preds = outputs[:, :, 5:]  # [batch, n_anchors_all, n_cls]\n\n        # calculate targets\n        nlabel = (labels.sum(dim=2) > 0).sum(dim=1)  # number of objects\n\n        total_num_anchors = outputs.shape[1]\n        x_shifts = torch.cat(x_shifts, 1)  # [1, n_anchors_all]\n        y_shifts = torch.cat(y_shifts, 1)  # [1, n_anchors_all]\n        expanded_strides = torch.cat(expanded_strides, 1)\n        if self.use_l1:\n            origin_preds = torch.cat(origin_preds, 1)\n\n        cls_targets = []\n        reg_targets = []\n        l1_targets = []\n        obj_targets = []\n        fg_masks = []\n\n        num_fg = 0.0\n        num_gts = 0.0\n\n        for batch_idx in 
range(outputs.shape[0]):\n            num_gt = int(nlabel[batch_idx])\n            num_gts += num_gt\n            if num_gt == 0:\n                cls_target = outputs.new_zeros((0, self.num_classes))\n                reg_target = outputs.new_zeros((0, 4))\n                l1_target = outputs.new_zeros((0, 4))\n                obj_target = outputs.new_zeros((total_num_anchors, 1))\n                fg_mask = outputs.new_zeros(total_num_anchors).bool()\n            else:\n                gt_bboxes_per_image = labels[batch_idx, :num_gt, 1:5]\n                gt_classes = labels[batch_idx, :num_gt, 0]\n                bboxes_preds_per_image = bbox_preds[batch_idx]\n\n                try:\n                    (\n                        gt_matched_classes,\n                        fg_mask,\n                        pred_ious_this_matching,\n                        matched_gt_inds,\n                        num_fg_img,\n                    ) = self.get_assignments(  # noqa\n                        batch_idx,\n                        num_gt,\n                        gt_bboxes_per_image,\n                        gt_classes,\n                        bboxes_preds_per_image,\n                        expanded_strides,\n                        x_shifts,\n                        y_shifts,\n                        cls_preds,\n                        obj_preds,\n                    )\n                except RuntimeError as e:\n                    # TODO: the string might change, consider a better way\n                    if \"CUDA out of memory. \" not in str(e):\n                        raise  # RuntimeError might not caused by CUDA OOM\n\n                    logger.error(\n                        \"OOM RuntimeError is raised due to the huge memory cost during label assignment. \\\n                           CPU mode is applied in this batch. 
If you want to avoid this issue, \\\n                           try to reduce the batch size or image size.\"\n                    )\n                    torch.cuda.empty_cache()\n                    (\n                        gt_matched_classes,\n                        fg_mask,\n                        pred_ious_this_matching,\n                        matched_gt_inds,\n                        num_fg_img,\n                    ) = self.get_assignments(  # noqa\n                        batch_idx,\n                        num_gt,\n                        gt_bboxes_per_image,\n                        gt_classes,\n                        bboxes_preds_per_image,\n                        expanded_strides,\n                        x_shifts,\n                        y_shifts,\n                        cls_preds,\n                        obj_preds,\n                        \"cpu\",\n                    )\n\n                torch.cuda.empty_cache()\n                num_fg += num_fg_img\n\n                cls_target = F.one_hot(\n                    gt_matched_classes.to(torch.int64), self.num_classes\n                ) * pred_ious_this_matching.unsqueeze(-1)\n                obj_target = fg_mask.unsqueeze(-1)\n                reg_target = gt_bboxes_per_image[matched_gt_inds]\n                if self.use_l1:\n                    l1_target = self.get_l1_target(\n                        outputs.new_zeros((num_fg_img, 4)),\n                        gt_bboxes_per_image[matched_gt_inds],\n                        expanded_strides[0][fg_mask],\n                        x_shifts=x_shifts[0][fg_mask],\n                        y_shifts=y_shifts[0][fg_mask],\n                    )\n\n            cls_targets.append(cls_target)\n            reg_targets.append(reg_target)\n            obj_targets.append(obj_target.to(dtype))\n            fg_masks.append(fg_mask)\n            if self.use_l1:\n                l1_targets.append(l1_target)\n\n        cls_targets = torch.cat(cls_targets, 0)\n      
  reg_targets = torch.cat(reg_targets, 0)\n        obj_targets = torch.cat(obj_targets, 0)\n        fg_masks = torch.cat(fg_masks, 0)\n        if self.use_l1:\n            l1_targets = torch.cat(l1_targets, 0)\n\n        num_fg = max(num_fg, 1)\n        loss_iou = (\n            self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets)\n        ).sum() / num_fg\n        loss_obj = (\n            self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets)\n        ).sum() / num_fg\n        loss_cls = (\n            self.bcewithlog_loss(\n                cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets\n            )\n        ).sum() / num_fg\n        if self.use_l1:\n            loss_l1 = (\n                self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets)\n            ).sum() / num_fg\n        else:\n            loss_l1 = 0.0\n\n        reg_weight = 5.0\n        loss = reg_weight * loss_iou + loss_obj + loss_cls + loss_l1\n\n        return (\n            loss,\n            reg_weight * loss_iou,\n            loss_obj,\n            loss_cls,\n            loss_l1,\n            num_fg / max(num_gts, 1),\n        )\n\n    def get_l1_target(self, l1_target, gt, stride, x_shifts, y_shifts, eps=1e-8):\n        l1_target[:, 0] = gt[:, 0] / stride - x_shifts\n        l1_target[:, 1] = gt[:, 1] / stride - y_shifts\n        l1_target[:, 2] = torch.log(gt[:, 2] / stride + eps)\n        l1_target[:, 3] = torch.log(gt[:, 3] / stride + eps)\n        return l1_target\n\n    @torch.no_grad()\n    def get_assignments(\n        self,\n        batch_idx,\n        num_gt,\n        gt_bboxes_per_image,\n        gt_classes,\n        bboxes_preds_per_image,\n        expanded_strides,\n        x_shifts,\n        y_shifts,\n        cls_preds,\n        obj_preds,\n        mode=\"gpu\",\n    ):\n\n        if mode == \"cpu\":\n            print(\"-----------Using CPU for the Current Batch-------------\")\n            gt_bboxes_per_image = 
gt_bboxes_per_image.cpu().float()\n            bboxes_preds_per_image = bboxes_preds_per_image.cpu().float()\n            gt_classes = gt_classes.cpu().float()\n            expanded_strides = expanded_strides.cpu().float()\n            x_shifts = x_shifts.cpu()\n            y_shifts = y_shifts.cpu()\n\n        fg_mask, geometry_relation = self.get_geometry_constraint(\n            gt_bboxes_per_image,\n            expanded_strides,\n            x_shifts,\n            y_shifts,\n        )\n\n        bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]\n        cls_preds_ = cls_preds[batch_idx][fg_mask]\n        obj_preds_ = obj_preds[batch_idx][fg_mask]\n        num_in_boxes_anchor = bboxes_preds_per_image.shape[0]\n\n        if mode == \"cpu\":\n            gt_bboxes_per_image = gt_bboxes_per_image.cpu()\n            bboxes_preds_per_image = bboxes_preds_per_image.cpu()\n\n        pair_wise_ious = bboxes_iou(gt_bboxes_per_image, bboxes_preds_per_image, False)\n\n        gt_cls_per_image = (\n            F.one_hot(gt_classes.to(torch.int64), self.num_classes)\n            .float()\n        )\n        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)\n\n        if mode == \"cpu\":\n            cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu()\n\n        with torch.cuda.amp.autocast(enabled=False):\n            cls_preds_ = (\n                cls_preds_.float().sigmoid_() * obj_preds_.float().sigmoid_()\n            ).sqrt()\n            pair_wise_cls_loss = F.binary_cross_entropy(\n                cls_preds_.unsqueeze(0).repeat(num_gt, 1, 1),\n                gt_cls_per_image.unsqueeze(1).repeat(1, num_in_boxes_anchor, 1),\n                reduction=\"none\"\n            ).sum(-1)\n        del cls_preds_\n\n        cost = (\n            pair_wise_cls_loss\n            + 3.0 * pair_wise_ious_loss\n            + float(1e6) * (~geometry_relation)\n        )\n\n        (\n            num_fg,\n            gt_matched_classes,\n            
pred_ious_this_matching,\n            matched_gt_inds,\n        ) = self.simota_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask)\n        del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss\n\n        if mode == \"cpu\":\n            gt_matched_classes = gt_matched_classes.cuda()\n            fg_mask = fg_mask.cuda()\n            pred_ious_this_matching = pred_ious_this_matching.cuda()\n            matched_gt_inds = matched_gt_inds.cuda()\n\n        return (\n            gt_matched_classes,\n            fg_mask,\n            pred_ious_this_matching,\n            matched_gt_inds,\n            num_fg,\n        )\n\n    def get_geometry_constraint(\n        self, gt_bboxes_per_image, expanded_strides, x_shifts, y_shifts,\n    ):\n        \"\"\"\n        Calculate whether the center of an object is located in a fixed range of\n        an anchor. This is used to avert inappropriate matching. It can also reduce\n        the number of candidate anchors so that the GPU memory is saved.\n        \"\"\"\n        expanded_strides_per_image = expanded_strides[0]\n        x_centers_per_image = ((x_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0)\n        y_centers_per_image = ((y_shifts[0] + 0.5) * expanded_strides_per_image).unsqueeze(0)\n\n        # in fixed center\n        center_radius = 1.5\n        center_dist = expanded_strides_per_image.unsqueeze(0) * center_radius\n        gt_bboxes_per_image_l = (gt_bboxes_per_image[:, 0:1]) - center_dist\n        gt_bboxes_per_image_r = (gt_bboxes_per_image[:, 0:1]) + center_dist\n        gt_bboxes_per_image_t = (gt_bboxes_per_image[:, 1:2]) - center_dist\n        gt_bboxes_per_image_b = (gt_bboxes_per_image[:, 1:2]) + center_dist\n\n        c_l = x_centers_per_image - gt_bboxes_per_image_l\n        c_r = gt_bboxes_per_image_r - x_centers_per_image\n        c_t = y_centers_per_image - gt_bboxes_per_image_t\n        c_b = gt_bboxes_per_image_b - y_centers_per_image\n        center_deltas = 
torch.stack([c_l, c_t, c_r, c_b], 2)\n        is_in_centers = center_deltas.min(dim=-1).values > 0.0\n        anchor_filter = is_in_centers.sum(dim=0) > 0\n        geometry_relation = is_in_centers[:, anchor_filter]\n\n        return anchor_filter, geometry_relation\n\n    def simota_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask):\n        matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)\n\n        n_candidate_k = min(10, pair_wise_ious.size(1))\n        topk_ious, _ = torch.topk(pair_wise_ious, n_candidate_k, dim=1)\n        dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)\n        for gt_idx in range(num_gt):\n            _, pos_idx = torch.topk(\n                cost[gt_idx], k=dynamic_ks[gt_idx], largest=False\n            )\n            matching_matrix[gt_idx][pos_idx] = 1\n\n        del topk_ious, dynamic_ks, pos_idx\n\n        anchor_matching_gt = matching_matrix.sum(0)\n        # deal with the case that one anchor matches multiple ground-truths\n        if anchor_matching_gt.max() > 1:\n            multiple_match_mask = anchor_matching_gt > 1\n            _, cost_argmin = torch.min(cost[:, multiple_match_mask], dim=0)\n            matching_matrix[:, multiple_match_mask] *= 0\n            matching_matrix[cost_argmin, multiple_match_mask] = 1\n        fg_mask_inboxes = anchor_matching_gt > 0\n        num_fg = fg_mask_inboxes.sum().item()\n\n        fg_mask[fg_mask.clone()] = fg_mask_inboxes\n\n        matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)\n        gt_matched_classes = gt_classes[matched_gt_inds]\n\n        pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[\n            fg_mask_inboxes\n        ]\n        return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds\n\n    def visualize_assign_result(self, xin, labels=None, imgs=None, save_prefix=\"assign_vis_\"):\n        # original forward logic\n        outputs, x_shifts, y_shifts, expanded_strides = [], [], [], 
[]\n        # TODO: use forward logic here.\n\n        for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(\n            zip(self.cls_convs, self.reg_convs, self.strides, xin)\n        ):\n            x = self.stems[k](x)\n            cls_x = x\n            reg_x = x\n\n            cls_feat = cls_conv(cls_x)\n            cls_output = self.cls_preds[k](cls_feat)\n            reg_feat = reg_conv(reg_x)\n            reg_output = self.reg_preds[k](reg_feat)\n            obj_output = self.obj_preds[k](reg_feat)\n\n            output = torch.cat([reg_output, obj_output, cls_output], 1)\n            output, grid = self.get_output_and_grid(output, k, stride_this_level, xin[0].type())\n            x_shifts.append(grid[:, :, 0])\n            y_shifts.append(grid[:, :, 1])\n            expanded_strides.append(\n                torch.full((1, grid.shape[1]), stride_this_level).type_as(xin[0])\n            )\n            outputs.append(output)\n\n        outputs = torch.cat(outputs, 1)\n        bbox_preds = outputs[:, :, :4]  # [batch, n_anchors_all, 4]\n        obj_preds = outputs[:, :, 4:5]  # [batch, n_anchors_all, 1]\n        cls_preds = outputs[:, :, 5:]  # [batch, n_anchors_all, n_cls]\n\n        # calculate targets\n        total_num_anchors = outputs.shape[1]\n        x_shifts = torch.cat(x_shifts, 1)  # [1, n_anchors_all]\n        y_shifts = torch.cat(y_shifts, 1)  # [1, n_anchors_all]\n        expanded_strides = torch.cat(expanded_strides, 1)\n\n        nlabel = (labels.sum(dim=2) > 0).sum(dim=1)  # number of objects\n        for batch_idx, (img, num_gt, label) in enumerate(zip(imgs, nlabel, labels)):\n            img = imgs[batch_idx].permute(1, 2, 0).to(torch.uint8)\n            num_gt = int(num_gt)\n            if num_gt == 0:\n                fg_mask = outputs.new_zeros(total_num_anchors).bool()\n            else:\n                gt_bboxes_per_image = label[:num_gt, 1:5]\n                gt_classes = label[:num_gt, 0]\n                
bboxes_preds_per_image = bbox_preds[batch_idx]\n                _, fg_mask, _, matched_gt_inds, _ = self.get_assignments(  # noqa\n                    batch_idx, num_gt, gt_bboxes_per_image, gt_classes,\n                    bboxes_preds_per_image, expanded_strides, x_shifts,\n                    y_shifts, cls_preds, obj_preds,\n                )\n\n            img = img.cpu().numpy().copy()  # copy is crucial here\n            coords = torch.stack([\n                ((x_shifts + 0.5) * expanded_strides).flatten()[fg_mask],\n                ((y_shifts + 0.5) * expanded_strides).flatten()[fg_mask],\n            ], 1)\n\n            xyxy_boxes = cxcywh2xyxy(gt_bboxes_per_image)\n            save_name = save_prefix + str(batch_idx) + \".png\"\n            img = visualize_assign(img, xyxy_boxes, coords, matched_gt_inds, save_name)\n            logger.info(f\"save img to {save_name}\")\n"
  },
  {
    "path": "yolox/models/yolo_pafpn.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport torch\nimport torch.nn as nn\n\nfrom .darknet import CSPDarknet\nfrom .network_blocks import BaseConv, CSPLayer, DWConv\n\n\nclass YOLOPAFPN(nn.Module):\n    \"\"\"\n    YOLOv3 model. Darknet 53 is the default backbone of this model.\n    \"\"\"\n\n    def __init__(\n        self,\n        depth=1.0,\n        width=1.0,\n        in_features=(\"dark3\", \"dark4\", \"dark5\"),\n        in_channels=[256, 512, 1024],\n        depthwise=False,\n        act=\"silu\",\n    ):\n        super().__init__()\n        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)\n        self.in_features = in_features\n        self.in_channels = in_channels\n        Conv = DWConv if depthwise else BaseConv\n\n        self.upsample = nn.Upsample(scale_factor=2, mode=\"nearest\")\n        self.lateral_conv0 = BaseConv(\n            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act\n        )\n        self.C3_p4 = CSPLayer(\n            int(2 * in_channels[1] * width),\n            int(in_channels[1] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )  # cat\n\n        self.reduce_conv1 = BaseConv(\n            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act\n        )\n        self.C3_p3 = CSPLayer(\n            int(2 * in_channels[0] * width),\n            int(in_channels[0] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )\n\n        # bottom-up conv\n        self.bu_conv2 = Conv(\n            int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act\n        )\n        self.C3_n3 = CSPLayer(\n            int(2 * in_channels[0] * width),\n            int(in_channels[1] * width),\n            round(3 * depth),\n            False,\n       
     depthwise=depthwise,\n            act=act,\n        )\n\n        # bottom-up conv\n        self.bu_conv1 = Conv(\n            int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act\n        )\n        self.C3_n4 = CSPLayer(\n            int(2 * in_channels[1] * width),\n            int(in_channels[2] * width),\n            round(3 * depth),\n            False,\n            depthwise=depthwise,\n            act=act,\n        )\n\n    def forward(self, input):\n        \"\"\"\n        Args:\n            inputs: input images.\n\n        Returns:\n            Tuple[Tensor]: FPN feature.\n        \"\"\"\n\n        #  backbone\n        out_features = self.backbone(input)\n        features = [out_features[f] for f in self.in_features]\n        [x2, x1, x0] = features\n\n        fpn_out0 = self.lateral_conv0(x0)  # 1024->512/32\n        f_out0 = self.upsample(fpn_out0)  # 512/16\n        f_out0 = torch.cat([f_out0, x1], 1)  # 512->1024/16\n        f_out0 = self.C3_p4(f_out0)  # 1024->512/16\n\n        fpn_out1 = self.reduce_conv1(f_out0)  # 512->256/16\n        f_out1 = self.upsample(fpn_out1)  # 256/8\n        f_out1 = torch.cat([f_out1, x2], 1)  # 256->512/8\n        pan_out2 = self.C3_p3(f_out1)  # 512->256/8\n\n        p_out1 = self.bu_conv2(pan_out2)  # 256->256/16\n        p_out1 = torch.cat([p_out1, fpn_out1], 1)  # 256->512/16\n        pan_out1 = self.C3_n3(p_out1)  # 512->512/16\n\n        p_out0 = self.bu_conv1(pan_out1)  # 512->512/32\n        p_out0 = torch.cat([p_out0, fpn_out0], 1)  # 512->1024/32\n        pan_out0 = self.C3_n4(p_out0)  # 1024->1024/32\n\n        outputs = (pan_out2, pan_out1, pan_out0)\n        return outputs\n"
  },
  {
    "path": "yolox/models/yolox.py",
    "content": "#!/usr/bin/env python\n# -*- encoding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport torch.nn as nn\n\nfrom .yolo_head import YOLOXHead\nfrom .yolo_pafpn import YOLOPAFPN\n\n\nclass YOLOX(nn.Module):\n    \"\"\"\n    YOLOX model module. The module list is defined by create_yolov3_modules function.\n    The network returns loss values from three YOLO layers during training\n    and detection results during test.\n    \"\"\"\n\n    def __init__(self, backbone=None, head=None):\n        super().__init__()\n        if backbone is None:\n            backbone = YOLOPAFPN()\n        if head is None:\n            head = YOLOXHead(80)\n\n        self.backbone = backbone\n        self.head = head\n\n    def forward(self, x, targets=None):\n        # fpn output content features of [dark3, dark4, dark5]\n        fpn_outs = self.backbone(x)\n\n        if self.training:\n            assert targets is not None\n            loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head(\n                fpn_outs, targets, x\n            )\n            outputs = {\n                \"total_loss\": loss,\n                \"iou_loss\": iou_loss,\n                \"l1_loss\": l1_loss,\n                \"conf_loss\": conf_loss,\n                \"cls_loss\": cls_loss,\n                \"num_fg\": num_fg,\n            }\n        else:\n            outputs = self.head(fpn_outs)\n\n        return outputs\n\n    def visualize(self, x, targets, save_prefix=\"assign_vis_\"):\n        fpn_outs = self.backbone(x)\n        self.head.visualize_assign_result(fpn_outs, targets, x, save_prefix)\n"
  },
  {
    "path": "yolox/tools/__init__.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\n# This file is used for package installation. Script of train/eval/export will be available.\n\nimport sys\nfrom importlib import abc, util\nfrom pathlib import Path\n\n_TOOLS_PATH = Path(__file__).resolve().parent.parent.parent / \"tools\"\n\nif _TOOLS_PATH.is_dir():\n    # This is true only for in-place installation (pip install -e, setup.py develop),\n    # where setup(package_dir=) does not work: https://github.com/pypa/setuptools/issues/230\n\n    class _PathFinder(abc.MetaPathFinder):\n\n        def find_spec(self, name, path, target=None):\n            if not name.startswith(\"yolox.tools.\"):\n                return\n            project_name = name.split(\".\")[-1] + \".py\"\n            target_file = _TOOLS_PATH / project_name\n            if not target_file.is_file():\n                return\n            return util.spec_from_file_location(name, target_file)\n\n    sys.meta_path.append(_PathFinder())\n"
  },
  {
    "path": "yolox/utils/__init__.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nfrom .allreduce_norm import *\nfrom .boxes import *\nfrom .checkpoint import load_ckpt, save_checkpoint\nfrom .compat import meshgrid\nfrom .demo_utils import *\nfrom .dist import *\nfrom .ema import *\nfrom .logger import WandbLogger, setup_logger\nfrom .lr_scheduler import LRScheduler\nfrom .metric import *\nfrom .mlflow_logger import MlflowLogger\nfrom .model_utils import *\nfrom .setup_env import *\nfrom .visualize import *\n"
  },
  {
    "path": "yolox/utils/allreduce_norm.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport pickle\nfrom collections import OrderedDict\n\nimport torch\nfrom torch import distributed as dist\nfrom torch import nn\n\nfrom .dist import _get_global_gloo_group, get_world_size\n\nASYNC_NORM = (\n    nn.BatchNorm1d,\n    nn.BatchNorm2d,\n    nn.BatchNorm3d,\n    nn.InstanceNorm1d,\n    nn.InstanceNorm2d,\n    nn.InstanceNorm3d,\n)\n\n__all__ = [\n    \"get_async_norm_states\",\n    \"pyobj2tensor\",\n    \"tensor2pyobj\",\n    \"all_reduce\",\n    \"all_reduce_norm\",\n]\n\n\ndef get_async_norm_states(module):\n    async_norm_states = OrderedDict()\n    for name, child in module.named_modules():\n        if isinstance(child, ASYNC_NORM):\n            for k, v in child.state_dict().items():\n                async_norm_states[\".\".join([name, k])] = v\n    return async_norm_states\n\n\ndef pyobj2tensor(pyobj, device=\"cuda\"):\n    \"\"\"serialize picklable python object to tensor\"\"\"\n    storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))\n    return torch.ByteTensor(storage).to(device=device)\n\n\ndef tensor2pyobj(tensor):\n    \"\"\"deserialize tensor to picklable python object\"\"\"\n    return pickle.loads(tensor.cpu().numpy().tobytes())\n\n\ndef _get_reduce_op(op_name):\n    return {\n        \"sum\": dist.ReduceOp.SUM,\n        \"mean\": dist.ReduceOp.SUM,\n    }[op_name.lower()]\n\n\ndef all_reduce(py_dict, op=\"sum\", group=None):\n    \"\"\"\n    Apply all reduce function for python dict object.\n    NOTE: make sure that every py_dict has the same keys and values are in the same shape.\n\n    Args:\n        py_dict (dict): dict to apply all reduce op.\n        op (str): operator, could be \"sum\" or \"mean\".\n    \"\"\"\n    world_size = get_world_size()\n    if world_size == 1:\n        return py_dict\n    if group is None:\n        group = _get_global_gloo_group()\n    if dist.get_world_size(group) == 1:\n        
return py_dict\n\n    # all reduce logic across different devices.\n    py_key = list(py_dict.keys())\n    py_key_tensor = pyobj2tensor(py_key)\n    dist.broadcast(py_key_tensor, src=0)\n    py_key = tensor2pyobj(py_key_tensor)\n\n    tensor_shapes = [py_dict[k].shape for k in py_key]\n    tensor_numels = [py_dict[k].numel() for k in py_key]\n\n    flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])\n    dist.all_reduce(flatten_tensor, op=_get_reduce_op(op))\n    if op == \"mean\":\n        flatten_tensor /= world_size\n\n    split_tensors = [\n        x.reshape(shape)\n        for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes)\n    ]\n    return OrderedDict({k: v for k, v in zip(py_key, split_tensors)})\n\n\ndef all_reduce_norm(module):\n    \"\"\"\n    All reduce norm statistics in different devices.\n    \"\"\"\n    states = get_async_norm_states(module)\n    states = all_reduce(states, op=\"mean\")\n    module.load_state_dict(states, strict=False)\n"
  },
  {
    "path": "yolox/utils/boxes.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport numpy as np\n\nimport torch\nimport torchvision\n\n__all__ = [\n    \"filter_box\",\n    \"postprocess\",\n    \"bboxes_iou\",\n    \"matrix_iou\",\n    \"adjust_box_anns\",\n    \"xyxy2xywh\",\n    \"xyxy2cxcywh\",\n    \"cxcywh2xyxy\",\n]\n\n\ndef filter_box(output, scale_range):\n    \"\"\"\n    output: (N, 5+class) shape\n    \"\"\"\n    min_scale, max_scale = scale_range\n    w = output[:, 2] - output[:, 0]\n    h = output[:, 3] - output[:, 1]\n    keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)\n    return output[keep]\n\n\ndef postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agnostic=False):\n    box_corner = prediction.new(prediction.shape)\n    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2\n    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2\n    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2\n    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2\n    prediction[:, :, :4] = box_corner[:, :, :4]\n\n    output = [None for _ in range(len(prediction))]\n    for i, image_pred in enumerate(prediction):\n\n        # If none are remaining => process next image\n        if not image_pred.size(0):\n            continue\n        # Get score and class with highest confidence\n        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)\n\n        conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()\n        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)\n        detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)\n        detections = detections[conf_mask]\n        if not detections.size(0):\n            continue\n\n        if class_agnostic:\n            nms_out_index = torchvision.ops.nms(\n                detections[:, :4],\n     
           detections[:, 4] * detections[:, 5],\n                nms_thre,\n            )\n        else:\n            nms_out_index = torchvision.ops.batched_nms(\n                detections[:, :4],\n                detections[:, 4] * detections[:, 5],\n                detections[:, 6],\n                nms_thre,\n            )\n\n        detections = detections[nms_out_index]\n        if output[i] is None:\n            output[i] = detections\n        else:\n            output[i] = torch.cat((output[i], detections))\n\n    return output\n\n\ndef bboxes_iou(bboxes_a, bboxes_b, xyxy=True):\n    if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:\n        raise IndexError\n\n    if xyxy:\n        tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])\n        br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])\n        area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)\n        area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)\n    else:\n        tl = torch.max(\n            (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),\n            (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),\n        )\n        br = torch.min(\n            (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),\n            (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),\n        )\n\n        area_a = torch.prod(bboxes_a[:, 2:], 1)\n        area_b = torch.prod(bboxes_b[:, 2:], 1)\n    en = (tl < br).type(tl.type()).prod(dim=2)\n    area_i = torch.prod(br - tl, 2) * en  # * ((tl < br).all())\n    return area_i / (area_a[:, None] + area_b - area_i)\n\n\ndef matrix_iou(a, b):\n    \"\"\"\n    return iou of a and b, numpy version for data augenmentation\n    \"\"\"\n    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])\n    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])\n\n    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)\n    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)\n    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)\n    return area_i / (area_a[:, np.newaxis] + area_b - 
area_i + 1e-12)\n\n\ndef adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):\n    bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max)\n    bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max)\n    return bbox\n\n\ndef xyxy2xywh(bboxes):\n    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]\n    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]\n    return bboxes\n\n\ndef xyxy2cxcywh(bboxes):\n    bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]\n    bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]\n    bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5\n    bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5\n    return bboxes\n\n\ndef cxcywh2xyxy(bboxes):\n    bboxes[:, 0] = bboxes[:, 0] - bboxes[:, 2] * 0.5\n    bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5\n    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]\n    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]\n    return bboxes\n"
  },
  {
    "path": "yolox/utils/checkpoint.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\nimport os\nimport shutil\nfrom loguru import logger\n\nimport torch\n\n\ndef load_ckpt(model, ckpt):\n    model_state_dict = model.state_dict()\n    load_dict = {}\n    for key_model, v in model_state_dict.items():\n        if key_model not in ckpt:\n            logger.warning(\n                \"{} is not in the ckpt. Please double check and see if this is desired.\".format(\n                    key_model\n                )\n            )\n            continue\n        v_ckpt = ckpt[key_model]\n        if v.shape != v_ckpt.shape:\n            logger.warning(\n                \"Shape of {} in checkpoint is {}, while shape of {} in model is {}.\".format(\n                    key_model, v_ckpt.shape, key_model, v.shape\n                )\n            )\n            continue\n        load_dict[key_model] = v_ckpt\n\n    model.load_state_dict(load_dict, strict=False)\n    return model\n\n\ndef save_checkpoint(state, is_best, save_dir, model_name=\"\"):\n    if not os.path.exists(save_dir):\n        os.makedirs(save_dir)\n    filename = os.path.join(save_dir, model_name + \"_ckpt.pth\")\n    torch.save(state, filename)\n    if is_best:\n        best_filename = os.path.join(save_dir, \"best_ckpt.pth\")\n        shutil.copyfile(filename, best_filename)\n"
  },
  {
    "path": "yolox/utils/compat.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n\nimport torch\n\n_TORCH_VER = [int(x) for x in torch.__version__.split(\".\")[:2]]\n\n__all__ = [\"meshgrid\"]\n\n\ndef meshgrid(*tensors):\n    if _TORCH_VER >= [1, 10]:\n        return torch.meshgrid(*tensors, indexing=\"ij\")\n    else:\n        return torch.meshgrid(*tensors)\n"
  },
  {
    "path": "yolox/utils/demo_utils.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport os\nimport random\n\nimport cv2\nimport numpy as np\n\n__all__ = [\n    \"mkdir\", \"nms\", \"multiclass_nms\", \"demo_postprocess\", \"random_color\", \"visualize_assign\"\n]\n\n\ndef random_color():\n    return random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)\n\n\ndef visualize_assign(img, boxes, coords, match_results, save_name=None) -> np.ndarray:\n    \"\"\"visualize label assign result.\n\n    Args:\n        img: img to visualize\n        boxes: gt boxes in xyxy format\n        coords: coords of matched anchors\n        match_results: match results of each gt box and coord.\n        save_name: name of save image, if None, image will not be saved. Default: None.\n    \"\"\"\n    for box_id, box in enumerate(boxes):\n        x1, y1, x2, y2 = box\n        color = random_color()\n        assign_coords = coords[match_results == box_id]\n        if assign_coords.numel() == 0:\n            # unmatched boxes are red\n            color = (0, 0, 255)\n            cv2.putText(\n                img, \"unmatched\", (int(x1), int(y1) - 5),\n                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1\n            )\n        else:\n            for coord in assign_coords:\n                # draw assigned anchor\n                cv2.circle(img, (int(coord[0]), int(coord[1])), 3, color, -1)\n        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)\n\n    if save_name is not None:\n        cv2.imwrite(save_name, img)\n\n    return img\n\n\ndef mkdir(path):\n    if not os.path.exists(path):\n        os.makedirs(path)\n\n\ndef nms(boxes, scores, nms_thr):\n    \"\"\"Single class NMS implemented in Numpy.\"\"\"\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    x2 = boxes[:, 2]\n    y2 = boxes[:, 3]\n\n    areas = (x2 - x1 + 1) * (y2 - y1 + 1)\n    order = scores.argsort()[::-1]\n\n    keep = []\n    while order.size > 0:\n        i = order[0]\n     
   keep.append(i)\n        xx1 = np.maximum(x1[i], x1[order[1:]])\n        yy1 = np.maximum(y1[i], y1[order[1:]])\n        xx2 = np.minimum(x2[i], x2[order[1:]])\n        yy2 = np.minimum(y2[i], y2[order[1:]])\n\n        w = np.maximum(0.0, xx2 - xx1 + 1)\n        h = np.maximum(0.0, yy2 - yy1 + 1)\n        inter = w * h\n        ovr = inter / (areas[i] + areas[order[1:]] - inter)\n\n        inds = np.where(ovr <= nms_thr)[0]\n        order = order[inds + 1]\n\n    return keep\n\n\ndef multiclass_nms(boxes, scores, nms_thr, score_thr, class_agnostic=True):\n    \"\"\"Multiclass NMS implemented in Numpy\"\"\"\n    if class_agnostic:\n        nms_method = multiclass_nms_class_agnostic\n    else:\n        nms_method = multiclass_nms_class_aware\n    return nms_method(boxes, scores, nms_thr, score_thr)\n\n\ndef multiclass_nms_class_aware(boxes, scores, nms_thr, score_thr):\n    \"\"\"Multiclass NMS implemented in Numpy. Class-aware version.\"\"\"\n    final_dets = []\n    num_classes = scores.shape[1]\n    for cls_ind in range(num_classes):\n        cls_scores = scores[:, cls_ind]\n        valid_score_mask = cls_scores > score_thr\n        if valid_score_mask.sum() == 0:\n            continue\n        else:\n            valid_scores = cls_scores[valid_score_mask]\n            valid_boxes = boxes[valid_score_mask]\n            keep = nms(valid_boxes, valid_scores, nms_thr)\n            if len(keep) > 0:\n                cls_inds = np.ones((len(keep), 1)) * cls_ind\n                dets = np.concatenate(\n                    [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1\n                )\n                final_dets.append(dets)\n    if len(final_dets) == 0:\n        return None\n    return np.concatenate(final_dets, 0)\n\n\ndef multiclass_nms_class_agnostic(boxes, scores, nms_thr, score_thr):\n    \"\"\"Multiclass NMS implemented in Numpy. 
Class-agnostic version.\"\"\"\n    cls_inds = scores.argmax(1)\n    cls_scores = scores[np.arange(len(cls_inds)), cls_inds]\n\n    valid_score_mask = cls_scores > score_thr\n    if valid_score_mask.sum() == 0:\n        return None\n    valid_scores = cls_scores[valid_score_mask]\n    valid_boxes = boxes[valid_score_mask]\n    valid_cls_inds = cls_inds[valid_score_mask]\n    keep = nms(valid_boxes, valid_scores, nms_thr)\n    if keep:\n        dets = np.concatenate(\n            [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1\n        )\n    return dets\n\n\ndef demo_postprocess(outputs, img_size, p6=False):\n    grids = []\n    expanded_strides = []\n    strides = [8, 16, 32] if not p6 else [8, 16, 32, 64]\n\n    hsizes = [img_size[0] // stride for stride in strides]\n    wsizes = [img_size[1] // stride for stride in strides]\n\n    for hsize, wsize, stride in zip(hsizes, wsizes, strides):\n        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))\n        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)\n        grids.append(grid)\n        shape = grid.shape[:2]\n        expanded_strides.append(np.full((*shape, 1), stride))\n\n    grids = np.concatenate(grids, 1)\n    expanded_strides = np.concatenate(expanded_strides, 1)\n    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides\n    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides\n\n    return outputs\n"
  },
  {
    "path": "yolox/utils/dist.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# This file mainly comes from\n# https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/comm.py\n# Copyright (c) Facebook, Inc. and its affiliates.\n# Copyright (c) Megvii Inc. All rights reserved.\n\"\"\"\nThis file contains primitives for multi-gpu communication.\nThis is useful when doing distributed training.\n\"\"\"\n\nimport functools\nimport os\nimport pickle\nimport time\nfrom contextlib import contextmanager\nfrom loguru import logger\n\nimport numpy as np\n\nimport torch\nfrom torch import distributed as dist\n\n__all__ = [\n    \"get_num_devices\",\n    \"wait_for_the_master\",\n    \"is_main_process\",\n    \"synchronize\",\n    \"get_world_size\",\n    \"get_rank\",\n    \"get_local_rank\",\n    \"get_local_size\",\n    \"time_synchronized\",\n    \"gather\",\n    \"all_gather\",\n]\n\n_LOCAL_PROCESS_GROUP = None\n\n\ndef get_num_devices():\n    gpu_list = os.getenv('CUDA_VISIBLE_DEVICES', None)\n    if gpu_list is not None:\n        return len(gpu_list.split(','))\n    else:\n        devices_list_info = os.popen(\"nvidia-smi -L\")\n        devices_list_info = devices_list_info.read().strip().split(\"\\n\")\n        return len(devices_list_info)\n\n\n@contextmanager\ndef wait_for_the_master(local_rank: int = None):\n    \"\"\"\n    Make all processes waiting for the master to do some task.\n\n    Args:\n        local_rank (int): the rank of the current process. 
Default to None.\n            If None, it will use the rank of the current process.\n    \"\"\"\n    if local_rank is None:\n        local_rank = get_local_rank()\n\n    if local_rank > 0:\n        dist.barrier()\n    yield\n    if local_rank == 0:\n        if not dist.is_available():\n            return\n        if not dist.is_initialized():\n            return\n        else:\n            dist.barrier()\n\n\ndef synchronize():\n    \"\"\"\n    Helper function to synchronize (barrier) among all processes when using distributed training\n    \"\"\"\n    if not dist.is_available():\n        return\n    if not dist.is_initialized():\n        return\n    world_size = dist.get_world_size()\n    if world_size == 1:\n        return\n    dist.barrier()\n\n\ndef get_world_size() -> int:\n    if not dist.is_available():\n        return 1\n    if not dist.is_initialized():\n        return 1\n    return dist.get_world_size()\n\n\ndef get_rank() -> int:\n    if not dist.is_available():\n        return 0\n    if not dist.is_initialized():\n        return 0\n    return dist.get_rank()\n\n\ndef get_local_rank() -> int:\n    \"\"\"\n    Returns:\n        The rank of the current process within the local (per-machine) process group.\n    \"\"\"\n    if _LOCAL_PROCESS_GROUP is None:\n        return get_rank()\n\n    if not dist.is_available():\n        return 0\n    if not dist.is_initialized():\n        return 0\n    return dist.get_rank(group=_LOCAL_PROCESS_GROUP)\n\n\ndef get_local_size() -> int:\n    \"\"\"\n    Returns:\n        The size of the per-machine process group, i.e. 
the number of processes per machine.\n    \"\"\"\n    if not dist.is_available():\n        return 1\n    if not dist.is_initialized():\n        return 1\n    return dist.get_world_size(group=_LOCAL_PROCESS_GROUP)\n\n\ndef is_main_process() -> bool:\n    return get_rank() == 0\n\n\n@functools.lru_cache()\ndef _get_global_gloo_group():\n    \"\"\"\n    Return a process group based on gloo backend, containing all the ranks\n    The result is cached.\n    \"\"\"\n    if dist.get_backend() == \"nccl\":\n        return dist.new_group(backend=\"gloo\")\n    else:\n        return dist.group.WORLD\n\n\ndef _serialize_to_tensor(data, group):\n    backend = dist.get_backend(group)\n    assert backend in [\"gloo\", \"nccl\"]\n    device = torch.device(\"cpu\" if backend == \"gloo\" else \"cuda\")\n\n    buffer = pickle.dumps(data)\n    if len(buffer) > 1024 ** 3:\n        logger.warning(\n            \"Rank {} trying to all-gather {:.2f} GB of data on device {}\".format(\n                get_rank(), len(buffer) / (1024 ** 3), device\n            )\n        )\n    storage = torch.ByteStorage.from_buffer(buffer)\n    tensor = torch.ByteTensor(storage).to(device=device)\n    return tensor\n\n\ndef _pad_to_largest_tensor(tensor, group):\n    \"\"\"\n    Returns:\n        list[int]: size of the tensor, on each rank\n        Tensor: padded tensor that has the max size\n    \"\"\"\n    world_size = dist.get_world_size(group=group)\n    assert (\n        world_size >= 1\n    ), \"comm.gather/all_gather must be called from ranks within the given group!\"\n    local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device)\n    size_list = [\n        torch.zeros([1], dtype=torch.int64, device=tensor.device)\n        for _ in range(world_size)\n    ]\n    dist.all_gather(size_list, local_size, group=group)\n    size_list = [int(size.item()) for size in size_list]\n\n    max_size = max(size_list)\n\n    # we pad the tensor because torch all_gather does not support\n   
 # gathering tensors of different shapes\n    if local_size != max_size:\n        padding = torch.zeros(\n            (max_size - local_size,), dtype=torch.uint8, device=tensor.device\n        )\n        tensor = torch.cat((tensor, padding), dim=0)\n    return size_list, tensor\n\n\ndef all_gather(data, group=None):\n    \"\"\"\n    Run all_gather on arbitrary picklable data (not necessarily tensors).\n\n    Args:\n        data: any picklable object\n        group: a torch process group. By default, will use a group which\n            contains all ranks on gloo backend.\n    Returns:\n        list[data]: list of data gathered from each rank\n    \"\"\"\n    if get_world_size() == 1:\n        return [data]\n    if group is None:\n        group = _get_global_gloo_group()\n    if dist.get_world_size(group) == 1:\n        return [data]\n\n    tensor = _serialize_to_tensor(data, group)\n\n    size_list, tensor = _pad_to_largest_tensor(tensor, group)\n    max_size = max(size_list)\n\n    # receiving Tensor from all ranks\n    tensor_list = [\n        torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)\n        for _ in size_list\n    ]\n    dist.all_gather(tensor_list, tensor, group=group)\n\n    data_list = []\n    for size, tensor in zip(size_list, tensor_list):\n        buffer = tensor.cpu().numpy().tobytes()[:size]\n        data_list.append(pickle.loads(buffer))\n\n    return data_list\n\n\ndef gather(data, dst=0, group=None):\n    \"\"\"\n    Run gather on arbitrary picklable data (not necessarily tensors).\n\n    Args:\n        data: any picklable object\n        dst (int): destination rank\n        group: a torch process group. By default, will use a group which\n            contains all ranks on gloo backend.\n\n    Returns:\n        list[data]: on dst, a list of data gathered from each rank. 
Otherwise,\n            an empty list.\n    \"\"\"\n    if get_world_size() == 1:\n        return [data]\n    if group is None:\n        group = _get_global_gloo_group()\n    if dist.get_world_size(group=group) == 1:\n        return [data]\n    rank = dist.get_rank(group=group)\n\n    tensor = _serialize_to_tensor(data, group)\n    size_list, tensor = _pad_to_largest_tensor(tensor, group)\n\n    # receiving Tensor from all ranks\n    if rank == dst:\n        max_size = max(size_list)\n        tensor_list = [\n            torch.empty((max_size,), dtype=torch.uint8, device=tensor.device)\n            for _ in size_list\n        ]\n        dist.gather(tensor, tensor_list, dst=dst, group=group)\n\n        data_list = []\n        for size, tensor in zip(size_list, tensor_list):\n            buffer = tensor.cpu().numpy().tobytes()[:size]\n            data_list.append(pickle.loads(buffer))\n        return data_list\n    else:\n        dist.gather(tensor, [], dst=dst, group=group)\n        return []\n\n\ndef shared_random_seed():\n    \"\"\"\n    Returns:\n        int: a random number that is the same across all workers.\n            If workers need a shared RNG, they can use this shared seed to\n            create one.\n    All workers must call this function, otherwise it will deadlock.\n    \"\"\"\n    ints = np.random.randint(2 ** 31)\n    all_ints = all_gather(ints)\n    return all_ints[0]\n\n\ndef time_synchronized():\n    \"\"\"pytorch-accurate time\"\"\"\n    if torch.cuda.is_available():\n        torch.cuda.synchronize()\n    return time.time()\n"
  },
  {
    "path": "yolox/utils/ema.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\nimport math\nfrom copy import deepcopy\n\nimport torch\nimport torch.nn as nn\n\n__all__ = [\"ModelEMA\", \"is_parallel\"]\n\n\ndef is_parallel(model):\n    \"\"\"check if model is in parallel mode.\"\"\"\n    parallel_type = (\n        nn.parallel.DataParallel,\n        nn.parallel.DistributedDataParallel,\n    )\n    return isinstance(model, parallel_type)\n\n\nclass ModelEMA:\n    \"\"\"\n    Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models\n    Keep a moving average of everything in the model state_dict (parameters and buffers).\n    This is intended to allow functionality like\n    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage\n    A smoothed version of the weights is necessary for some training schemes to perform well.\n    This class is sensitive where it is initialized in the sequence of model init,\n    GPU assignment and distributed training wrappers.\n    \"\"\"\n\n    def __init__(self, model, decay=0.9999, updates=0):\n        \"\"\"\n        Args:\n            model (nn.Module): model to apply EMA.\n            decay (float): ema decay reate.\n            updates (int): counter of EMA updates.\n        \"\"\"\n        # Create EMA(FP32)\n        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()\n        self.updates = updates\n        # decay exponential ramp (to help early epochs)\n        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))\n        for p in self.ema.parameters():\n            p.requires_grad_(False)\n\n    def update(self, model):\n        # Update EMA parameters\n        with torch.no_grad():\n            self.updates += 1\n            d = self.decay(self.updates)\n\n            msd = (\n                model.module.state_dict() if is_parallel(model) else model.state_dict()\n            )  # model state_dict\n            
for k, v in self.ema.state_dict().items():\n                if v.dtype.is_floating_point:\n                    v *= d\n                    v += (1.0 - d) * msd[k].detach()\n"
  },
  {
    "path": "yolox/utils/logger.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport inspect\nimport os\nimport sys\nfrom collections import defaultdict\nfrom loguru import logger\n\nimport cv2\nimport numpy as np\n\nimport torch\n\n\ndef get_caller_name(depth=0):\n    \"\"\"\n    Args:\n        depth (int): Depth of caller conext, use 0 for caller depth.\n        Default value: 0.\n\n    Returns:\n        str: module name of the caller\n    \"\"\"\n    # the following logic is a little bit faster than inspect.stack() logic\n    frame = inspect.currentframe().f_back\n    for _ in range(depth):\n        frame = frame.f_back\n\n    return frame.f_globals[\"__name__\"]\n\n\nclass StreamToLoguru:\n    \"\"\"\n    stream object that redirects writes to a logger instance.\n    \"\"\"\n\n    def __init__(self, level=\"INFO\", caller_names=(\"apex\", \"pycocotools\")):\n        \"\"\"\n        Args:\n            level(str): log level string of loguru. Default value: \"INFO\".\n            caller_names(tuple): caller names of redirected module.\n                Default value: (apex, pycocotools).\n        \"\"\"\n        self.level = level\n        self.linebuf = \"\"\n        self.caller_names = caller_names\n\n    def write(self, buf):\n        full_name = get_caller_name(depth=1)\n        module_name = full_name.rsplit(\".\", maxsplit=-1)[0]\n        if module_name in self.caller_names:\n            for line in buf.rstrip().splitlines():\n                # use caller level log\n                logger.opt(depth=2).log(self.level, line.rstrip())\n        else:\n            sys.__stdout__.write(buf)\n\n    def flush(self):\n        # flush is related with CPR(cursor position report) in terminal\n        return sys.__stdout__.flush()\n\n    def isatty(self):\n        # when using colab, jax is installed by default and issue like\n        # https://github.com/Megvii-BaseDetection/YOLOX/issues/1437 might be raised\n        # due to missing attribute 
like`isatty`.\n        # For more details, checked the following link:\n        # https://github.com/google/jax/blob/10720258ea7fb5bde997dfa2f3f71135ab7a6733/jax/_src/pretty_printer.py#L54  # noqa\n        return sys.__stdout__.isatty()\n\n    def fileno(self):\n        # To solve the issue when using debug tools like pdb\n        return sys.__stdout__.fileno()\n\n\ndef redirect_sys_output(log_level=\"INFO\"):\n    redirect_logger = StreamToLoguru(log_level)\n    sys.stderr = redirect_logger\n    sys.stdout = redirect_logger\n\n\ndef setup_logger(save_dir, distributed_rank=0, filename=\"log.txt\", mode=\"a\"):\n    \"\"\"setup logger for training and testing.\n    Args:\n        save_dir(str): location to save log file\n        distributed_rank(int): device rank when multi-gpu environment\n        filename (string): log save name.\n        mode(str): log file write mode, `append` or `override`. default is `a`.\n\n    Return:\n        logger instance.\n    \"\"\"\n    loguru_format = (\n        \"<green>{time:YYYY-MM-DD HH:mm:ss}</green> | \"\n        \"<level>{level: <8}</level> | \"\n        \"<cyan>{name}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>\"\n    )\n\n    logger.remove()\n    save_file = os.path.join(save_dir, filename)\n    if mode == \"o\" and os.path.exists(save_file):\n        os.remove(save_file)\n    # only keep logger in rank0 process\n    if distributed_rank == 0:\n        logger.add(\n            sys.stderr,\n            format=loguru_format,\n            level=\"INFO\",\n            enqueue=True,\n        )\n        logger.add(save_file)\n\n    # redirect stdout/stderr to loguru\n    redirect_sys_output(\"INFO\")\n\n\nclass WandbLogger(object):\n    \"\"\"\n    Log training runs, datasets, models, and predictions to Weights & Biases.\n    This logger sends information to W&B at wandb.ai.\n    By default, this information includes hyperparameters,\n    system configuration and metrics, model metrics,\n    and basic data metrics and 
analyses.\n\n    For more information, please refer to:\n    https://docs.wandb.ai/guides/track\n    https://docs.wandb.ai/guides/integrations/other/yolox\n    \"\"\"\n    def __init__(self,\n                 project=None,\n                 name=None,\n                 id=None,\n                 entity=None,\n                 save_dir=None,\n                 config=None,\n                 val_dataset=None,\n                 num_eval_images=100,\n                 log_checkpoints=False,\n                 **kwargs):\n        \"\"\"\n        Args:\n            project (str): wandb project name.\n            name (str): wandb run name.\n            id (str): wandb run id.\n            entity (str): wandb entity name.\n            save_dir (str): save directory.\n            config (dict): config dict.\n            val_dataset (Dataset): validation dataset.\n            num_eval_images (int): number of images from the validation set to log.\n            log_checkpoints (bool): log checkpoints\n            **kwargs: other kwargs.\n\n        Usage:\n            Any arguments for wandb.init can be provided on the command line using\n            the prefix `wandb-`.\n            Example\n            ```\n            python tools/train.py .... 
--logger wandb wandb-project <project-name> \\\n                wandb-name <run-name> \\\n                wandb-id <run-id> \\\n                wandb-save_dir <save-dir> \\\n                wandb-num_eval_imges <num-images> \\\n                wandb-log_checkpoints <bool>\n            ```\n            The val_dataset argument is not open to the command line.\n        \"\"\"\n        try:\n            import wandb\n            self.wandb = wandb\n        except ModuleNotFoundError:\n            raise ModuleNotFoundError(\n                \"wandb is not installed.\"\n                \"Please install wandb using pip install wandb\"\n                )\n\n        from yolox.data.datasets import VOCDetection\n\n        self.project = project\n        self.name = name\n        self.id = id\n        self.save_dir = save_dir\n        self.config = config\n        self.kwargs = kwargs\n        self.entity = entity\n        self._run = None\n        self.val_artifact = None\n        if num_eval_images == -1:\n            self.num_log_images = len(val_dataset)\n        else:\n            self.num_log_images = min(num_eval_images, len(val_dataset))\n        self.log_checkpoints = (log_checkpoints == \"True\" or log_checkpoints == \"true\")\n        self._wandb_init = dict(\n            project=self.project,\n            name=self.name,\n            id=self.id,\n            entity=self.entity,\n            dir=self.save_dir,\n            resume=\"allow\"\n        )\n        self._wandb_init.update(**kwargs)\n\n        _ = self.run\n\n        if self.config:\n            self.run.config.update(self.config)\n        self.run.define_metric(\"train/epoch\")\n        self.run.define_metric(\"val/*\", step_metric=\"train/epoch\")\n        self.run.define_metric(\"train/step\")\n        self.run.define_metric(\"train/*\", step_metric=\"train/step\")\n\n        self.voc_dataset = VOCDetection\n\n        if val_dataset and self.num_log_images != 0:\n            self.val_dataset = 
val_dataset\n            self.cats = val_dataset.cats\n            self.id_to_class = {\n                cls['id']: cls['name'] for cls in self.cats\n            }\n            self._log_validation_set(val_dataset)\n\n    @property\n    def run(self):\n        if self._run is None:\n            if self.wandb.run is not None:\n                logger.info(\n                    \"There is a wandb run already in progress \"\n                    \"and newly created instances of `WandbLogger` will reuse\"\n                    \" this run. If this is not desired, call `wandb.finish()`\"\n                    \"before instantiating `WandbLogger`.\"\n                )\n                self._run = self.wandb.run\n            else:\n                self._run = self.wandb.init(**self._wandb_init)\n        return self._run\n\n    def _log_validation_set(self, val_dataset):\n        \"\"\"\n        Log validation set to wandb.\n\n        Args:\n            val_dataset (Dataset): validation dataset.\n        \"\"\"\n        if self.val_artifact is None:\n            self.val_artifact = self.wandb.Artifact(name=\"validation_images\", type=\"dataset\")\n            self.val_table = self.wandb.Table(columns=[\"id\", \"input\"])\n\n            for i in range(self.num_log_images):\n                data_point = val_dataset[i]\n                img = data_point[0]\n                id = data_point[3]\n                img = np.transpose(img, (1, 2, 0))\n                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n\n                if isinstance(id, torch.Tensor):\n                    id = id.item()\n\n                self.val_table.add_data(\n                    id,\n                    self.wandb.Image(img)\n                )\n\n            self.val_artifact.add(self.val_table, \"validation_images_table\")\n            self.run.use_artifact(self.val_artifact)\n            self.val_artifact.wait()\n\n    def _convert_prediction_format(self, predictions):\n        image_wise_data = 
defaultdict(int)\n\n        for key, val in predictions.items():\n            img_id = key\n\n            try:\n                bboxes, cls, scores = val\n            except KeyError:\n                bboxes, cls, scores = val[\"bboxes\"], val[\"categories\"], val[\"scores\"]\n\n            # These store information of actual bounding boxes i.e. the ones which are not None\n            act_box = []\n            act_scores = []\n            act_cls = []\n\n            if bboxes is not None:\n                for box, classes, score in zip(bboxes, cls, scores):\n                    if box is None or score is None or classes is None:\n                        continue\n                    act_box.append(box)\n                    act_scores.append(score)\n                    act_cls.append(classes)\n\n            image_wise_data.update({\n                int(img_id): {\n                    \"bboxes\": [box.numpy().tolist() for box in act_box],\n                    \"scores\": [score.numpy().item() for score in act_scores],\n                    \"categories\": [\n                        self.val_dataset.class_ids[int(act_cls[ind])]\n                        for ind in range(len(act_box))\n                    ],\n                }\n            })\n\n        return image_wise_data\n\n    def log_metrics(self, metrics, step=None):\n        \"\"\"\n        Args:\n            metrics (dict): metrics dict.\n            step (int): step number.\n        \"\"\"\n\n        for k, v in metrics.items():\n            if isinstance(v, torch.Tensor):\n                metrics[k] = v.item()\n\n        if step is not None:\n            metrics.update({\"train/step\": step})\n            self.run.log(metrics)\n        else:\n            self.run.log(metrics)\n\n    def log_images(self, predictions):\n        if len(predictions) == 0 or self.val_artifact is None or self.num_log_images == 0:\n            return\n\n        table_ref = self.val_artifact.get(\"validation_images_table\")\n\n      
  columns = [\"id\", \"predicted\"]\n        for cls in self.cats:\n            columns.append(cls[\"name\"])\n\n        if isinstance(self.val_dataset, self.voc_dataset):\n            predictions = self._convert_prediction_format(predictions)\n\n        result_table = self.wandb.Table(columns=columns)\n\n        for idx, val in table_ref.iterrows():\n\n            avg_scores = defaultdict(int)\n            num_occurrences = defaultdict(int)\n\n            id = val[0]\n            if isinstance(id, list):\n                id = id[0]\n\n            if id in predictions:\n                prediction = predictions[id]\n                boxes = []\n                for i in range(len(prediction[\"bboxes\"])):\n                    bbox = prediction[\"bboxes\"][i]\n                    x0 = bbox[0]\n                    y0 = bbox[1]\n                    x1 = bbox[2]\n                    y1 = bbox[3]\n                    box = {\n                        \"position\": {\n                            \"minX\": min(x0, x1),\n                            \"minY\": min(y0, y1),\n                            \"maxX\": max(x0, x1),\n                            \"maxY\": max(y0, y1)\n                        },\n                        \"class_id\": prediction[\"categories\"][i],\n                        \"domain\": \"pixel\"\n                    }\n                    avg_scores[\n                        self.id_to_class[prediction[\"categories\"][i]]\n                    ] += prediction[\"scores\"][i]\n                    num_occurrences[self.id_to_class[prediction[\"categories\"][i]]] += 1\n                    boxes.append(box)\n            else:\n                boxes = []\n            average_class_score = []\n            for cls in self.cats:\n                if cls[\"name\"] not in num_occurrences:\n                    score = 0\n                else:\n                    score = avg_scores[cls[\"name\"]] / num_occurrences[cls[\"name\"]]\n                
average_class_score.append(score)\n            result_table.add_data(\n                idx,\n                self.wandb.Image(val[1], boxes={\n                        \"prediction\": {\n                            \"box_data\": boxes,\n                            \"class_labels\": self.id_to_class\n                        }\n                    }\n                ),\n                *average_class_score\n            )\n\n        self.wandb.log({\"val_results/result_table\": result_table})\n\n    def save_checkpoint(self, save_dir, model_name, is_best, metadata=None):\n        \"\"\"\n        Args:\n            save_dir (str): save directory.\n            model_name (str): model name.\n            is_best (bool): whether the model is the best model.\n            metadata (dict): metadata to save corresponding to the checkpoint.\n        \"\"\"\n\n        if not self.log_checkpoints:\n            return\n\n        if \"epoch\" in metadata:\n            epoch = metadata[\"epoch\"]\n        else:\n            epoch = None\n\n        filename = os.path.join(save_dir, model_name + \"_ckpt.pth\")\n        artifact = self.wandb.Artifact(\n            name=f\"run_{self.run.id}_model\",\n            type=\"model\",\n            metadata=metadata\n        )\n        artifact.add_file(filename, name=\"model_ckpt.pth\")\n\n        aliases = [\"latest\"]\n\n        if is_best:\n            aliases.append(\"best\")\n\n        if epoch:\n            aliases.append(f\"epoch-{epoch}\")\n\n        self.run.log_artifact(artifact, aliases=aliases)\n\n    def finish(self):\n        self.run.finish()\n\n    @classmethod\n    def initialize_wandb_logger(cls, args, exp, val_dataset):\n        wandb_params = dict()\n        prefix = \"wandb-\"\n        for k, v in zip(args.opts[0::2], args.opts[1::2]):\n            if k.startswith(\"wandb-\"):\n                try:\n                    wandb_params.update({k[len(prefix):]: int(v)})\n                except ValueError:\n                    
wandb_params.update({k[len(prefix):]: v})\n\n        return cls(config=vars(exp), val_dataset=val_dataset, **wandb_params)\n"
  },
  {
    "path": "yolox/utils/lr_scheduler.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport math\nfrom functools import partial\n\n\nclass LRScheduler:\n    def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs):\n        \"\"\"\n        Supported lr schedulers: [cos, warmcos, multistep]\n\n        Args:\n            lr (float): learning rate.\n            iters_per_epoch (int): number of iterations in one epoch.\n            total_epochs (int): number of epochs in training.\n            kwargs (dict):\n                - cos: None\n                - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)]\n                - multistep: [milestones (epochs), gamma (default 0.1)]\n        \"\"\"\n\n        self.lr = lr\n        self.iters_per_epoch = iters_per_epoch\n        self.total_epochs = total_epochs\n        self.total_iters = iters_per_epoch * total_epochs\n\n        self.__dict__.update(kwargs)\n\n        self.lr_func = self._get_lr_func(name)\n\n    def update_lr(self, iters):\n        return self.lr_func(iters)\n\n    def _get_lr_func(self, name):\n        if name == \"cos\":  # cosine lr schedule\n            lr_func = partial(cos_lr, self.lr, self.total_iters)\n        elif name == \"warmcos\":\n            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs\n            warmup_lr_start = getattr(self, \"warmup_lr_start\", 1e-6)\n            lr_func = partial(\n                warm_cos_lr,\n                self.lr,\n                self.total_iters,\n                warmup_total_iters,\n                warmup_lr_start,\n            )\n        elif name == \"yoloxwarmcos\":\n            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs\n            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs\n            warmup_lr_start = getattr(self, \"warmup_lr_start\", 0)\n            min_lr_ratio = getattr(self, \"min_lr_ratio\", 0.2)\n            lr_func = partial(\n                
yolox_warm_cos_lr,\n                self.lr,\n                min_lr_ratio,\n                self.total_iters,\n                warmup_total_iters,\n                warmup_lr_start,\n                no_aug_iters,\n            )\n        elif name == \"yoloxsemiwarmcos\":\n            warmup_lr_start = getattr(self, \"warmup_lr_start\", 0)\n            min_lr_ratio = getattr(self, \"min_lr_ratio\", 0.2)\n            warmup_total_iters = self.iters_per_epoch * self.warmup_epochs\n            no_aug_iters = self.iters_per_epoch * self.no_aug_epochs\n            normal_iters = self.iters_per_epoch * self.semi_epoch\n            semi_iters = self.iters_per_epoch_semi * (\n                self.total_epochs - self.semi_epoch - self.no_aug_epochs\n            )\n            lr_func = partial(\n                yolox_semi_warm_cos_lr,\n                self.lr,\n                min_lr_ratio,\n                warmup_lr_start,\n                self.total_iters,\n                normal_iters,\n                no_aug_iters,\n                warmup_total_iters,\n                semi_iters,\n                self.iters_per_epoch,\n                self.iters_per_epoch_semi,\n            )\n        elif name == \"multistep\":  # stepwise lr schedule\n            milestones = [\n                int(self.total_iters * milestone / self.total_epochs)\n                for milestone in self.milestones\n            ]\n            gamma = getattr(self, \"gamma\", 0.1)\n            lr_func = partial(multistep_lr, self.lr, milestones, gamma)\n        else:\n            raise ValueError(\"Scheduler version {} not supported.\".format(name))\n        return lr_func\n\n\ndef cos_lr(lr, total_iters, iters):\n    \"\"\"Cosine learning rate\"\"\"\n    lr *= 0.5 * (1.0 + math.cos(math.pi * iters / total_iters))\n    return lr\n\n\ndef warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters):\n    \"\"\"Cosine learning rate with warm up.\"\"\"\n    if iters <= warmup_total_iters:\n       
 lr = (lr - warmup_lr_start) * iters / float(\n            warmup_total_iters\n        ) + warmup_lr_start\n    else:\n        lr *= 0.5 * (\n            1.0\n            + math.cos(\n                math.pi\n                * (iters - warmup_total_iters)\n                / (total_iters - warmup_total_iters)\n            )\n        )\n    return lr\n\n\ndef yolox_warm_cos_lr(\n    lr,\n    min_lr_ratio,\n    total_iters,\n    warmup_total_iters,\n    warmup_lr_start,\n    no_aug_iter,\n    iters,\n):\n    \"\"\"Cosine learning rate with warm up.\"\"\"\n    min_lr = lr * min_lr_ratio\n    if iters <= warmup_total_iters:\n        # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start\n        lr = (lr - warmup_lr_start) * pow(\n            iters / float(warmup_total_iters), 2\n        ) + warmup_lr_start\n    elif iters >= total_iters - no_aug_iter:\n        lr = min_lr\n    else:\n        lr = min_lr + 0.5 * (lr - min_lr) * (\n            1.0\n            + math.cos(\n                math.pi\n                * (iters - warmup_total_iters)\n                / (total_iters - warmup_total_iters - no_aug_iter)\n            )\n        )\n    return lr\n\n\ndef yolox_semi_warm_cos_lr(\n    lr,\n    min_lr_ratio,\n    warmup_lr_start,\n    total_iters,\n    normal_iters,\n    no_aug_iters,\n    warmup_total_iters,\n    semi_iters,\n    iters_per_epoch,\n    iters_per_epoch_semi,\n    iters,\n):\n    \"\"\"Cosine learning rate with warm up.\"\"\"\n    min_lr = lr * min_lr_ratio\n    if iters <= warmup_total_iters:\n        # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start\n        lr = (lr - warmup_lr_start) * pow(\n            iters / float(warmup_total_iters), 2\n        ) + warmup_lr_start\n    elif iters >= normal_iters + semi_iters:\n        lr = min_lr\n    elif iters <= normal_iters:\n        lr = min_lr + 0.5 * (lr - min_lr) * (\n            1.0\n            + math.cos(\n                math.pi\n          
      * (iters - warmup_total_iters)\n                / (total_iters - warmup_total_iters - no_aug_iters)\n            )\n        )\n    else:\n        lr = min_lr + 0.5 * (lr - min_lr) * (\n            1.0\n            + math.cos(\n                math.pi\n                * (\n                    normal_iters\n                    - warmup_total_iters\n                    + (iters - normal_iters)\n                    * iters_per_epoch\n                    * 1.0\n                    / iters_per_epoch_semi\n                )\n                / (total_iters - warmup_total_iters - no_aug_iters)\n            )\n        )\n    return lr\n\n\ndef multistep_lr(lr, milestones, gamma, iters):\n    \"\"\"MultiStep learning rate\"\"\"\n    for milestone in milestones:\n        lr *= gamma if iters >= milestone else 1.0\n    return lr\n"
  },
  {
    "path": "yolox/utils/metric.py",
    "content": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\nimport functools\nimport os\nimport time\nfrom collections import defaultdict, deque\nimport psutil\n\nimport numpy as np\n\nimport torch\n\n__all__ = [\n    \"AverageMeter\",\n    \"MeterBuffer\",\n    \"get_total_and_free_memory_in_Mb\",\n    \"occupy_mem\",\n    \"gpu_mem_usage\",\n    \"mem_usage\"\n]\n\n\ndef get_total_and_free_memory_in_Mb(cuda_device):\n    devices_info_str = os.popen(\n        \"nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader\"\n    )\n    devices_info = devices_info_str.read().strip().split(\"\\n\")\n    if \"CUDA_VISIBLE_DEVICES\" in os.environ:\n        visible_devices = os.environ[\"CUDA_VISIBLE_DEVICES\"].split(',')\n        cuda_device = int(visible_devices[cuda_device])\n    total, used = devices_info[int(cuda_device)].split(\",\")\n    return int(total), int(used)\n\n\ndef occupy_mem(cuda_device, mem_ratio=0.9):\n    \"\"\"\n    pre-allocate gpu memory for training to avoid memory Fragmentation.\n    \"\"\"\n    total, used = get_total_and_free_memory_in_Mb(cuda_device)\n    max_mem = int(total * mem_ratio)\n    block_mem = max_mem - used\n    x = torch.cuda.FloatTensor(256, 1024, block_mem)\n    del x\n    time.sleep(5)\n\n\ndef gpu_mem_usage():\n    \"\"\"\n    Compute the GPU memory usage for the current device (MB).\n    \"\"\"\n    mem_usage_bytes = torch.cuda.max_memory_allocated()\n    return mem_usage_bytes / (1024 * 1024)\n\n\ndef mem_usage():\n    \"\"\"\n    Compute the memory usage for the current machine (GB).\n    \"\"\"\n    gb = 1 << 30\n    mem = psutil.virtual_memory()\n    return mem.used / gb\n\n\nclass AverageMeter:\n    \"\"\"Track a series of values and provide access to smoothed values over a\n    window or the global series average.\n    \"\"\"\n\n    def __init__(self, window_size=50):\n        self._deque = deque(maxlen=window_size)\n        self._total = 0.0\n    
    self._count = 0\n\n    def update(self, value):\n        self._deque.append(value)\n        self._count += 1\n        self._total += value\n\n    @property\n    def median(self):\n        d = np.array(list(self._deque))\n        return np.median(d)\n\n    @property\n    def avg(self):\n        # if deque is empty, nan will be returned.\n        d = np.array(list(self._deque))\n        return d.mean()\n\n    @property\n    def global_avg(self):\n        return self._total / max(self._count, 1e-5)\n\n    @property\n    def latest(self):\n        return self._deque[-1] if len(self._deque) > 0 else None\n\n    @property\n    def total(self):\n        return self._total\n\n    def reset(self):\n        self._deque.clear()\n        self._total = 0.0\n        self._count = 0\n\n    def clear(self):\n        self._deque.clear()\n\n\nclass MeterBuffer(defaultdict):\n    \"\"\"Computes and stores the average and current value\"\"\"\n\n    def __init__(self, window_size=20):\n        factory = functools.partial(AverageMeter, window_size=window_size)\n        super().__init__(factory)\n\n    def reset(self):\n        for v in self.values():\n            v.reset()\n\n    def get_filtered_meter(self, filter_key=\"time\"):\n        return {k: v for k, v in self.items() if filter_key in k}\n\n    def update(self, values=None, **kwargs):\n        if values is None:\n            values = {}\n        values.update(kwargs)\n        for k, v in values.items():\n            if isinstance(v, torch.Tensor):\n                v = v.detach()\n            self[k].update(v)\n\n    def clear_meters(self):\n        for v in self.values():\n            v.clear()\n"
  },
  {
    "path": "yolox/utils/mlflow_logger.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Megvii Inc. All rights reserved.\n# Please read docs/mlflow_integration.md for more details.\n\"\"\"\nLogging training runs with hyperparameter, datasets and trained models to MlFlow.\nMlflow support Model Tracking, Experiment Tracking, and Model Registry.\nIt can be hosted on-premises or in all the major cloud provider or with databricks also.\nPlease read docs/mlflow_integration.md for more details.\n\nFor changing default logging Behaviour you can change mlflow environment variables:\n    https://mlflow.org/docs/latest/python_api/mlflow.environment_variables.html\n\nFor more information, please refer to:\nhttps://mlflow.org/docs/latest/introduction/index.html\n\"\"\"\nimport importlib.metadata\nimport importlib.util\nimport json\nimport os\nfrom collections.abc import MutableMapping\nimport packaging.version\nfrom loguru import logger\n\nimport torch\n\nfrom yolox.utils import is_main_process\n\n\nclass MlflowLogger:\n    \"\"\"\n    Main Mlflow logging class to log hyperparameters, metrics, and models to Mlflow.\n    \"\"\"\n    def __init__(self):\n        if not self.is_required_library_available():\n            raise RuntimeError(\n                \"MLflow Logging requires mlflow and python-dotenv to be installed. 
\"\n                \"Run `pip install mlflow python-dotenv`.\")\n\n        import mlflow\n        from dotenv import find_dotenv, load_dotenv\n        load_dotenv(find_dotenv())\n        self.ENV_VARS_TRUE_VALUES = {\"1\", \"ON\", \"YES\", \"TRUE\"}\n        self._MAX_PARAM_VAL_LENGTH = mlflow.utils.validation.MAX_PARAM_VAL_LENGTH\n        self._MAX_PARAMS_TAGS_PER_BATCH = mlflow.utils.validation.MAX_PARAMS_TAGS_PER_BATCH\n        self._initialized = False\n        self._auto_end_run = False\n        self.best_ckpt_upload_pending = False\n        self._tracking_uri = None\n        self._experiment_name = None\n        self._mlflow_log_artifacts = None\n        self._mlflow_log_model_per_n_epochs = None\n        self._mlflow_log_nth_epoch_models = None\n        self.run_name = None\n        self._flatten_params = None\n        self._nested_run = None\n        self._run_id = None\n        self._async_log = None\n        self._ml_flow = mlflow\n\n    def is_required_library_available(self):\n        \"\"\"\n        check if required libraries are available.\n\n        Args: None\n\n        Returns:\n            bool: True if required libraries are available, False otherwise.\n        \"\"\"\n        dotenv_availaible = importlib.util.find_spec(\"dotenv\") is not None\n        mlflow_available = importlib.util.find_spec(\"mlflow\") is not None\n        return dotenv_availaible and mlflow_available\n\n    def flatten_dict(self, d: MutableMapping, parent_key: str = \"\", delimiter: str = \".\"):\n        \"\"\"\n        Flatten a nested dict into a single level dict.\n\n        Args:\n            d(MutableMapping): nested dictionary\n            parent_key(str): parent key\n            delimiter(str): delimiter to use\n\n        Returns:\n            flattened_dict(dict): flattened dictionary\n\n        \"\"\"\n\n        def _flatten_dict(d, parent_key=\"\", delimiter=\".\"):\n            for k, v in d.items():\n                key = str(parent_key) + delimiter + str(k) 
if parent_key else k\n                if v and isinstance(v, MutableMapping):\n                    yield from self.flatten_dict(v, key, delimiter=delimiter).items()\n                else:\n                    yield key, v\n\n        return dict(_flatten_dict(d, parent_key, delimiter))\n\n    def setup(self, args, exp):\n        \"\"\"\n        Set up the optional MLflow integration.\n\n        Args:\n            args(dict): training args dictionary\n            exp(dict): Experiment related hyperparameters\n\n        Returns:\n            None\n\n        Environment:\n        - **YOLOX_MLFLOW_LOG_MODEL_ARTIFACTS** (`str`, *optional*, defaults to `False`):\n            Whether to use MLflow `.log_artifact()` facility to log artifacts. This only makes\n            sense if logging to a remote server, e.g. s3 or GCS. If set to `True` or *1*,\n            will copy each check-points on each save in [`TrainingArguments`]'s `output_dir` to the\n            local or remote artifact storage. Using it without a remote storage will just copy the\n            files to your artifact location.\n        - **YOLOX_MLFLOW_LOG_MODEL_PER_n_EPOCHS** (`int`, *optional*, defaults to 30):\n            If ``YOLOX_MLFLOW_LOG_MODEL_ARTIFACTS`` is enabled then Log model checkpoints after\n            every n epochs. Default is 30. ``best_ckpt.pth`` will be updated after `n` epochs if\n            it has been updated during last `n`  epochs.\n        - **YOLOX_MLFLOW_LOG_Nth_EPOCH_MODELS** (`str`, *optional*, defaults to `False`):\n            Whether to log the ``epoch_n_ckpt.pth`` models along with best_ckpt.pth model after\n             every `n` epoch as per YOLOX_MLFLOW_LOG_MODEL_PER_n_EPOCHS.\n             If set to `True` or *1*, will log ``epoch_n_ckpt.pth`` along with\n             ``best_ckpt.pth`` and as mlflow artifacts in different folders.\n        - **YOLOX_MLFLOW_RUN_NAME** (`str`, *optional*, defaults to random name):\n            Name of new run. 
Used only when ``run_id`` is unspecified. If a new run is\n            created and ``run_name`` is not specified, a random name will be generated for the run.\n        - **YOLOX_MLFLOW_FLATTEN_PARAMS** (`str`, *optional*, defaults to `False`):\n            Whether to flatten the parameters dictionary before logging.\n        - **MLFLOW_TRACKING_URI** (`str`, *optional*):\n            Whether to store runs at a specific path or remote server. Unset by default, which\n            skips setting the tracking URI entirely.\n        - **MLFLOW_EXPERIMENT_NAME** (`str`, *optional*, defaults to `None`):\n            Whether to use an MLflow experiment_name under which to launch the run. Default to\n            `None` which will point to the `Default` experiment in MLflow. Otherwise, it is a\n            case-sensitive name of the experiment to be activated. If an experiment with this\n            name does not exist, a new experiment with this name is created.\n        - **MLFLOW_TAGS** (`str`, *optional*):\n            A string dump of a dictionary of key/value pair to be added to the MLflow run as tags.\n             Example: `os.environ['MLFLOW_TAGS']=\n             '{\"release.candidate\": \"RC1\", \"release.version\": \"2.2.0\"}'`.\n        - **MLFLOW_NESTED_RUN** (`str`, *optional*):\n            Whether to use MLflow nested runs. If set to `True` or *1*, will create a nested run\n            inside the current run.\n        - **MLFLOW_RUN_ID** (`str`, *optional*):\n            Allow to reattach to an existing run which can be useful when resuming training from a\n             checkpoint. 
When `MLFLOW_RUN_ID` environment variable is set, `start_run` attempts\n             to resume a run with the specified run ID and other parameters are ignored.\n        - Other MLflow environment variables: For changing default logging Behaviour refer mlflow\n            environment variables:\n        https://mlflow.org/docs/latest/python_api/mlflow.environment_variables.html\n        - Setup ``Databricks`` integration with MLflow: Provide these two environment variables:\n            DATABRICKS_HOST=\"https://adb-4273978218682429.9.azuredatabricks.net\"\n            DATABRICKS_TOKEN=\"dapixxxxxxxxxxxxx\"\n        \"\"\"\n        self._tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", None)\n        self._experiment_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", None)\n        self._mlflow_log_artifacts = os.getenv(\"YOLOX_MLFLOW_LOG_MODEL_ARTIFACTS\",\n                                               \"False\").upper() in self.ENV_VARS_TRUE_VALUES\n        self._mlflow_log_model_per_n_epochs = int(os.getenv(\n            \"YOLOX_MLFLOW_LOG_MODEL_PER_n_EPOCHS\", 30))\n\n        self._mlflow_log_nth_epoch_models = os.getenv(\"YOLOX_MLFLOW_LOG_Nth_EPOCH_MODELS\",\n                                                      \"False\").upper() in self.ENV_VARS_TRUE_VALUES\n        self.run_name = os.getenv(\"YOLOX_MLFLOW_RUN_NAME\", None)\n        self.run_name = None if len(self.run_name.strip()) == 0 else self.run_name\n        self._flatten_params = os.getenv(\"YOLOX_MLFLOW_FLATTEN_PARAMS\",\n                                         \"FALSE\").upper() in self.ENV_VARS_TRUE_VALUES\n        self._nested_run = os.getenv(\"MLFLOW_NESTED_RUN\",\n                                     \"FALSE\").upper() in self.ENV_VARS_TRUE_VALUES\n        self._run_id = os.getenv(\"MLFLOW_RUN_ID\", None)\n\n        # \"synchronous\" flag is only available with mlflow version >= 2.8.0\n        # https://github.com/mlflow/mlflow/pull/9705\n        # 
https://github.com/mlflow/mlflow/releases/tag/v2.8.0\n        self._async_log = packaging.version.parse(\n            self._ml_flow.__version__) >= packaging.version.parse(\"2.8.0\")\n\n        logger.debug(\n            f\"MLflow experiment_name={self._experiment_name}, run_name={self.run_name}, \"\n            f\"nested={self._nested_run}, tags={self._nested_run}, tracking_uri={self._tracking_uri}\"\n        )\n        if is_main_process():\n            if not self._ml_flow.is_tracking_uri_set():\n                if self._tracking_uri:\n                    self._ml_flow.set_tracking_uri(self._tracking_uri)\n                    logger.debug(f\"MLflow tracking URI is set to {self._tracking_uri}\")\n                else:\n                    logger.debug(\n                        \"Environment variable `MLFLOW_TRACKING_URI` is not provided and therefore\"\n                        \" will not be explicitly set.\"\n                    )\n            else:\n                logger.debug(f\"MLflow tracking URI is set to {self._ml_flow.get_tracking_uri()}\")\n\n            if self._ml_flow.active_run() is None or self._nested_run or self._run_id:\n                if self._experiment_name:\n                    # Use of set_experiment() ensure that Experiment is created if not exists\n                    self._ml_flow.set_experiment(self._experiment_name)\n                self._ml_flow.start_run(run_name=self.run_name, nested=self._nested_run)\n                logger.debug(\n                    f\"MLflow run started with run_id={self._ml_flow.active_run().info.run_id}\")\n                self._auto_end_run = True\n                self._initialized = True\n            # filters these params from args\n            keys = ['experiment_name', 'batch_size', 'exp_file', 'resume', 'ckpt', 'start_epoch',\n                    'num_machines', 'fp16', 'logger']\n            combined_dict = {k: v for k, v in vars(args).items() if k in keys}\n            if exp is not None:\n            
    exp_dict = self.convert_exp_todict(exp)\n                combined_dict = {**exp_dict, **combined_dict}\n            self.log_params_mlflow(combined_dict)\n            mlflow_tags = os.getenv(\"MLFLOW_TAGS\", None)\n            if mlflow_tags:\n                mlflow_tags = json.loads(mlflow_tags)\n                self._ml_flow.set_tags(mlflow_tags)\n\n    def log_params_mlflow(self, params_dict):\n        \"\"\"\n        Log hyperparameters to MLflow.\n        MLflow's log_param() only accepts values no longer than 250 characters.\n        No overwriting of existing parameters is allowed by default from mlflow.\n\n        Args:\n            params_dict(dict): dict of hyperparameters\n\n        Returns:\n            None\n        \"\"\"\n        if is_main_process():\n            params_dict = self.flatten_dict(params_dict) if self._flatten_params else params_dict\n            # remove params that are too long for MLflow\n            for name, value in list(params_dict.items()):\n                # internally, all values are converted to str in MLflow\n                if len(str(value)) > self._MAX_PARAM_VAL_LENGTH:\n                    logger.warning(\n                        f'Trainer is attempting to log a value of \"{value}\" for key \"{name}\" as a '\n                        f'parameter. MLflow\\'s log_param() only accepts values no longer than 250 '\n                        f'characters so we dropped this attribute. 
You can use '\n                        f'`MLFLOW_FLATTEN_PARAMS` environment variable to flatten the parameters '\n                        f'and avoid this message.'\n                    )\n                    del params_dict[name]\n            # MLflow cannot log more than 100 values in one go, so we have to split it\n            combined_dict_items = list(params_dict.items())\n            for i in range(0, len(combined_dict_items), self._MAX_PARAMS_TAGS_PER_BATCH):\n                if self._async_log:\n                    self._ml_flow.log_params(\n                        dict(combined_dict_items[i: i + self._MAX_PARAMS_TAGS_PER_BATCH]),\n                        synchronous=False\n                    )\n                else:\n                    self._ml_flow.log_params(\n                        dict(combined_dict_items[i: i + self._MAX_PARAMS_TAGS_PER_BATCH])\n                    )\n\n    def convert_exp_todict(self, exp):\n        \"\"\"\n        Convert the experiment object to dictionary for required parameter only\n\n        Args:\n            exp(dict): Experiment object\n\n        Returns:\n            exp_dict(dict): dict of experiment parameters\n\n        \"\"\"\n        filter_keys = ['max_epoch', 'num_classes', 'input_size', 'output_dir',\n                       'data_dir', 'train_ann', 'val_ann', 'test_ann',\n                       'test_conf', 'nmsthre']\n        exp_dict = {k: v for k, v in exp.__dict__.items()\n                    if not k.startswith(\"__\") and k in filter_keys}\n        return exp_dict\n\n    def on_log(self, args, exp, step, logs):\n        \"\"\"\n        Log metrics to MLflow.\n\n        Args:\n            args(dict): training args dictionary\n            exp(dict): Experiment related hyperparameters\n            step(int): current training step\n            logs(dict): dictionary of logs to be logged\n\n        Returns:\n            None\n        \"\"\"\n        # step = trainer.progress_in_iter\n        if not 
self._initialized:\n            self.setup(args, exp)\n        if is_main_process():  # master thread only\n            metrics = {}\n            for k, v in logs.items():\n                if isinstance(v, (int, float)):\n                    metrics[k] = v\n                elif isinstance(v, torch.Tensor) and v.numel() == 1:\n                    metrics[k] = v.item()\n                else:\n                    logger.warning(\n                        f'Trainer is attempting to log a value of \"{v}\" of type {type(v)} for key '\n                        f'\"{k}\" as a metric. MLflow log_metric() only accepts float and int types '\n                        f'so we dropped this attribute.'\n                    )\n\n            if self._async_log:\n                self._ml_flow.log_metrics(metrics=metrics, step=step, synchronous=False)\n            else:\n                self._ml_flow.log_metrics(metrics=metrics, step=step)\n\n    def on_train_end(self, args, file_name, metadata):\n        \"\"\"\n        Mlflow logging action to take when training ends:\n            1. log the training log file\n            2. publish the latest best model to model_registry if it is allowed in config file\n            3. 
close the mlflow run\n\n        Args:\n            args(dict): training args dictionary\n            file_name(str): output directory\n            metadata(dict): model related metadata\n\n        Returns:\n            None\n        \"\"\"\n        if is_main_process() and self._initialized:\n            self.save_log_file(args, file_name)\n            if self.best_ckpt_upload_pending:\n                model_file_name = \"best_ckpt\"\n                mlflow_out_dir = f\"{args.experiment_name}/{model_file_name}\"\n                artifact_path = os.path.join(file_name, f\"{model_file_name}.pth\")\n                self.mlflow_save_pyfunc_model(metadata, artifact_path, mlflow_out_dir)\n            if self._auto_end_run and self._ml_flow.active_run():\n                self._ml_flow.end_run()\n\n    def save_log_file(self, args, file_name):\n        \"\"\"\n        Save the training log file to mlflow artifact path\n        Args:\n            args(dict): training args dictionary\n            file_name(str): output directory\n\n        Returns:\n            None\n        \"\"\"\n        log_file_path = os.path.join(file_name, \"train_log.txt\")\n        mlflow_out_dir = f\"{args.experiment_name}\"\n        logger.info(f\"Logging logfile: {log_file_path} in mlflow artifact path: {mlflow_out_dir}.\")\n        self._ml_flow.log_artifact(log_file_path, mlflow_out_dir)\n\n    def save_checkpoints(self, args, exp, file_name, epoch, metadata, update_best_ckpt):\n        \"\"\"\n        Save the model checkpoints to mlflow artifact path\n        if save_history_ckpt is enabled then\n\n        Args:\n            args(dict): training args dictionary\n            exp(dict): Experiment related hyperparameters\n            file_name(str): output directory\n            epoch(int): current epoch\n            metadata(dict): model related metadata\n            update_best_ckpt(bool): bool to show if best_ckpt was updated\n\n        Returns:\n            None\n        \"\"\"\n        if 
is_main_process() and self._mlflow_log_artifacts:\n            if update_best_ckpt:\n                self.best_ckpt_upload_pending = True\n            if ((epoch + 1) % self._mlflow_log_model_per_n_epochs) == 0:\n                self.save_log_file(args, file_name)\n                if self.best_ckpt_upload_pending:\n                    model_file_name = \"best_ckpt\"\n                    mlflow_out_dir = f\"{args.experiment_name}/{model_file_name}\"\n                    artifact_path = os.path.join(file_name, f\"{model_file_name}.pth\")\n                    self.mlflow_save_pyfunc_model(metadata, artifact_path, mlflow_out_dir)\n                    self.best_ckpt_upload_pending = False\n                if self._mlflow_log_nth_epoch_models and exp.save_history_ckpt:\n                    model_file_name = f\"epoch_{epoch + 1}_ckpt\"\n                    mlflow_out_dir = f\"{args.experiment_name}/hist_epochs/{model_file_name}\"\n                    artifact_path = os.path.join(file_name, f\"{model_file_name}.pth\")\n                    self.mlflow_save_pyfunc_model(metadata, artifact_path, mlflow_out_dir)\n\n    def mlflow_save_pyfunc_model(self, metadata, artifact_path, mlflow_out_dir):\n        \"\"\"\n        This will send the given model to mlflow server if\n        YOLOX_MLFLOW_LOG_MODEL_ARTIFACTS is true\n            - optionally publish to model registry if allowed in config file\n\n        Args:\n            metadata(dict): model related metadata\n            artifact_path(str): model checkpoint path\n            mlflow_out_dir(str): mlflow artifact path\n\n        Returns:\n            None\n        \"\"\"\n        if is_main_process() and self._initialized and self._mlflow_log_artifacts:\n            logger.info(\n                f\"Logging checkpoint {artifact_path} artifacts in mlflow artifact path: \"\n                f\"{mlflow_out_dir}. 
This may take time.\")\n            if os.path.exists(artifact_path):\n                self._ml_flow.pyfunc.log_model(\n                    mlflow_out_dir,\n                    artifacts={\"model_path\": artifact_path},\n                    python_model=self._ml_flow.pyfunc.PythonModel(),\n                    metadata=metadata\n                )\n\n    def __del__(self):\n        \"\"\"\n        if the previous run is not terminated correctly, the fluent API will\n        not let you start a new run before the previous one is killed\n\n        Args: None\n        Return: None\n        \"\"\"\n        if (\n                self._auto_end_run\n                and callable(getattr(self._ml_flow, \"active_run\", None))\n                and self._ml_flow.active_run() is not None\n        ):\n            self._ml_flow.end_run()\n"
  },
  {
    "path": "yolox/utils/model_utils.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport contextlib\nfrom copy import deepcopy\nfrom typing import Sequence\n\nimport torch\nimport torch.nn as nn\n\n__all__ = [\n    \"fuse_conv_and_bn\",\n    \"fuse_model\",\n    \"get_model_info\",\n    \"replace_module\",\n    \"freeze_module\",\n    \"adjust_status\",\n]\n\n\ndef get_model_info(model: nn.Module, tsize: Sequence[int]) -> str:\n    from thop import profile\n\n    stride = 64\n    img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)\n    flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)\n    params /= 1e6\n    flops /= 1e9\n    flops *= tsize[0] * tsize[1] / stride / stride * 2  # Gflops\n    info = \"Params: {:.2f}M, Gflops: {:.2f}\".format(params, flops)\n    return info\n\n\ndef fuse_conv_and_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:\n    \"\"\"\n    Fuse convolution and batchnorm layers.\n    check more info on https://tehnokv.com/posts/fusing-batchnorm-and-conv/\n\n    Args:\n        conv (nn.Conv2d): convolution to fuse.\n        bn (nn.BatchNorm2d): batchnorm to fuse.\n\n    Returns:\n        nn.Conv2d: fused convolution behaves the same as the input conv and bn.\n    \"\"\"\n    fusedconv = (\n        nn.Conv2d(\n            conv.in_channels,\n            conv.out_channels,\n            kernel_size=conv.kernel_size,\n            stride=conv.stride,\n            padding=conv.padding,\n            groups=conv.groups,\n            bias=True,\n        )\n        .requires_grad_(False)\n        .to(conv.weight.device)\n    )\n\n    # prepare filters\n    w_conv = conv.weight.clone().view(conv.out_channels, -1)\n    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))\n    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))\n\n    # prepare spatial bias\n    b_conv = (\n        torch.zeros(conv.weight.size(0), 
device=conv.weight.device)\n        if conv.bias is None\n        else conv.bias\n    )\n    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(\n        torch.sqrt(bn.running_var + bn.eps)\n    )\n    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)\n\n    return fusedconv\n\n\ndef fuse_model(model: nn.Module) -> nn.Module:\n    \"\"\"fuse conv and bn in model\n\n    Args:\n        model (nn.Module): model to fuse\n\n    Returns:\n        nn.Module: fused model\n    \"\"\"\n    from yolox.models.network_blocks import BaseConv\n\n    for m in model.modules():\n        if type(m) is BaseConv and hasattr(m, \"bn\"):\n            m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv\n            delattr(m, \"bn\")  # remove batchnorm\n            m.forward = m.fuseforward  # update forward\n    return model\n\n\ndef replace_module(module, replaced_module_type, new_module_type, replace_func=None) -> nn.Module:\n    \"\"\"\n    Replace given type in module to a new type. mostly used in deploy.\n\n    Args:\n        module (nn.Module): model to apply replace operation.\n        replaced_module_type (Type): module type to be replaced.\n        new_module_type (Type)\n        replace_func (function): python function to describe replace logic. 
Default value None.\n\n    Returns:\n        model (nn.Module): module that has already been replaced.\n    \"\"\"\n\n    def default_replace_func(replaced_module_type, new_module_type):\n        return new_module_type()\n\n    if replace_func is None:\n        replace_func = default_replace_func\n\n    model = module\n    if isinstance(module, replaced_module_type):\n        model = replace_func(replaced_module_type, new_module_type)\n    else:  # recursively replace\n        for name, child in module.named_children():\n            new_child = replace_module(child, replaced_module_type, new_module_type)\n            if new_child is not child:  # child is already replaced\n                model.add_module(name, new_child)\n\n    return model\n\n\ndef freeze_module(module: nn.Module, name=None) -> nn.Module:\n    \"\"\"freeze module inplace\n\n    Args:\n        module (nn.Module): module to freeze.\n        name (str, optional): name to freeze. If not given, freeze the whole module.\n            Note that fuzzy match is not supported. 
Defaults to None.\n\n    Examples:\n        freeze the backbone of model\n        >>> freeze_module(model.backbone)\n\n        or freeze the backbone of model by name\n        >>> freeze_module(model, name=\"backbone\")\n    \"\"\"\n    for param_name, parameter in module.named_parameters():\n        if name is None or name in param_name:\n            parameter.requires_grad = False\n\n    # ensure module like BN and dropout are frozen\n    for module_name, sub_module in module.named_modules():\n        # actually there is no need to call eval for every single sub_module\n        if name is None or name in module_name:\n            sub_module.eval()\n\n    return module\n\n\n@contextlib.contextmanager\ndef adjust_status(module: nn.Module, training: bool = False) -> nn.Module:\n    \"\"\"Adjust module to training/eval mode temporarily.\n\n    Args:\n        module (nn.Module): module to adjust status.\n        training (bool): training mode to set. True for train mode, False for eval mode.\n\n    Examples:\n        >>> with adjust_status(model, training=False):\n        ...     model(data)\n    \"\"\"\n    status = {}\n\n    def backup_status(module):\n        for m in module.modules():\n            # save prev status to dict\n            status[m] = m.training\n            m.training = training\n\n    def recover_status(module):\n        for m in module.modules():\n            # recover prev status from dict\n            m.training = status.pop(m)\n\n    backup_status(module)\n    yield module\n    recover_status(module)\n"
  },
  {
    "path": "yolox/utils/setup_env.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport os\nimport subprocess\nfrom loguru import logger\n\nimport cv2\n\nfrom .dist import get_world_size, is_main_process\n\n__all__ = [\"configure_nccl\", \"configure_module\", \"configure_omp\"]\n\n\ndef configure_nccl():\n    \"\"\"Configure multi-machine environment variables of NCCL.\"\"\"\n    os.environ[\"NCCL_LAUNCH_MODE\"] = \"PARALLEL\"\n    os.environ[\"NCCL_IB_HCA\"] = subprocess.getoutput(\n        \"pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; \"\n        \"do cat $i/ports/1/gid_attrs/types/* 2>/dev/null \"\n        \"| grep v >/dev/null && echo $i ; done; popd > /dev/null\"\n    )\n    os.environ[\"NCCL_IB_GID_INDEX\"] = \"3\"\n    os.environ[\"NCCL_IB_TC\"] = \"106\"\n\n\ndef configure_omp(num_threads=1):\n    \"\"\"\n    If OMP_NUM_THREADS is not configured and world_size is greater than 1,\n    Configure OMP_NUM_THREADS environment variables of NCCL to `num_thread`.\n\n    Args:\n        num_threads (int): value of `OMP_NUM_THREADS` to set.\n    \"\"\"\n    # We set OMP_NUM_THREADS=1 by default, which achieves the best speed on our machines\n    # feel free to change it for better performance.\n    if \"OMP_NUM_THREADS\" not in os.environ and get_world_size() > 1:\n        os.environ[\"OMP_NUM_THREADS\"] = str(num_threads)\n        if is_main_process():\n            logger.info(\n                \"\\n***************************************************************\\n\"\n                \"We set `OMP_NUM_THREADS` for each process to {} to speed up.\\n\"\n                \"please further tune the variable for optimal performance.\\n\"\n                \"***************************************************************\".format(\n                    os.environ[\"OMP_NUM_THREADS\"]\n                )\n            )\n\n\ndef configure_module(ulimit_value=8192):\n    \"\"\"\n    Configure pytorch module environment. 
setting of ulimit and cv2 will be set.\n\n    Args:\n        ulimit_value(int): default open file number on linux. Default value: 8192.\n    \"\"\"\n    # system setting\n    try:\n        import resource\n\n        rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)\n        resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1]))\n    except Exception:\n        # Exception might be raised in Windows OS or rlimit reaches max limit number.\n        # However, set rlimit value might not be necessary.\n        pass\n\n    # cv2\n    # multiprocess might be harmful on performance of torch dataloader\n    os.environ[\"OPENCV_OPENCL_RUNTIME\"] = \"disabled\"\n    try:\n        cv2.setNumThreads(0)\n        cv2.ocl.setUseOpenCL(False)\n    except Exception:\n        # cv2 version mismatch might raise exceptions.\n        pass\n"
  },
  {
    "path": "yolox/utils/visualize.py",
    "content": "#!/usr/bin/env python3\n# -*- coding:utf-8 -*-\n# Copyright (c) Megvii Inc. All rights reserved.\n\nimport cv2\nimport numpy as np\n\n__all__ = [\"vis\"]\n\n\ndef vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):\n\n    for i in range(len(boxes)):\n        box = boxes[i]\n        cls_id = int(cls_ids[i])\n        score = scores[i]\n        if score < conf:\n            continue\n        x0 = int(box[0])\n        y0 = int(box[1])\n        x1 = int(box[2])\n        y1 = int(box[3])\n\n        color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()\n        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)\n        txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)\n        font = cv2.FONT_HERSHEY_SIMPLEX\n\n        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]\n        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)\n\n        txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()\n        cv2.rectangle(\n            img,\n            (x0, y0 + 1),\n            (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])),\n            txt_bk_color,\n            -1\n        )\n        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)\n\n    return img\n\n\n_COLORS = np.array(\n    [\n        0.000, 0.447, 0.741,\n        0.850, 0.325, 0.098,\n        0.929, 0.694, 0.125,\n        0.494, 0.184, 0.556,\n        0.466, 0.674, 0.188,\n        0.301, 0.745, 0.933,\n        0.635, 0.078, 0.184,\n        0.300, 0.300, 0.300,\n        0.600, 0.600, 0.600,\n        1.000, 0.000, 0.000,\n        1.000, 0.500, 0.000,\n        0.749, 0.749, 0.000,\n        0.000, 1.000, 0.000,\n        0.000, 0.000, 1.000,\n        0.667, 0.000, 1.000,\n        0.333, 0.333, 0.000,\n        0.333, 0.667, 0.000,\n        0.333, 1.000, 0.000,\n        0.667, 0.333, 0.000,\n        0.667, 0.667, 0.000,\n        0.667, 1.000, 0.000,\n        1.000, 0.333, 0.000,\n        1.000, 0.667, 0.000,\n 
       1.000, 1.000, 0.000,\n        0.000, 0.333, 0.500,\n        0.000, 0.667, 0.500,\n        0.000, 1.000, 0.500,\n        0.333, 0.000, 0.500,\n        0.333, 0.333, 0.500,\n        0.333, 0.667, 0.500,\n        0.333, 1.000, 0.500,\n        0.667, 0.000, 0.500,\n        0.667, 0.333, 0.500,\n        0.667, 0.667, 0.500,\n        0.667, 1.000, 0.500,\n        1.000, 0.000, 0.500,\n        1.000, 0.333, 0.500,\n        1.000, 0.667, 0.500,\n        1.000, 1.000, 0.500,\n        0.000, 0.333, 1.000,\n        0.000, 0.667, 1.000,\n        0.000, 1.000, 1.000,\n        0.333, 0.000, 1.000,\n        0.333, 0.333, 1.000,\n        0.333, 0.667, 1.000,\n        0.333, 1.000, 1.000,\n        0.667, 0.000, 1.000,\n        0.667, 0.333, 1.000,\n        0.667, 0.667, 1.000,\n        0.667, 1.000, 1.000,\n        1.000, 0.000, 1.000,\n        1.000, 0.333, 1.000,\n        1.000, 0.667, 1.000,\n        0.333, 0.000, 0.000,\n        0.500, 0.000, 0.000,\n        0.667, 0.000, 0.000,\n        0.833, 0.000, 0.000,\n        1.000, 0.000, 0.000,\n        0.000, 0.167, 0.000,\n        0.000, 0.333, 0.000,\n        0.000, 0.500, 0.000,\n        0.000, 0.667, 0.000,\n        0.000, 0.833, 0.000,\n        0.000, 1.000, 0.000,\n        0.000, 0.000, 0.167,\n        0.000, 0.000, 0.333,\n        0.000, 0.000, 0.500,\n        0.000, 0.000, 0.667,\n        0.000, 0.000, 0.833,\n        0.000, 0.000, 1.000,\n        0.000, 0.000, 0.000,\n        0.143, 0.143, 0.143,\n        0.286, 0.286, 0.286,\n        0.429, 0.429, 0.429,\n        0.571, 0.571, 0.571,\n        0.714, 0.714, 0.714,\n        0.857, 0.857, 0.857,\n        0.000, 0.447, 0.741,\n        0.314, 0.717, 0.741,\n        0.50, 0.5, 0\n    ]\n).astype(np.float32).reshape(-1, 3)\n"
  }
]