[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n*.ipynb\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n# cython generated cpp\ndata\n.vscode\n.idea\n\n# custom\n*.pkl\n*.gif\n*.pkl.json\n*.log.json\nwork_dirs/\nwork_dirs_bak/\ndebug_img/\nmodel_file/\nexps/\n*~\nmmdet3d/.mim\n\n# Pytorch\n*.pth\n\n# demo\ndemo/\n*.jpg\n*.png\n*.obj\n*.ply\n*.zip\n*.tar\n*.tar.gz\n*.json\n\n# datasets\n/datasets\n/data_ann\n/datasets_local\n\n# softlinks\nav2\nnuScenes\n\n# viz\nviz\nviz_bak\n\n*pkl*\n\ndemo\nmmdetection3d\nwork_dirs\nvis_global\nvis_local\n\n"
  },
  {
    "path": "LICENSE",
    "content": "The code, data, and model weights in this repository are not allowed for commercial usage. For research purposes, the terms follow the GPLv3 as in the separate file \"LICENSE_GPL\". \n\n-- Authors of the paper \"MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping\".\n"
  },
  {
    "path": "LICENSE_GPL",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU General Public License is a free, copyleft license for\nsoftware and other kinds of works.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nthe GNU General Public License is intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.  We, the Free Software Foundation, use the\nGNU General Public License for most of our software; it applies also to\nany other work released this way by its authors.  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  To protect your rights, we need to prevent others from denying you\nthese rights or asking you to surrender the rights.  Therefore, you have\ncertain responsibilities if you distribute copies of the software, or if\nyou modify it: responsibilities to respect the freedom of others.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must pass on to the recipients the same\nfreedoms that you received.  You must make sure that they, too, receive\nor can get the source code.  And you must show them these terms so they\nknow their rights.\n\n  Developers that use the GNU GPL protect your rights with two steps:\n(1) assert copyright on the software, and (2) offer you this License\ngiving you legal permission to copy, distribute and/or modify it.\n\n  For the developers' and authors' protection, the GPL clearly explains\nthat there is no warranty for this free software.  For both users' and\nauthors' sake, the GPL requires that modified versions be marked as\nchanged, so that their problems will not be attributed erroneously to\nauthors of previous versions.\n\n  Some devices are designed to deny users access to install or run\nmodified versions of the software inside them, although the manufacturer\ncan do so.  This is fundamentally incompatible with the aim of\nprotecting users' freedom to change the software.  The systematic\npattern of such abuse occurs in the area of products for individuals to\nuse, which is precisely where it is most unacceptable.  Therefore, we\nhave designed this version of the GPL to prohibit the practice for those\nproducts.  If such problems arise substantially in other domains, we\nstand ready to extend this provision to those domains in future versions\nof the GPL, as needed to protect the freedom of users.\n\n  Finally, every program is threatened constantly by software patents.\nStates should not allow patents to restrict development and use of\nsoftware on general-purpose computers, but in those that do, we wish to\navoid the special danger that patents applied to a free program could\nmake it effectively proprietary.  
To prevent this, the GPL assures that\npatents cannot be used to render the program non-free.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. Definitions.\n\n  \"This License\" refers to version 3 of the GNU General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  \"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  
A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. 
Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  
Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  
The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. Use with the GNU Affero General Public License.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU Affero General Public License into a single\ncombined work, and to convey the resulting work.  The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the special requirements of the GNU Affero General Public License,\nsection 13, concerning interaction through a network will apply to the\ncombination as such.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  If the Program does not specify a version number of the\nGNU General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. 
Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program.  
If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If the program does terminal interaction, make it output a short\nnotice like this when it starts in an interactive mode:\n\n    <program>  Copyright (C) <year>  <name of author>\n    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  Of course, your program's commands\nmight be different; for a GUI interface, you would use an \"about box\".\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU GPL, see\n<https://www.gnu.org/licenses/>.\n\n  The GNU General Public License does not permit incorporating your program\ninto proprietary programs.  If your program is a subroutine library, you\nmay consider it more useful to permit linking proprietary applications with\nthe library.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.  But first, please read\n<https://www.gnu.org/licenses/why-not-lgpl.html>."
  },
  {
    "path": "README.md",
    "content": "<div align=\"center\">\n<h2 align=\"center\"> MapTracker: Tracking with Strided Memory Fusion for <br/> Consistent Vector HD Mapping </h1>\n\n<h4 align=\"center\"> ECCV 2024 (Oral) </h4>\n\n\n[Jiacheng Chen*<sup>1</sup>](https://jcchen.me) , [Yuefan Wu*<sup>1</sup>](https://ivenwu.com/) , [Jiaqi Tan*<sup>1</sup>](https://www.linkedin.com/in/jiaqi-christina-tan-800697158/), [Hang Ma<sup>1</sup>](https://www.cs.sfu.ca/~hangma/), [Yasutaka Furukawa<sup>1,2</sup>](https://www2.cs.sfu.ca/~furukawa/)\n\n<sup>1</sup> Simon Fraser University <sup>2</sup> Wayve\n\n\n([arXiv](https://arxiv.org/abs/2403.15951), [Project page](https://map-tracker.github.io/))\n\n</div>\n\n\n\nhttps://github.com/woodfrog/maptracker/assets/13405255/1c0e072a-cb77-4000-b81b-5b9fd40f8f39\n\n\n\n\nThis repository provides the official implementation of the paper [MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping](https://arxiv.org/abs/2403.15951). MapTracker reconstructs temporally consistent vector HD maps, and the local maps can be progressively merged into a global reconstruction.\n\nThis repository is built upon [StreamMapNet](https://github.com/yuantianyuan01/StreamMapNet). \n\n\n## Table of Contents\n- [Introduction](#introduction)\n- [Model Architecture](#model-architecture)\n- [Installation](#installation)\n- [Data preparation](#data-preparation)\n- [Getting Started](#getting-started)\n- [Acknowledgements](#acknowledgements)\n- [Citation](#citation)\n- [License](#license)\n\n## Introduction\nThis paper presents a vector HD-mapping algorithm that formulates the mapping as a tracking task and uses a history of memory latents to ensure consistent reconstructions over time.\n\nOur method, MapTracker, accumulates a sensor stream into memory buffers of two latent representations: 1) Raster latents in the bird's-eye-view (BEV) space and 2) Vector latents over the road elements (i.e., pedestrian-crossings, lane-dividers, and road-boundaries). The approach borrows the query propagation paradigm from the tracking literature that explicitly associates tracked road elements from the previous frame to the current, while fusing a subset of memory latents selected with distance strides to further enhance temporal consistency. A vector latent is decoded to reconstruct the geometry of a road element.\n\nThe paper further makes benchmark contributions by 1) Improving processing code for existing datasets to produce consistent ground truth with temporal alignments and 2) Augmenting existing mAP metrics with consistency checks. MapTracker significantly outperforms existing methods on both nuScenes and Agroverse2 datasets by over 8% and 19% on the conventional and the new consistency-aware metrics, respectively.\n\n\n## Model Architecture\n\n![visualization](docs/fig/arch.png)\n\n(Top) The architecture of MapTracker, consistsing of the BEV and VEC Modules and their memory buffers. (Bottom) The close-up views of the BEV and the vector fusion layers.\n\nThe **BEV Module** takes ConvNet features of onboard perspective images, the BEV memory buffer ${M_{\\text{BEV}}(t-1), M_{\\text{BEV}}(t-2),\\ ... }$ and vehicle motions ${P^t_{t-1}, P^t_{t-2},\\ ... }$ as input. It propagates the previous BEV memory $M_{\\text{BEV}}(t-1)$ based on vehicle motion to initialize $M_{\\text{BEV}}(t)$. 
\nThe **VEC Module** propagates the previous latent vector memory $M_{\\text{VEC}}(t-1)$ with a PropMLP to initialize the vector queries $M_{\\text{VEC}}(t)$. In the Vector Memory Fusion layer, each propagated $M_{\\text{VEC}}(t)$ is fused with its selected history vector memories $\\{M_{\\text{VEC}}^{*}(t'), t' \\in \\pi(t)\\}$. The final vector latents are decoded to reconstruct the road elements.\n\n\n## Installation\n\nPlease refer to the [installation guide](docs/installation.md) to set up the environment.\n\n\n## Data preparation\n\nFor instructions on downloading and preparing the nuScenes and Argoverse2 datasets, as well as downloading our checkpoints, please see the [data preparation guide](docs/data_preparation.md).\n\n\n## Getting Started\n\nFor instructions on how to run training, inference, evaluation, and visualization, please follow the [getting started guide](docs/getting_started.md).\n\n\n## Acknowledgements\n\nWe're grateful to the open-source projects below; their great work made our project possible:\n\n* BEV perception: [BEVFormer](https://github.com/fundamentalvision/BEVFormer) ![GitHub stars](https://img.shields.io/github/stars/fundamentalvision/BEVFormer.svg?style=flat&label=Star)\n* Vector HD mapping: [StreamMapNet](https://github.com/yuantianyuan01/StreamMapNet) ![GitHub stars](https://img.shields.io/github/stars/yuantianyuan01/StreamMapNet.svg?style=flat&label=Star), [MapTR](https://github.com/hustvl/MapTR) ![GitHub stars](https://img.shields.io/github/stars/hustvl/MapTR.svg?style=flat&label=Star)\n\n\n## Citation\n\nIf you find MapTracker useful in your research or applications, please consider citing:\n\n```\n@inproceedings{chen2024maptracker,\n  author    = {Chen, Jiacheng and Wu, Yuefan and Tan, Jiaqi and Ma, Hang and Furukawa, Yasutaka},\n  title     = {MapTracker: Tracking with Strided Memory Fusion for Consistent Vector HD Mapping},\n  booktitle = {European Conference on Computer Vision (ECCV)},\n  year      = {2024}\n}\n```\n\n## License\n\nThis project is licensed under the GPL; see the [license file](LICENSE) for details.\n"
  },
  {
    "path": "docs/data_preparation.md",
    "content": "\n# Data Preparation\n\nCompared to the data preparation procedure of StreamMapNet or MapTR, we have one more step to generate the ground truth tracking information (Step 3). \n\nWe noticed that the track generation results can be slighly different when running on different machines (potentially because Shapely's behaviors are slightly different across different machines), **so please always run the Step 3 below on the training machine to generate the gt tracking information**. \n\n## nuScenes\n**Step 1.** Download [nuScenes](https://www.nuscenes.org/download) dataset to `./datasets/nuscenes`.\n\n\n**Step 2.** Generate annotation files for NuScenes dataset (the same as StreamMapNet)\n\n```\npython tools/data_converter/nuscenes_converter.py --data-root ./datasets/nuscenes\n```\n\nAdd ``--newsplit`` to generate the metadata for the new split (geographical-based split) provided by StreamMapNet.\n\n**Step 3.** Generate the tracking ground truth by \n\n```\npython tools/tracking/prepare_gt_tracks.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py  --out-dir tracking_gts/nuscenes --visualize\n```\n\nAdd the ``--visualize`` flag to visualize the data with element IDs derived from our track generation process, or remove it to save disk memory.  \n\nFor generating the G.T. tracks of the new split, change the config file accordingly.\n\n\n## Argoverse2\n\n**Step 1.** Download [Argoverse2 (sensor)](https://argoverse.github.io/user-guide/getting_started.html#download-the-datasets) dataset to `./datasets/av2`.\n\n**Step 2.** Generate annotation files for Argoverse2 dataset.\n\n```\npython tools/data_converter/argoverse_converter.py --data-root ./datasets/av2\n```\n\n**Step 3.** Generate the tracking ground truth by \n\n```\npython tools/tracking/prepare_gt_tracks.py plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage3_joint_finetune.py  --out-dir tracking_gts/av2 --visualize\n```\n\n\n## Checkpoints\n\nWe provide the checkpoints at [this Dropbox link](https://www.dropbox.com/scl/fo/miulg8q9oby7q2x5vemme/ALoxX1HyxGlfR9y3xlqfzeE?rlkey=i3rw4mbq7lacblc7xsnjkik1u&dl=0) or [this HuggingFace repo](https://huggingface.co/cccjc/maptracker/tree/main). 
\n\n## File structures\n\nMake sure the final file structure looks like the one below:\n\n```\nmaptracker\n├── mmdetection3d\n├── tools\n├── plugin\n│   ├── configs\n│   ├── models\n│   ├── datasets\n│   ├── ...\n├── work_dirs\n│   ├── pretrained_ckpts\n│   │   ├── maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune\n│   │   │   ├── latest.pth\n│   │   ├── ...\n│   ├── ...\n├── datasets\n│   ├── nuscenes\n│   │   ├── maps <-- used\n│   │   ├── samples <-- key frames\n│   │   ├── v1.0-test <-- metadata\n│   │   ├── v1.0-trainval <-- metadata and annotations\n│   │   ├── nuscenes_map_infos_train_{newsplit}.pkl <-- train annotations\n│   │   ├── nuscenes_map_infos_train_{newsplit}_gt_tracks.pkl <-- train gt tracks\n│   │   ├── nuscenes_map_infos_val_{newsplit}.pkl <-- val annotations\n│   │   ├── nuscenes_map_infos_val_{newsplit}_gt_tracks.pkl <-- val gt tracks\n│   ├── av2\n│   │   ├── train\n│   │   ├── val\n│   │   ├── test\n│   │   ├── maptrv2_val_samples_info.pkl <-- MapTR's av2 metadata, used to align the val set\n│   │   ├── av2_map_infos_train_{newsplit}.pkl <-- train annotations\n│   │   ├── av2_map_infos_train_{newsplit}_gt_tracks.pkl <-- train gt tracks\n│   │   ├── av2_map_infos_val_{newsplit}.pkl <-- val annotations\n│   │   ├── av2_map_infos_val_{newsplit}_gt_tracks.pkl <-- val gt tracks\n\n```\n"
  },
  {
    "path": "docs/getting_started.md",
    "content": "# Getting started with MapTracker\n\nIn this document, we provide the commands for running inference/evaluation, training, and visualization.\n\n\n## Inference and evaluation\n\n\n### Inference and evaluate with Chamfer-based mAP\n\n\nRun the following command to do inference and evaluation using the pretrained checkpoints, assuming 8 GPUs are used.\n\n```\nCUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7  bash tools/dist_test.sh  plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py    work_dirs/pretrained_ckpts/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/latest.pth  8  --eval --eval-options save_semantic=True\n```\n\nSet the ``--eval-options save_semantic=True`` to also save the semantic segmentation results of the BEV module.\n\n\n### Evaluate with C-mAP\n\nGenerate prediction matching by\n```\npython tools/tracking/prepare_pred_tracks.py ${CONFIG} --result_path ${SUBMISSION_FILE} --cons_frames ${COMEBACK_FRAMES}\n```\n\nEvaluate with C-mAP by\n```\npython tools/tracking/calculate_cmap.py ${CONFIG} --result_path ${PRED_MATCHING_INFO}\n```\n\nAn example evaluation:\n```\npython tools/tracking/calculate_cmap.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py --result_path ./work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl\n```\n\n### Results\n\nBy running with the checkpoints we provided in the [data preparation guide](docs/data_preparation.md), the expected results are:\n\n|                          Dataset                               | Split | Divider | Crossing | Boundary | mAP |      C-mAP  |\n|:------------------------------------------------------------------------:|:--------:|:-------:|:--------:|:--------:|:---------:|:-------------------------------------------------------------------------------------------:|\n|            nuScenes             |  old  |  74.14  |  80.04   |  74.06   |   76.08  | 69.13  |\n|            nuScenes             |  new  |  30.10  |  45.86   |  45.06   |   40.34  | 32.50  |\n|            Argoverse2           |  old  |  76.99  |  79.97   |  73.66   |   76.87  | 68.35  |\n|            Argoverse2           |  new  |  75.11  |  69.96   |  68.95   |   71.34  | 63.11  |\n\n\n## Training\n\nThe training consists of three stages as detailed in the paper. We train the models on 8 Nvidia RTX A5000 GPUs. 
\n\n**Stage 1**: BEV pretraining with semantic segmentation losses:\n```\nbash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain.py 8\n```\n\n**Stage 2**: Vector module warmup with a large batch size while freezing the BEV module:\n```\nbash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage2_warmup.py 8\n```\nSet ``load_from=...`` properly in the config file to load the checkpoint from stage 1 (see the example above).\n\n**Stage 3**: Joint finetuning:\n```\nbash ./tools/dist_train.sh plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py 8\n```\nSet ``load_from=...`` properly in the config file to load the checkpoint from stage 2.\n\n\n\n## Visualization\n\n### Global merged reconstruction (merged from local HD maps)\n\n```bash\npython tools/visualization/vis_global.py [path to method configuration file under plugin/configs] \\\n  --data_path [path to the .pkl file] \\\n  --out_dir [path to the output folder] \\\n  --option [vis-pred / vis-gt: visualize predicted vectors / visualize ground truth vectors] \\\n  --per_frame_result 1\n```\nSet ``--per_frame_result`` to 1 to generate the per-frame video (the visualization is a bit slow); set it to 0 to only produce the final merged global reconstruction.\n\n\nExamples:\n```bash\n# Visualize MapTracker's prediction\npython tools/visualization/vis_global.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \\\n--data_path work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl \\\n--out_dir vis_global/nuscenes_old/maptracker \\\n--option vis-pred  --per_frame_result 1\n\n# Visualize ground truth data\npython tools/visualization/vis_global.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \\\n--data_path datasets/nuscenes/nuscenes_map_infos_val_gt_tracks.pkl \\\n--out_dir vis_global/nuscenes_old/gt  \\\n--option vis-gt --per_frame_result 0\n```\n\n\n### Local HD map reconstruction\n\n```bash\npython tools/visualization/vis_per_frame.py [path to method configuration file under plugin/configs] \\\n  --data_path [path to the .pkl file] \\\n  --out_dir [path to the output folder] \\\n  --option [vis-pred / vis-gt: visualize predicted vectors / visualize ground truth vectors and input video streams]\n```\n\nNote that the input perspective-view videos will be saved when generating the ground truth visualization.\n\n\nExamples:\n```bash\n# Visualize MapTracker's prediction\npython tools/visualization/vis_per_frame.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \\\n--data_path work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune/pos_predictions.pkl \\\n--out_dir vis_local/nuscenes_old/maptracker \\\n--option vis-pred\n\n# Visualize ground truth data\npython tools/visualization/vis_per_frame.py plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py \\\n--data_path datasets/nuscenes/nuscenes_map_infos_val_gt_tracks.pkl \\\n--out_dir vis_local/nuscenes_old/gt  \\\n--option vis-gt\n```\n"
  },
  {
    "path": "docs/installation.md",
    "content": "# Environment Setup\n\nWe use the same environment as StreamMapNet and the environment setup is largely borrowed from their repo.\n\n**Step 1.** Create conda environment and activate:\n\n```\nconda create --name maptracker python=3.8 -y\nconda activate maptracker\n```\n\n**Step 2.** Install PyTorch.\n\n```\npip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html\n```\n\n**Step 3.** Install MMCV series.\n\n```\n# Install mmcv-series\npip install mmcv-full==1.6.0\npip install mmdet==2.28.2\npip install mmsegmentation==0.30.0\ngit clone https://github.com/open-mmlab/mmdetection3d.git\ncd mmdetection3d\ngit checkout v1.0.0rc6 \npip install -e .\n```\n\n**Step 4.** Install other requirements.\n\n```\npip install -r requirements.txt\n```\n\n\n"
  },
  {
    "path": "plugin/__init__.py",
    "content": "from .models import *\nfrom .datasets import *"
  },
  {
    "path": "plugin/configs/_base_/datasets/coco_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/kitti-3d-3class.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Pedestrian', 'Cyclist', 'Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=1, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/kitti-3d-car.py",
    "content": "# dataset settings\ndataset_type = 'KittiDataset'\ndata_root = 'data/kitti/'\nclass_names = ['Car']\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'kitti_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15))\n\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='ObjectNoise',\n        num_try=100,\n        translation_std=[1.0, 1.0, 0.5],\n        global_rot_range=[0.0, 0.0],\n        rot_range=[-0.78539816, 0.78539816]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=4,\n        use_dim=4,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'kitti_infos_train.pkl',\n            split='training',\n            pts_prefix='velodyne_reduced',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'kitti_infos_val.pkl',\n        split='training',\n        pts_prefix='velodyne_reduced',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=1, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-80, -80, -5, 80, 80, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/nuim_instance.py",
    "content": "dataset_type = 'CocoDataset'\ndata_root = 'data/nuimages/'\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1280, 720), (1920, 1080)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1600, 900),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/nus-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-50, -50, -5, 50, 50, 3]\n# For nuScenes we usually do 10-class detection\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\ndataset_type = 'NuScenesDataset'\ndata_root = 'data/nuscenes/'\n# Input modality for nuScenes dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/nuscenes/': 's3://nuscenes/nuscenes/',\n#         'data/nuscenes/': 's3://nuscenes/nuscenes/'\n#     }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectNameFilter', classes=class_names),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False,\n        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n        # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n        box_type_3d='LiDAR'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/nus-mono3d.py",
    "content": "dataset_type = 'NuScenesMonoDataset'\ndata_root = 'data/nuscenes/'\nclass_names = [\n    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',\n    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'\n]\n# Input modality for nuScenes dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox=True,\n        with_label=True,\n        with_attr_label=True,\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_bbox_depth=True),\n    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',\n            'gt_labels_3d', 'centers2d', 'depths'\n        ]),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=False,\n        transforms=[\n            dict(type='RandomFlip3D'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['img']),\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. client)\neval_pipeline = [\n    dict(type='LoadImageFromFileMono3D'),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['img'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=train_pipeline,\n        modality=input_modality,\n        test_mode=False,\n        box_type_3d='Camera'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',\n        img_prefix=data_root,\n        classes=class_names,\n        pipeline=test_pipeline,\n        modality=input_modality,\n        test_mode=True,\n        box_type_3d='Camera'))\nevaluation = dict(interval=2)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/range100_lyft-3d.py",
    "content": "# If point cloud range is changed, the models should also change their point\n# cloud range accordingly\npoint_cloud_range = [-100, -100, -5, 100, 100, 3]\n# For Lyft we usually do 9-class detection\nclass_names = [\n    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',\n    'bicycle', 'pedestrian', 'animal'\n]\ndataset_type = 'LyftDataset'\ndata_root = 'data/lyft/'\n# Input modality for Lyft dataset, this is consistent with the submission\n# format which requires the information in input_modality.\ninput_modality = dict(\n    use_lidar=True,\n    use_camera=False,\n    use_radar=False,\n    use_map=False,\n    use_external=False)\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/lyft/': 's3://lyft/lyft/',\n#         'data/lyft/': 's3://lyft/lyft/'\n#    }))\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.3925, 0.3925],\n        scale_ratio_range=[0.95, 1.05],\n        translation_std=[0, 0, 0]),\n    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadPointsFromMultiSweeps',\n        sweeps_num=10,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=False),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'lyft_infos_test.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        modality=input_modality,\n        test_mode=True))\n# For Lyft dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 24. Please change the interval accordingly if you do not\n# use a default schedule.\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/s3dis_seg-3d-13class.py",
    "content": "# dataset settings\ndataset_type = 'S3DISSegDataset'\ndata_root = './data/s3dis/'\nclass_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',\n               'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')\nnum_points = 4096\ntrain_area = [1, 2, 3, 4, 6]\ntest_area = 5\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=tuple(range(len(class_names))),\n        max_cat_id=13),\n    dict(\n        type='IndoorPatchPointSample',\n        num_points=num_points,\n        block_size=1.0,\n        ignore_index=len(class_names),\n        use_normalized_coord=True,\n        enlarge_size=0.2,\n        min_unique_num=None),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(\n        # a wrapper in order to successfully call test function\n        # actually we don't perform test-time-aug\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.0,\n                flip_ratio_bev_vertical=0.0),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\n# we need to load gt seg_mask!\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=tuple(range(len(class_names))),\n        max_cat_id=13),\n    dict(\n        type='DefaultFormatBundle3D',\n        with_label=False,\n        class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    # train on area 1, 2, 3, 4, 6\n    # test on area 5\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=[\n            data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area\n        ],\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        ignore_index=len(class_names),\n        scene_idxs=[\n            data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'\n            for i in train_area\n        ]),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names),\n        scene_idxs=data_root +\n        f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names)))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/scannet-3d-18class.py",
    "content": "# dataset settings\ndataset_type = 'ScanNetDataset'\ndata_root = './data/scannet/'\nclass_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',\n               'bookshelf', 'picture', 'counter', 'desk', 'curtain',\n               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',\n               'garbagebin')\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        with_mask_3d=True,\n        with_seg_3d=True),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,\n                       36, 39),\n        max_cat_id=40),\n    dict(type='IndoorPointSample', num_points=40000),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.087266, 0.087266],\n        scale_ratio_range=[1.0, 1.0],\n        shift_height=True),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(\n        type='Collect3D',\n        keys=[\n            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',\n            'pts_instance_mask'\n        ])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n                flip_ratio_bev_vertical=0.5),\n            dict(type='IndoorPointSample', num_points=40000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='GlobalAlignment', rotation_axis=2),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'scannet_infos_train.pkl',\n            pipeline=train_pipeline,\n            filter_empty_gt=False,\n            classes=class_names,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/scannet_seg-3d-20class.py",
    "content": "# dataset settings\ndataset_type = 'ScanNetSegDataset'\ndata_root = './data/scannet/'\nclass_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',\n               'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',\n               'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',\n               'bathtub', 'otherfurniture')\nnum_points = 8192\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,\n                       33, 34, 36, 39),\n        max_cat_id=40),\n    dict(\n        type='IndoorPatchPointSample',\n        num_points=num_points,\n        block_size=1.5,\n        ignore_index=len(class_names),\n        use_normalized_coord=False,\n        enlarge_size=0.2,\n        min_unique_num=None),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(type='NormalizePointsColor', color_mean=None),\n    dict(\n        # a wrapper in order to successfully call test function\n        # actually we don't perform test-time-aug\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.0,\n                flip_ratio_bev_vertical=0.0),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\n# we need to load gt seg_mask!\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        use_color=True,\n        load_dim=6,\n        use_dim=[0, 1, 2, 3, 4, 5]),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=False,\n        with_label_3d=False,\n        with_mask_3d=False,\n        with_seg_3d=True),\n    dict(\n        type='PointSegClassMapping',\n        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,\n                       33, 34, 36, 39),\n        max_cat_id=40),\n    dict(\n        type='DefaultFormatBundle3D',\n        with_label=False,\n        class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_train.pkl',\n        pipeline=train_pipeline,\n        classes=class_names,\n        test_mode=False,\n        ignore_index=len(class_names),\n        scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names)),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'scannet_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        ignore_index=len(class_names)))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/sunrgbd-3d-10class.py",
    "content": "dataset_type = 'SUNRGBDDataset'\ndata_root = 'data/sunrgbd/'\nclass_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',\n               'night_stand', 'bookshelf', 'bathtub')\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(type='LoadAnnotations3D'),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n    ),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.523599, 0.523599],\n        scale_ratio_range=[0.85, 1.15],\n        shift_height=True),\n    dict(type='IndoorPointSample', num_points=20000),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=True,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(\n                type='RandomFlip3D',\n                sync_2d=False,\n                flip_ratio_bev_horizontal=0.5,\n            ),\n            dict(type='IndoorPointSample', num_points=20000),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='DEPTH',\n        shift_height=False,\n        load_dim=6,\n        use_dim=[0, 1, 2]),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=16,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'sunrgbd_infos_train.pkl',\n            pipeline=train_pipeline,\n            classes=class_names,\n            filter_empty_gt=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='Depth')),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'sunrgbd_infos_val.pkl',\n        pipeline=test_pipeline,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='Depth'))\n\nevaluation = dict(pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/waymoD5-3d-3class.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car', 'Pedestrian', 'Cyclist']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(\n        filter_by_difficulty=[-1],\n        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),\n    classes=class_names,\n    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/datasets/waymoD5-3d-car.py",
    "content": "# dataset settings\n# D5 in the config name means the whole dataset is divided into 5 folds\n# We only use one fold for efficient experiments\ndataset_type = 'WaymoDataset'\ndata_root = 'data/waymo/kitti_format/'\nfile_client_args = dict(backend='disk')\n# Uncomment the following if use ceph or other file clients.\n# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient\n# for more details.\n# file_client_args = dict(\n#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))\n\nclass_names = ['Car']\npoint_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]\ninput_modality = dict(use_lidar=True, use_camera=False)\ndb_sampler = dict(\n    data_root=data_root,\n    info_path=data_root + 'waymo_dbinfos_train.pkl',\n    rate=1.0,\n    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),\n    classes=class_names,\n    sample_groups=dict(Car=15),\n    points_loader=dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=5,\n        use_dim=[0, 1, 2, 3, 4],\n        file_client_args=file_client_args))\n\ntrain_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='LoadAnnotations3D',\n        with_bbox_3d=True,\n        with_label_3d=True,\n        file_client_args=file_client_args),\n    dict(type='ObjectSample', db_sampler=db_sampler),\n    dict(\n        type='RandomFlip3D',\n        sync_2d=False,\n        flip_ratio_bev_horizontal=0.5,\n        flip_ratio_bev_vertical=0.5),\n    dict(\n        type='GlobalRotScaleTrans',\n        rot_range=[-0.78539816, 0.78539816],\n        scale_ratio_range=[0.95, 1.05]),\n    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),\n    dict(type='PointShuffle'),\n    dict(type='DefaultFormatBundle3D', class_names=class_names),\n    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])\n]\ntest_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug3D',\n        img_scale=(1333, 800),\n        pts_scale_ratio=1,\n        flip=False,\n        transforms=[\n            dict(\n                type='GlobalRotScaleTrans',\n                rot_range=[0, 0],\n                scale_ratio_range=[1., 1.],\n                translation_std=[0, 0, 0]),\n            dict(type='RandomFlip3D'),\n            dict(\n                type='PointsRangeFilter', point_cloud_range=point_cloud_range),\n            dict(\n                type='DefaultFormatBundle3D',\n                class_names=class_names,\n                with_label=False),\n            dict(type='Collect3D', keys=['points'])\n        ])\n]\n# construct a pipeline for data and gt loading in show function\n# please keep its loading function consistent with test_pipeline (e.g. 
client)\neval_pipeline = [\n    dict(\n        type='LoadPointsFromFile',\n        coord_type='LIDAR',\n        load_dim=6,\n        use_dim=5,\n        file_client_args=file_client_args),\n    dict(\n        type='DefaultFormatBundle3D',\n        class_names=class_names,\n        with_label=False),\n    dict(type='Collect3D', keys=['points'])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            data_root=data_root,\n            ann_file=data_root + 'waymo_infos_train.pkl',\n            split='training',\n            pipeline=train_pipeline,\n            modality=input_modality,\n            classes=class_names,\n            test_mode=False,\n            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset\n            # and box_type_3d='Depth' in sunrgbd and scannet dataset.\n            box_type_3d='LiDAR',\n            # load one frame every five frames\n            load_interval=5)),\n    val=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'),\n    test=dict(\n        type=dataset_type,\n        data_root=data_root,\n        ann_file=data_root + 'waymo_infos_val.pkl',\n        split='training',\n        pipeline=test_pipeline,\n        modality=input_modality,\n        classes=class_names,\n        test_mode=True,\n        box_type_3d='LiDAR'))\n\nevaluation = dict(interval=24, pipeline=eval_pipeline)\n"
  },
  {
    "path": "plugin/configs/_base_/default_runtime.py",
    "content": "checkpoint_config = dict(interval=1)\n# yapf:disable push\n# By default we use textlogger hook and tensorboard\n# For more loggers see\n# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nwork_dir = None\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n"
  },
  {
    "path": "plugin/configs/_base_/models/3dssd.py",
    "content": "model = dict(\n    type='SSD3DNet',\n    backbone=dict(\n        type='PointNet2SAMSG',\n        in_channels=4,\n        num_points=(4096, 512, (256, 256)),\n        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),\n        num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),\n        sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),\n                     ((64, 64, 128), (64, 64, 128), (64, 96, 128)),\n                     ((128, 128, 256), (128, 192, 256), (128, 256, 256))),\n        aggregation_channels=(64, 128, 256),\n        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (512, -1)),\n        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    bbox_head=dict(\n        type='SSD3DHead',\n        in_channels=256,\n        vote_module_cfg=dict(\n            in_channels=256,\n            num_points=256,\n            gt_per_seed=1,\n            conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            with_res_feat=False,\n            vote_xyz_range=(3.0, 3.0, 2.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModuleMSG',\n            num_point=256,\n            radii=(4.8, 6.4),\n            sample_nums=(16, 32),\n            mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),\n            norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),\n            use_xyz=True,\n            normalize_xyz=False,\n            bias=True),\n        pred_layer_cfg=dict(\n            in_channels=1536,\n            shared_conv_channels=(512, 128),\n            cls_conv_channels=(128, ),\n            reg_conv_channels=(128, ),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n            bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            reduction='sum',\n            loss_weight=1.0),\n        center_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        corner_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),\n        vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),\n    test_cfg=dict(\n        nms_cfg=dict(type='nms', iou_thr=0.1),\n        sample_mod='spec',\n        score_thr=0.0,\n        per_class_proposal=True,\n        max_output_num=100))\n"
  },
  {
    "path": "plugin/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='CascadeRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 
loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=2000,\n            max_num=2000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', 
iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py",
    "content": "voxel_size = [0.1, 0.1, 0.2]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),\n    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    pts_middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[41, 1024, 1024],\n        output_channels=128,\n        order=('conv', 'norm', 'act'),\n        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,\n                                                                      128)),\n        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),\n        block_type='basicblock'),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        out_channels=[128, 256],\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        out_channels=[256, 256],\n        upsample_strides=[1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([256, 256]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[1024, 1024, 40],\n            voxel_size=voxel_size,\n            out_size_factor=8,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            out_size_factor=8,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py",
    "content": "voxel_size = [0.2, 0.2, 8]\nmodel = dict(\n    type='CenterPoint',\n    pts_voxel_layer=dict(\n        max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=(0.2, 0.2, 8),\n        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),\n        legacy=False),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        out_channels=[64, 128, 256],\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        conv_cfg=dict(type='Conv2d', bias=False)),\n    pts_neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        out_channels=[128, 128, 128],\n        upsample_strides=[0.5, 1, 2],\n        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),\n        upsample_cfg=dict(type='deconv', bias=False),\n        use_conv_for_no_stride=True),\n    pts_bbox_head=dict(\n        type='CenterHead',\n        in_channels=sum([128, 128, 128]),\n        tasks=[\n            dict(num_class=1, class_names=['car']),\n            dict(num_class=2, class_names=['truck', 'construction_vehicle']),\n            dict(num_class=2, class_names=['bus', 'trailer']),\n            dict(num_class=1, class_names=['barrier']),\n            dict(num_class=2, class_names=['motorcycle', 'bicycle']),\n            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),\n        ],\n        common_heads=dict(\n            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),\n        share_conv_channel=64,\n        bbox_coder=dict(\n            type='CenterPointBBoxCoder',\n            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_num=500,\n            score_threshold=0.1,\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            code_size=9),\n        separate_head=dict(\n            type='SeparateHead', init_bias=-2.19, final_kernel=3),\n        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),\n        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),\n        norm_bbox=True),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            grid_size=[512, 512, 1],\n            voxel_size=voxel_size,\n            out_size_factor=4,\n            dense_reg=1,\n            gaussian_overlap=0.1,\n            max_objs=500,\n            min_radius=2,\n            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),\n    test_cfg=dict(\n        pts=dict(\n            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],\n            max_per_img=500,\n            max_pool_nms=False,\n            min_radius=[4, 12, 10, 1, 0.85, 0.175],\n            score_threshold=0.1,\n            pc_range=[-51.2, -51.2],\n            out_size_factor=4,\n            voxel_size=voxel_size[:2],\n            nms_type='rotate',\n            pre_max_size=1000,\n            post_max_size=83,\n            nms_thr=0.2)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/fcos3d.py",
    "content": "model = dict(\n    type='FCOSMono3D',\n    pretrained='open-mmlab://detectron2/resnet101_caffe',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSMono3DHead',\n        num_classes=10,\n        in_channels=256,\n        stacked_convs=2,\n        feat_channels=256,\n        use_direction_classifier=True,\n        diff_rad_by_sin=True,\n        pred_attrs=True,\n        pred_velo=True,\n        dir_offset=0.7854,  # pi/4\n        strides=[8, 16, 32, 64, 128],\n        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo\n        cls_branch=(256, ),\n        reg_branch=(\n            (256, ),  # offset\n            (256, ),  # depth\n            (256, ),  # size\n            (256, ),  # rot\n            ()  # velo\n        ),\n        dir_branch=(256, ),\n        attr_branch=(256, ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_attr=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        norm_on_bbox=True,\n        centerness_on_reg=True,\n        center_sampling=True,\n        conv_bias=True,\n        dcn_on_last_conv=True),\n    train_cfg=dict(\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_pre=1000,\n        nms_thr=0.8,\n        score_thr=0.05,\n        min_bbox_size=0,\n        max_per_img=200))\n"
  },
  {
    "path": "plugin/configs/_base_/models/groupfree3d.py",
    "content": "model = dict(\n    type='GroupFree3DNet',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=3,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 288)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    bbox_head=dict(\n        type='GroupFree3DHead',\n        in_channels=288,\n        num_decoder_layers=6,\n        num_proposal=256,\n        transformerlayers=dict(\n            type='BaseTransformerLayer',\n            attn_cfgs=dict(\n                type='GroupFree3DMHA',\n                embed_dims=288,\n                num_heads=8,\n                attn_drop=0.1,\n                dropout_layer=dict(type='Dropout', drop_prob=0.1)),\n            ffn_cfgs=dict(\n                embed_dims=288,\n                feedforward_channels=2048,\n                ffn_drop=0.1,\n                act_cfg=dict(type='ReLU', inplace=True)),\n            operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',\n                             'norm')),\n        pred_layer_cfg=dict(\n            in_channels=288, shared_conv_channels=(288, 288), bias=True),\n        sampling_objectness_loss=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=8.0),\n        objectness_loss=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        center_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(sample_mod='kps'),\n    test_cfg=dict(\n        sample_mod='kps',\n        nms_thr=0.25,\n        score_thr=0.0,\n        per_class_proposal=True,\n        prediction_stages='last'))\n"
  },
  {
    "path": "plugin/configs/_base_/models/h3dnet.py",
    "content": "primitive_z_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=2,\n    num_classes=18,\n    primitive_mode='z',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_xy_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=1,\n    num_classes=18,\n    primitive_mode='xy',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=0.5,\n        loss_dst_weight=0.5),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nprimitive_line_cfg = dict(\n    type='PrimitiveHead',\n    num_dims=0,\n    num_classes=18,\n    primitive_mode='line',\n    upper_thresh=100.0,\n    surface_thresh=0.5,\n    vote_module_cfg=dict(\n        in_channels=256,\n        
vote_per_seed=1,\n        gt_per_seed=1,\n        conv_channels=(256, 256),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        norm_feats=True,\n        vote_loss=dict(\n            type='ChamferDistance',\n            mode='l1',\n            reduction='none',\n            loss_dst_weight=10.0)),\n    vote_aggregation_cfg=dict(\n        type='PointSAModule',\n        num_point=1024,\n        radius=0.3,\n        num_sample=16,\n        mlp_channels=[256, 128, 128, 128],\n        use_xyz=True,\n        normalize_xyz=True),\n    feat_channels=(128, 128),\n    conv_cfg=dict(type='Conv1d'),\n    norm_cfg=dict(type='BN1d'),\n    objectness_loss=dict(\n        type='CrossEntropyLoss',\n        class_weight=[0.4, 0.6],\n        reduction='mean',\n        loss_weight=30.0),\n    center_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_reg_loss=dict(\n        type='ChamferDistance',\n        mode='l1',\n        reduction='sum',\n        loss_src_weight=1.0,\n        loss_dst_weight=1.0),\n    semantic_cls_loss=dict(\n        type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),\n    train_cfg=dict(\n        dist_thresh=0.2,\n        var_thresh=1e-2,\n        lower_thresh=1e-6,\n        num_point=100,\n        num_point_line=10,\n        line_thresh=0.2))\n\nmodel = dict(\n    type='H3DNet',\n    backbone=dict(\n        type='MultiBackbone',\n        num_streams=4,\n        suffixes=['net0', 'net1', 'net2', 'net3'],\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        backbones=dict(\n            type='PointNet2SASSG',\n            in_channels=4,\n            num_points=(2048, 1024, 512, 256),\n            radius=(0.2, 0.4, 0.8, 1.2),\n            num_samples=(64, 32, 16, 16),\n            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                         (128, 128, 256)),\n            fp_channels=((256, 256), (256, 256)),\n            norm_cfg=dict(type='BN2d'),\n            sa_cfg=dict(\n                type='PointSAModule',\n                pool_mod='max',\n                use_xyz=True,\n                normalize_xyz=True))),\n    rpn_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n          
  loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    roi_head=dict(\n        type='H3DRoIHead',\n        primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],\n        bbox_head=dict(\n            type='H3DBboxHead',\n            gt_per_seed=3,\n            num_proposal=256,\n            suface_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 6,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 6, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            line_matching_cfg=dict(\n                type='PointSAModule',\n                num_point=256 * 12,\n                radius=0.5,\n                num_sample=32,\n                mlp_channels=[128 + 12, 128, 64, 32],\n                use_xyz=True,\n                normalize_xyz=True),\n            feat_channels=(128, 128),\n            primitive_refine_channels=[128, 128, 128],\n            upper_thresh=100.0,\n            surface_thresh=0.5,\n            line_thresh=0.5,\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='sum',\n                loss_weight=5.0),\n            center_loss=dict(\n                type='ChamferDistance',\n                mode='l2',\n                reduction='sum',\n                loss_src_weight=10.0,\n                loss_dst_weight=10.0),\n            dir_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            dir_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            size_class_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            size_res_loss=dict(\n                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n            semantic_loss=dict(\n                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),\n            cues_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            cues_semantic_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.3, 0.7],\n                reduction='mean',\n                loss_weight=5.0),\n            proposal_objectness_loss=dict(\n                type='CrossEntropyLoss',\n                class_weight=[0.2, 0.8],\n                reduction='none',\n                loss_weight=5.0),\n            primitive_center_loss=dict(\n                type='MSELoss', reduction='none', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n        rpn_proposal=dict(use_nms=False),\n        rcnn=dict(\n    
        pos_distance_thr=0.3,\n            neg_distance_thr=0.6,\n            sample_mod='vote',\n            far_threshold=0.6,\n            near_threshold=0.3,\n            mask_surface_threshold=0.3,\n            label_surface_threshold=0.3,\n            mask_line_threshold=0.3,\n            label_line_threshold=0.3)),\n    test_cfg=dict(\n        rpn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True,\n            use_nms=False),\n        rcnn=dict(\n            sample_mod='seed',\n            nms_thr=0.25,\n            score_thr=0.05,\n            per_class_proposal=True)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_pointpillars_fpn_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-80, -80, -5, 80, 80, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),\n    pts_middle_encoder=dict(output_shape=[640, 640]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_pointpillars_fpn_nus.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.25, 0.25, 8]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=64,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        voxel_size=voxel_size,\n        max_voxels=(30000, 40000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=4,\n        feat_channels=[64, 64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-50, -50, -5, 50, 50, 3],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='FPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        act_cfg=dict(type='ReLU'),\n        in_channels=[64, 128, 256],\n        out_channels=256,\n        start_level=0,\n        num_outs=3),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=10,\n        in_channels=256,\n        feat_channels=256,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],\n            scales=[1, 2, 4],\n            sizes=[\n                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)\n                [0.5774, 1.7321, 1.],  # 1/sqrt(3)\n                [1., 1., 1.],\n                [0.4, 0.4, 1],\n            ],\n            custom_values=[0, 0],\n            rotations=[0, 1.57],\n            reshape_out=True),\n        assigner_per_size=False,\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_thr=0.2,\n            score_thr=0.05,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py",
    "content": "_base_ = './hv_pointpillars_fpn_nus.py'\n\n# model settings (based on nuScenes model settings)\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nmodel = dict(\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-100, -100, -5, 100, 100, 3],\n        max_voxels=(60000, 60000)),\n    pts_voxel_encoder=dict(\n        feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),\n    pts_middle_encoder=dict(output_shape=[800, 800]),\n    pts_bbox_head=dict(\n        num_classes=9,\n        anchor_generator=dict(\n            ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),\n    # model training settings (based on nuScenes model settings)\n    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_pointpillars_secfpn_kitti.py",
    "content": "voxel_size = [0.16, 0.16, 4]\n\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=32,  # max_points_per_voxel\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_voxels\n    ),\n    voxel_encoder=dict(\n        type='PillarFeatureNet',\n        in_channels=4,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),\n    middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),\n    backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        layer_nums=[3, 5, 5],\n        layer_strides=[2, 2, 2],\n        out_channels=[64, 128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n                [0, -39.68, -0.6, 70.4, 39.68, -0.6],\n                [0, -39.68, -1.78, 70.4, 39.68, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.35,\n                min_pos_iou=0.35,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_pointpillars_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.32, 0.32, 6]\nmodel = dict(\n    type='MVXFasterRCNN',\n    pts_voxel_layer=dict(\n        max_num_points=20,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        voxel_size=voxel_size,\n        max_voxels=(32000, 32000)),\n    pts_voxel_encoder=dict(\n        type='HardVFE',\n        in_channels=5,\n        feat_channels=[64],\n        with_distance=False,\n        voxel_size=voxel_size,\n        with_cluster_center=True,\n        with_voxel_center=True,\n        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],\n        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),\n    pts_middle_encoder=dict(\n        type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),\n    pts_backbone=dict(\n        type='SECOND',\n        in_channels=64,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[3, 5, 5],\n        layer_strides=[1, 2, 2],\n        out_channels=[64, 128, 256]),\n    pts_neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[64, 128, 256],\n        upsample_strides=[1, 2, 4],\n        out_channels=[128, 128, 128]),\n    pts_bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=384,\n        feat_channels=384,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],\n                    [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],\n                    [-74.88, -74.88, 0, 74.88, 74.88, 0]],\n            sizes=[\n                [2.08, 4.73, 1.77],  # car\n                [0.84, 1.81, 1.77],  # cyclist\n                [0.84, 0.91, 1.74]  # pedestrian\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        pts=dict(\n            assigner=[\n                dict(  # car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.4,\n                    min_pos_iou=0.4,\n                    ignore_iof_thr=-1),\n                dict(  # cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n                dict(  # pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n               
     pos_iou_thr=0.5,\n                    neg_iou_thr=0.3,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n            ],\n            allowed_border=0,\n            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        pts=dict(\n            use_rotate_nms=True,\n            nms_across_levels=False,\n            nms_pre=4096,\n            nms_thr=0.25,\n            score_thr=0.1,\n            min_bbox_size=0,\n            max_num=500)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_second_secfpn_kitti.py",
    "content": "voxel_size = [0.05, 0.05, 0.1]\n\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=5,\n        point_cloud_range=[0, -40, -3, 70.4, 40, 1],\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                [0, -40.0, -1.78, 70.4, 40.0, -1.78],\n            ],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # for Pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.35,\n                neg_iou_thr=0.2,\n                min_pos_iou=0.2,\n                ignore_iof_thr=-1),\n            dict(  # for Car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.6,\n                neg_iou_thr=0.45,\n                min_pos_iou=0.45,\n                ignore_iof_thr=-1),\n        ],\n        allowed_border=0,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_thr=0.01,\n        score_thr=0.1,\n        min_bbox_size=0,\n        nms_pre=100,\n        max_num=50))\n"
  },
  {
    "path": "plugin/configs/_base_/models/hv_second_secfpn_waymo.py",
    "content": "# model settings\n# Voxel size for voxel encoder\n# Usually voxel size is changed consistently with the point cloud range\n# If point cloud range is modified, do remember to change all related\n# keys in the config.\nvoxel_size = [0.08, 0.08, 0.1]\nmodel = dict(\n    type='VoxelNet',\n    voxel_layer=dict(\n        max_num_points=10,\n        point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],\n        voxel_size=voxel_size,\n        max_voxels=(80000, 90000)),\n    voxel_encoder=dict(type='HardSimpleVFE', num_features=5),\n    middle_encoder=dict(\n        type='SparseEncoder',\n        in_channels=5,\n        sparse_shape=[61, 1280, 1920],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=384,\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    bbox_head=dict(\n        type='Anchor3DHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='AlignedAnchor3DRangeGenerator',\n            ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],\n                    [-76.8, -51.2, 0, 76.8, 51.2, 0],\n                    [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],\n            sizes=[\n                [2.08, 4.73, 1.77],  # car\n                [0.84, 0.91, 1.74],  # pedestrian\n                [0.84, 1.81, 1.77]  # cyclist\n            ],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        dir_offset=0.7854,  # pi/4\n        dir_limit_offset=0,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=[\n            dict(  # car\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.55,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.4,\n                ignore_iof_thr=-1),\n            dict(  # pedestrian\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            dict(  # cyclist\n                type='MaxIoUAssigner',\n                iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1)\n        ],\n        allowed_border=0,\n        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        use_rotate_nms=True,\n        nms_across_levels=False,\n        nms_pre=4096,\n        nms_thr=0.25,\n        
score_thr=0.1,\n        min_bbox_size=0,\n        max_num=500))\n"
  },
  {
    "path": "plugin/configs/_base_/models/imvotenet_image.py",
    "content": "model = dict(\n    type='ImVoteNet',\n    img_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe'),\n    img_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    img_rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    img_roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=10,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n\n    # model training and testing settings\n    train_cfg=dict(\n        img_rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        img_rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        img_rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        img_rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        img_rcnn=dict(\n            
score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "plugin/configs/_base_/models/mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    pretrained='torchvision://resnet50',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_across_levels=False,\n            nms_pre=2000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n       
     sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_across_levels=False,\n            nms_pre=1000,\n            nms_post=1000,\n            max_num=1000,\n            nms_thr=0.7,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
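The Mask R-CNN entry above is a standard mmdet-style base config. A minimal sketch (not repository code) of how such a file is typically consumed, assuming the mmcv/mmdet 2.x APIs these configs target; the path is an assumed location:

# Sketch only: load the config and build the detector it describes.
from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('plugin/configs/_base_/models/mask_rcnn.py')  # assumed path
model = build_detector(cfg.model)  # train_cfg/test_cfg are nested inside cfg.model here
print(type(model).__name__)  # MaskRCNN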
  {
    "path": "plugin/configs/_base_/models/paconv_cuda_ssg.py",
    "content": "_base_ = './paconv_ssg.py'\n\nmodel = dict(\n    backbone=dict(\n        sa_cfg=dict(\n            type='PAConvCUDASAModule',\n            scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))\n"
  },
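The file above illustrates the `_base_` inheritance these configs rely on: it pulls everything from `paconv_ssg.py` (the next entry) and overrides only `backbone.sa_cfg`. A minimal sketch of the merge semantics, assuming mmcv's `Config` (dicts merge recursively, so sibling keys survive an override):

# Sketch only, assuming mmcv's Config.fromfile merge behavior.
from mmcv import Config

cfg = Config.fromfile('plugin/configs/_base_/models/paconv_cuda_ssg.py')
print(cfg.model.backbone.sa_cfg.type)      # 'PAConvCUDASAModule' (overridden)
print(cfg.model.backbone.sa_cfg.pool_mod)  # 'max' (inherited from paconv_ssg.py)
print(cfg.model.backbone.sa_cfg.scorenet_cfg.score_norm)  # 'softmax' (inherited; only mlp_channels was overridden)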
  {
    "path": "plugin/configs/_base_/models/paconv_ssg.py",
    "content": "# model settings\nmodel = dict(\n    type='EncoderDecoder3D',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=9,  # [xyz, rgb, normalized_xyz]\n        num_points=(1024, 256, 64, 16),\n        radius=(None, None, None, None),  # use kNN instead of ball query\n        num_samples=(32, 32, 32, 32),\n        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,\n                                                                    512)),\n        fp_channels=(),\n        norm_cfg=dict(type='BN2d', momentum=0.1),\n        sa_cfg=dict(\n            type='PAConvSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False,\n            paconv_num_kernels=[16, 16, 16],\n            paconv_kernel_input='w_neighbor',\n            scorenet_input='w_neighbor_dist',\n            scorenet_cfg=dict(\n                mlp_channels=[16, 16, 16],\n                score_norm='softmax',\n                temp_factor=1.0,\n                last_bn=False))),\n    decode_head=dict(\n        type='PAConvHead',\n        # PAConv model's decoder takes skip connections from beckbone\n        # different from PointNet++, it also concats input features in the last\n        # level of decoder, leading to `128 + 6` as the channel number\n        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),\n                     (128 + 6, 128, 128, 128)),\n        channels=128,\n        dropout_ratio=0.5,\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='ReLU'),\n        loss_decode=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            class_weight=None,  # should be modified with dataset\n            loss_weight=1.0)),\n    # correlation loss to regularize PAConv's kernel weights\n    loss_regularization=dict(\n        type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),\n    # model training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(mode='slide'))\n"
  },
  {
    "path": "plugin/configs/_base_/models/parta2.py",
    "content": "# model settings\nvoxel_size = [0.05, 0.05, 0.1]\npoint_cloud_range = [0, -40, -3, 70.4, 40, 1]\n\nmodel = dict(\n    type='PartA2',\n    voxel_layer=dict(\n        max_num_points=5,  # max_points_per_voxel\n        point_cloud_range=point_cloud_range,\n        voxel_size=voxel_size,\n        max_voxels=(16000, 40000)  # (training, testing) max_voxels\n    ),\n    voxel_encoder=dict(type='HardSimpleVFE'),\n    middle_encoder=dict(\n        type='SparseUNet',\n        in_channels=4,\n        sparse_shape=[41, 1600, 1408],\n        order=('conv', 'norm', 'act')),\n    backbone=dict(\n        type='SECOND',\n        in_channels=256,\n        layer_nums=[5, 5],\n        layer_strides=[1, 2],\n        out_channels=[128, 256]),\n    neck=dict(\n        type='SECONDFPN',\n        in_channels=[128, 256],\n        upsample_strides=[1, 2],\n        out_channels=[256, 256]),\n    rpn_head=dict(\n        type='PartA2RPNHead',\n        num_classes=3,\n        in_channels=512,\n        feat_channels=512,\n        use_direction_classifier=True,\n        anchor_generator=dict(\n            type='Anchor3DRangeGenerator',\n            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],\n                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],\n            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],\n            rotations=[0, 1.57],\n            reshape_out=False),\n        diff_rad_by_sin=True,\n        assigner_per_size=True,\n        assign_per_class=True,\n        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),\n        loss_dir=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),\n    roi_head=dict(\n        type='PartAggregationROIHead',\n        num_classes=3,\n        semantic_head=dict(\n            type='PointwiseSemanticHead',\n            in_channels=16,\n            extra_width=0.2,\n            seg_score_thr=0.3,\n            num_classes=3,\n            loss_seg=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_part=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n        seg_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='max')),\n        part_roi_extractor=dict(\n            type='Single3DRoIAwareExtractor',\n            roi_layer=dict(\n                type='RoIAwarePool3d',\n                out_size=14,\n                max_pts_per_voxel=128,\n                mode='avg')),\n        bbox_head=dict(\n            type='PartA2BboxHead',\n            num_classes=3,\n            seg_in_channels=16,\n            part_in_channels=4,\n            seg_conv_channels=[64, 64],\n            part_conv_channels=[64, 64],\n            merge_conv_channels=[128, 128],\n            down_conv_channels=[128, 256],\n            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),\n            shared_fc_channels=[256, 512, 512, 512],\n            cls_channels=[256, 256],\n            
reg_channels=[256, 256],\n            dropout_ratio=0.1,\n            roi_feat_size=14,\n            with_corner_loss=True,\n            loss_bbox=dict(\n                type='SmoothL1Loss',\n                beta=1.0 / 9.0,\n                reduction='sum',\n                loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='sum',\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.35,\n                    min_pos_iou=0.35,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(type='BboxOverlapsNearest3D'),\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.45,\n                    min_pos_iou=0.45,\n                    ignore_iof_thr=-1)\n            ],\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=9000,\n            nms_post=512,\n            max_num=512,\n            nms_thr=0.8,\n            score_thr=0,\n            use_rotate_nms=False),\n        rcnn=dict(\n            assigner=[\n                dict(  # for Pedestrian\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Cyclist\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1),\n                dict(  # for Car\n                    type='MaxIoUAssigner',\n                    iou_calculator=dict(\n                        type='BboxOverlaps3D', coordinate='lidar'),\n                    pos_iou_thr=0.55,\n                    neg_iou_thr=0.55,\n                    min_pos_iou=0.55,\n                    ignore_iof_thr=-1)\n            ],\n            sampler=dict(\n                type='IoUNegPiecewiseSampler',\n                num=128,\n                pos_fraction=0.55,\n                neg_piece_fractions=[0.8, 0.2],\n                neg_iou_piece_thrs=[0.55, 0.1],\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False,\n                return_iou=True),\n            cls_pos_thr=0.75,\n            cls_neg_thr=0.25)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1024,\n            nms_post=100,\n            max_num=100,\n            nms_thr=0.7,\n            score_thr=0,\n            use_rotate_nms=True),\n        rcnn=dict(\n  
          use_rotate_nms=True,\n            use_raw_score=True,\n            nms_thr=0.01,\n            score_thr=0.1)))\n"
  },
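One detail of the PartA2 config worth calling out: `sparse_shape` is not a free choice but follows from `point_cloud_range` and `voxel_size`, with one extra voxel along z as in SECOND-style middle encoders. A small sanity-check sketch (my arithmetic, not repo code):

# Derive the sparse voxel grid from the ranges in parta2.py.
pc_range = [0, -40, -3, 70.4, 40, 1]
voxel_size = [0.05, 0.05, 0.1]
nx = round((pc_range[3] - pc_range[0]) / voxel_size[0])  # 1408 voxels along x
ny = round((pc_range[4] - pc_range[1]) / voxel_size[1])  # 1600 voxels along y
nz = round((pc_range[5] - pc_range[2]) / voxel_size[2])  # 40 voxels along z
assert [nz + 1, ny, nx] == [41, 1600, 1408]  # matches middle_encoder.sparse_shape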
  {
    "path": "plugin/configs/_base_/models/pointnet2_msg.py",
    "content": "_base_ = './pointnet2_ssg.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='PointNet2SAMSG',\n        in_channels=6,  # [xyz, rgb], should be modified with dataset\n        num_points=(1024, 256, 64, 16),\n        radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),\n        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),\n        sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,\n                                                                    128)),\n                     ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),\n                                                          (256, 384, 512))),\n        aggregation_channels=(None, None, None, None),\n        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),\n        fps_sample_range_lists=((-1), (-1), (-1), (-1)),\n        dilated_group=(False, False, False, False),\n        out_indices=(0, 1, 2, 3),\n        sa_cfg=dict(\n            type='PointSAModuleMSG',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    decode_head=dict(\n        fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),\n                     (128, 128, 128, 128))))\n"
  },
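Note the `_delete_=True` in the backbone override above: without it, mmcv would merge the MSG backbone dict into the SSG backbone inherited from `pointnet2_ssg.py` and leave behind SSG-only keys (such as `radius`) that `PointNet2SAMSG` does not accept. A sketch of the resulting behavior, assuming mmcv's `Config`:

# Sketch only: _delete_=True replaces the inherited dict wholesale.
from mmcv import Config

cfg = Config.fromfile('plugin/configs/_base_/models/pointnet2_msg.py')
print(cfg.model.backbone.type)         # 'PointNet2SAMSG'
print('radius' in cfg.model.backbone)  # False: the SSG-only key was dropped
print(cfg.model.decode_head.type)      # 'PointNet2Head', still inherited (no _delete_ there)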
  {
    "path": "plugin/configs/_base_/models/pointnet2_ssg.py",
    "content": "# model settings\nmodel = dict(\n    type='EncoderDecoder3D',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=6,  # [xyz, rgb], should be modified with dataset\n        num_points=(1024, 256, 64, 16),\n        radius=(0.1, 0.2, 0.4, 0.8),\n        num_samples=(32, 32, 32, 32),\n        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,\n                                                                    512)),\n        fp_channels=(),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=False)),\n    decode_head=dict(\n        type='PointNet2Head',\n        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),\n                     (128, 128, 128, 128)),\n        channels=128,\n        dropout_ratio=0.5,\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        act_cfg=dict(type='ReLU'),\n        loss_decode=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            class_weight=None,  # should be modified with dataset\n            loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(mode='slide'))\n"
  },
  {
    "path": "plugin/configs/_base_/models/votenet.py",
    "content": "model = dict(\n    type='VoteNet',\n    backbone=dict(\n        type='PointNet2SASSG',\n        in_channels=4,\n        num_points=(2048, 1024, 512, 256),\n        radius=(0.2, 0.4, 0.8, 1.2),\n        num_samples=(64, 32, 16, 16),\n        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),\n                     (128, 128, 256)),\n        fp_channels=((256, 256), (256, 256)),\n        norm_cfg=dict(type='BN2d'),\n        sa_cfg=dict(\n            type='PointSAModule',\n            pool_mod='max',\n            use_xyz=True,\n            normalize_xyz=True)),\n    bbox_head=dict(\n        type='VoteHead',\n        vote_module_cfg=dict(\n            in_channels=256,\n            vote_per_seed=1,\n            gt_per_seed=3,\n            conv_channels=(256, 256),\n            conv_cfg=dict(type='Conv1d'),\n            norm_cfg=dict(type='BN1d'),\n            norm_feats=True,\n            vote_loss=dict(\n                type='ChamferDistance',\n                mode='l1',\n                reduction='none',\n                loss_dst_weight=10.0)),\n        vote_aggregation_cfg=dict(\n            type='PointSAModule',\n            num_point=256,\n            radius=0.3,\n            num_sample=16,\n            mlp_channels=[256, 128, 128, 128],\n            use_xyz=True,\n            normalize_xyz=True),\n        pred_layer_cfg=dict(\n            in_channels=128, shared_conv_channels=(128, 128), bias=True),\n        conv_cfg=dict(type='Conv1d'),\n        norm_cfg=dict(type='BN1d'),\n        objectness_loss=dict(\n            type='CrossEntropyLoss',\n            class_weight=[0.2, 0.8],\n            reduction='sum',\n            loss_weight=5.0),\n        center_loss=dict(\n            type='ChamferDistance',\n            mode='l2',\n            reduction='sum',\n            loss_src_weight=10.0,\n            loss_dst_weight=10.0),\n        dir_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        dir_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),\n        size_class_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),\n        size_res_loss=dict(\n            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),\n        semantic_loss=dict(\n            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),\n    test_cfg=dict(\n        sample_mod='seed',\n        nms_thr=0.25,\n        score_thr=0.05,\n        per_class_proposal=True))\n"
  },
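The VoteNet head above uses `ChamferDistance` for both the vote loss and the center loss. For reference, a minimal sketch of a symmetric squared-L2 Chamfer distance (my own illustration; the repo's `ChamferDistance` additionally exposes the `mode`, `reduction`, `loss_src_weight`, and `loss_dst_weight` options seen in the config):

import torch

def chamfer_l2(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor:
    # (N, 3) and (M, 3) point sets; nearest-neighbor distance in both directions.
    d = torch.cdist(src, dst) ** 2
    return d.min(dim=1).values.mean() + d.min(dim=0).values.mean()

print(chamfer_l2(torch.rand(64, 3), torch.rand(32, 3)))  # scalar loss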
  {
    "path": "plugin/configs/_base_/schedules/cosine.py",
    "content": "# This schedule is mainly used by models with dynamic voxelization\n# optimizer\nlr = 0.003  # max learning rate\noptimizer = dict(\n    type='AdamW',\n    lr=lr,\n    betas=(0.95, 0.99),  # the momentum is change during training\n    weight_decay=0.001)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 10,\n    min_lr_ratio=1e-5)\n\nmomentum_config = None\n\nrunner = dict(type='EpochBasedRunner', max_epochs=40)\n"
  },
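Read together, the `lr_config` above means: ramp linearly from lr/10 to lr over the first 1000 iterations, then cosine-anneal toward lr * min_lr_ratio. A rough sketch of that curve (an approximation of mmcv's CosineAnnealing hook, not its actual code; total_iters is illustrative):

import math

def lr_at(i, base_lr=0.003, warmup_iters=1000, warmup_ratio=0.1,
          min_lr_ratio=1e-5, total_iters=40000):
    if i < warmup_iters:  # linear warmup from base_lr * warmup_ratio to base_lr
        return base_lr * (warmup_ratio + (1 - warmup_ratio) * i / warmup_iters)
    min_lr = base_lr * min_lr_ratio
    t = (i - warmup_iters) / (total_iters - warmup_iters)  # progress in [0, 1]
    return min_lr + 0.5 * (base_lr - min_lr) * (1 + math.cos(math.pi * t))

print(lr_at(0), lr_at(1000), lr_at(39999))  # ~0.0003 -> 0.003 -> ~3e-8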
  {
    "path": "plugin/configs/_base_/schedules/cyclic_20e.py",
    "content": "# For nuScenes dataset, we usually evaluate the model at the end of training.\n# Since the models are trained by 24 epochs by default, we set evaluation\n# interval to be 20. Please change the interval accordingly if you do not\n# use a default schedule.\n# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "plugin/configs/_base_/schedules/cyclic_40e.py",
    "content": "# The schedule is usually used by models trained on KITTI dataset\n\n# The learning rate set in the cyclic schedule is the initial learning rate\n# rather than the max learning rate. Since the target_ratio is (10, 1e-4),\n# the learning rate will change from 0.0018 to 0.018, than go to 0.0018*1e-4\nlr = 0.0018\n# The optimizer follows the setting in SECOND.Pytorch, but here we use\n# the offcial AdamW optimizer implemented by PyTorch.\noptimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\n# We use cyclic learning rate and momentum schedule following SECOND.Pytorch\n# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69  # noqa\n# We implement them in mmcv, for more details, please refer to\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327  # noqa\n# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130  # noqa\nlr_config = dict(\n    policy='cyclic',\n    target_ratio=(10, 1e-4),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\nmomentum_config = dict(\n    policy='cyclic',\n    target_ratio=(0.85 / 0.95, 1),\n    cyclic_times=1,\n    step_ratio_up=0.4,\n)\n# Although the max_epochs is 40, this schedule is usually used we\n# RepeatDataset with repeat ratio N, thus the actual max epoch\n# number could be Nx40\nrunner = dict(type='EpochBasedRunner', max_epochs=40)\n"
  },
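The endpoints quoted in the comments above follow directly from target_ratio=(10, 1e-4) with step_ratio_up=0.4; a quick arithmetic check:

# The LR climbs to lr * 10 over the first 40% of the run, then decays to lr * 1e-4.
lr = 0.0018
peak_lr, final_lr = lr * 10, lr * 1e-4
assert abs(peak_lr - 0.018) < 1e-12 and abs(final_lr - 1.8e-07) < 1e-18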
  {
    "path": "plugin/configs/_base_/schedules/mmdet_schedule_1x.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
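For reference, the `step` policy above is piecewise constant: base LR 0.02, divided by 10 at the end of epochs 8 and 11 within the 12-epoch run. A tiny sketch of that rule (my illustration of the step policy, not mmcv's code):

def lr_for_epoch(epoch, base_lr=0.02, steps=(8, 11), gamma=0.1):
    # Multiply by gamma once for each milestone the epoch has reached.
    return base_lr * gamma ** sum(epoch >= s for s in steps)

assert abs(lr_for_epoch(0) - 0.02) < 1e-12
assert abs(lr_for_epoch(8) - 0.002) < 1e-12
assert abs(lr_for_epoch(11) - 0.0002) < 1e-12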
  {
    "path": "plugin/configs/_base_/schedules/schedule_2x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on nuScenes dataset\noptimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)\n# max_norm=10 is better for SECOND\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 1000,\n    step=[20, 23])\nmomentum_config = None\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "plugin/configs/_base_/schedules/schedule_3x.py",
    "content": "# optimizer\n# This schedule is mainly used by models on indoor dataset,\n# e.g., VoteNet on SUNRGBD and ScanNet\nlr = 0.008  # max learning rate\noptimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)\noptimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))\nlr_config = dict(policy='step', warmup=None, step=[24, 32])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "plugin/configs/_base_/schedules/seg_cosine_150e.py",
    "content": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=150)\n"
  },
  {
    "path": "plugin/configs/_base_/schedules/seg_cosine_200e.py",
    "content": "# optimizer\n# This schedule is mainly used on ScanNet dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=200)\n"
  },
  {
    "path": "plugin/configs/_base_/schedules/seg_cosine_50e.py",
    "content": "# optimizer\n# This schedule is mainly used on S3DIS dataset in segmentation task\noptimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)\noptimizer_config = dict(grad_clip=None)\nlr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)\nmomentum_config = None\n\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=50)\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage1_bev_pretrain.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 1\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 12\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (100, 50) # bev range, 100m in x-axis, 50m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=True,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=False,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                         
   num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n            semantic_mask=True\n        ),\n        
dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=5e-2)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # for debugging\nfind_unused_parameters = True  # NOTE: must be False when activation checkpointing (with_cp=True) is used\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n"
  },
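This file is stage 1 of MapTracker's three-stage recipe on the AV2 new split: BEV pretraining with the vector head skipped (skip_vector_head=True, use_memory=False), followed by the stage-2 warmup and stage-3 joint-finetune configs below, each chaining the previous stage's checkpoint via load_from. The iteration bookkeeping at the top of the file works out as follows (a check sketch; 29293 is presumably the number of training frames):

num_gpus, batch_size = 8, 1  # stage-1 values copied from the config above
num_iters_per_epoch = 29293 // (num_gpus * batch_size)
num_epochs = 12
total_iters = num_epochs * num_iters_per_epoch
num_epochs_interval = num_epochs // 6  # eval/checkpoint every 2 epochs
assert (num_iters_per_epoch, total_iters, num_epochs_interval) == (3661, 43932, 2)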
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage2_warmup.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 6\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 3\nnum_epochs_interval = num_epochs\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (100, 50) # bev range, 100m in x-axis, 50m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=True,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            
num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='PV_Map', img_shape=img_size, \n        feat_down_sample=8,\n        thickness=1, \n        coords_dim=coords_dim,\n        pv_mask=True,\n        num_cams=num_cams,\n        num_coords=3,\n    ),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask', 'pv_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        
dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=0.95)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # for debugging\nfind_unused_parameters = True  # NOTE: must be False when activation checkpointing (with_cp=True) is used\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 'work_dirs/maptracker_av2_100x50_newsplit_5frame_span10_stage1_bev_pretrain/latest.pth'\n"
  },
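Across all three stages, `paramwise_cfg.custom_keys` assigns per-submodule LR multipliers (for example, the image backbone trains at 0.1x the base LR). A self-contained sketch of that mechanism, assuming mmcv 1.x's `build_optimizer` with the default constructor; the toy module is hypothetical and only its attribute names matter:

import torch
from mmcv.runner import build_optimizer

class Toy(torch.nn.Module):  # hypothetical stand-in for the real model
    def __init__(self):
        super().__init__()
        self.img_backbone = torch.nn.Linear(4, 4)  # name matches the custom key
        self.head = torch.nn.Linear(4, 4)

opt_cfg = dict(type='AdamW', lr=5e-4, weight_decay=1e-2,
               paramwise_cfg=dict(custom_keys={'img_backbone': dict(lr_mult=0.1)}))
opt = build_optimizer(Toy(), opt_cfg)
print(sorted({g['lr'] for g in opt.param_groups}))  # [5e-05, 0.0005]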
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_100x50_newsplit_5frame_span10_stage3_joint_finetune.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 2\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 20\nnum_epochs_interval = num_epochs // 5\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (100, 50) # bev range, 100m in x-axis, 50m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=-1,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                          
  num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    
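# evaluate every 4th frame; matches the interval used in the dataset configs below\n    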
interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone.img_backbone': dict(lr_mult=0.1),\n            'backbone.img_neck': dict(lr_mult=0.5),\n            'backbone.transformer': dict(lr_mult=0.5),\n            'backbone.positional_encoding': dict(lr_mult=0.5),\n            'seg_decoder': dict(lr_mult=0.5),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=3e-3)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 'work_dirs/maptracker_av2_100x50_newsplit_5frame_span10_stage2_warmup/latest.pth'\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage1_bev_pretrain.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 1\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 12\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=True,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=False,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                           
 num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n            semantic_mask=True\n        ),\n        
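# bundle the GT vectors and semantic masks for the Collect3D step below\n        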
dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=5e-2)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage2_warmup.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 6\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 3\nnum_epochs_interval = num_epochs\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=True,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            
num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='PV_Map', img_shape=img_size, \n        feat_down_sample=8,\n        thickness=1, \n        coords_dim=coords_dim,\n        pv_mask=True,\n        num_cams=num_cams,\n        num_coords=3,\n    ),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask', 'pv_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        
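# no rasterization step here: this evaluation only collects the GT vectors\n        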
dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=0.95)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 'work_dirs/maptracker_av2_newsplit_5frame_span10_stage1_bev_pretrain/latest.pth'\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_newsplit/maptracker_av2_newsplit_5frame_span10_stage3_joint_finetune.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 2\nnum_iters_per_epoch = 29293 // (num_gpus * batch_size)\nnum_epochs = 20\nnum_epochs_interval = num_epochs // 5\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=-1,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            
num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    
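# evaluate every 4th frame; matches the interval used in the dataset configs below\n    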
interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone.img_backbone': dict(lr_mult=0.1),\n            'backbone.img_neck': dict(lr_mult=0.5),\n            'backbone.transformer': dict(lr_mult=0.5),\n            'backbone.positional_encoding': dict(lr_mult=0.5),\n            'seg_decoder': dict(lr_mult=0.5),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=3e-3)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 'work_dirs/maptracker_av2_newsplit_5frame_span10_stage2_warmup/latest.pth'\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage1_bev_pretrain.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 1\nnum_iters_per_epoch = 27243 // (num_gpus * batch_size)\nnum_epochs = 12\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=True,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=False,\n    mem_len=4,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n          
              dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n            
                        'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n            semantic_mask=True\n        ),\n        dict(type='FormatBundleMap'),\n        
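# this BEV-pretrain stage also evaluates segmentation, so semantic_mask is collected too\n        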
dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n        eval_semantic=True,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=5e-2)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage2_warmup.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 6\nnum_iters_per_epoch = 27243 // (num_gpus * batch_size)\nnum_epochs = 3\nnum_epochs_interval = num_epochs\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=True,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            #pretrained='torchvision://resnet18',\n            depth=50,\n            #depth=18,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            #in_channels=[128, 256, 512],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            
type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n            
        ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='PV_Map', img_shape=img_size, \n        feat_down_sample=8,\n        thickness=1, \n        coords_dim=coords_dim,\n        pv_mask=True,\n        num_cams=num_cams,\n        num_coords=3,\n    ),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask', 'pv_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    
pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=0.95)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 
'work_dirs/maptracker_av2_oldsplit_5frame_span10_stage1_bev_pretrain/latest.pth'\n"
  },
  {
    "path": "plugin/configs/maptracker/av2_oldsplit/maptracker_av2_oldsplit_5frame_span10_stage3_joint_finetune.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 608\nimg_w = 608\nimg_size = (img_h, img_w)\nnum_cams = 7\n\nnum_gpus = 8\nbatch_size = 2\nnum_iters_per_epoch = 27243 // (num_gpus * batch_size)\nnum_epochs = 20\nnum_epochs_interval = num_epochs // 5\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=-1,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            num_cams=num_cams,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            
num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                            num_cams=num_cams,\n                        ),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 
'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img',], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n\nmatch_config = 
dict(\n    type='AV2Dataset',\n    ann_file='./datasets/av2/av2_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=4,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        interval=4,\n    ),\n    val=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    test=dict(\n        type='AV2Dataset',\n        ann_file='./datasets/av2/av2_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        interval=4,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone.img_backbone': dict(lr_mult=0.1),\n            'backbone.img_neck': dict(lr_mult=0.5),\n            'backbone.transformer': dict(lr_mult=0.5),\n            'backbone.positional_encoding': dict(lr_mult=0.5),\n            'seg_decoder': dict(lr_mult=0.5),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=3e-3)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = 'work_dirs/maptracker_av2_oldsplit_5frame_span10_stage2_warmup/latest.pth'\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage1_bev_pretrain.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\nnum_cams = 6\n\nnum_gpus = 8\nbatch_size = 3\nnum_iters_per_epoch = 27846 // (num_gpus * batch_size)\nnum_epochs = 18\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=True,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=False,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n               
         dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n      
  ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n            semantic_mask=True,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 
'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'ego2cam', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name', 'img_filenames', 'cam_intrinsics', 'cam_extrinsics', 'lidar2ego_translation', \n        'lidar2ego_rotation'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        eval_semantic=True,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        eval_semantic=True,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=5e-2)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use..\nfind_unused_parameters = True #### when use checkpoint, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        
dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage2_warmup.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\n\nnum_gpus = 8\nbatch_size = 8\nnum_iters_per_epoch = 27846 // (num_gpus * batch_size)\nnum_epochs = 4\nnum_epochs_interval = num_epochs\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=True,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        history_steps=4,\n        use_grid_mask=True,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            #in_channels=[128, 256, 512],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            
num_levels=1),\n                        dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                        )\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n  
                  operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 
'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=10,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=0.95)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # uncomment for quick debugging\nfind_unused_parameters = True  # NOTE: when using gradient checkpointing (with_cp=True), find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = \"work_dirs/maptracker_nusc_newsplit_5frame_span10_stage1_bev_pretrain/latest.pth\"\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_newsplit/maptracker_nusc_newsplit_5frame_span10_stage3_joint_finetune.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\n\nnum_gpus = 8\nbatch_size = 4\nnum_iters_per_epoch = 27846 // (num_gpus * batch_size)\nnum_epochs = 36\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=-1,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        history_steps=4,\n        use_grid_mask=True,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n                        
dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                        )\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    
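## an additional cross attention for vector memory fusion\n                    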
operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 
'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val_newsplit.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone.img_backbone': dict(lr_mult=0.1),\n            'backbone.img_neck': dict(lr_mult=0.5),\n            'backbone.transformer': dict(lr_mult=0.5),\n            'backbone.positional_encoding': dict(lr_mult=0.5),\n            'seg_decoder': dict(lr_mult=0.5),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=3e-3)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # uncomment for quick debugging\nfind_unused_parameters = True  # NOTE: when using gradient checkpointing (with_cp=True), find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        
dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = \"work_dirs/maptracker_nusc_newsplit_5frame_span10_stage2_warmup/latest.pth\"\n\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\nnum_cams = 6\n\nnum_gpus = 8\nbatch_size = 1\nnum_iters_per_epoch = 27968 // (num_gpus * batch_size)\nnum_epochs = 18\nnum_epochs_interval = num_epochs // 6\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=True,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=False,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        use_grid_mask=True,\n        history_steps=4,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n               
         dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims),\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', \n                                     'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    operation_order=('self_attn', 
'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            
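# polyline thickness in canvas pixels\n            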
thickness=thickness,\n            semantic_mask=True,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'ego2cam', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name', 'img_filenames', 'cam_intrinsics', 'cam_extrinsics', 'lidar2ego_translation', \n        'lidar2ego_rotation'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        eval_semantic=True,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n        eval_semantic=True,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=5e-2)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # uncomment for quick debugging\nfind_unused_parameters = True  # NOTE: when using gradient checkpointing (with_cp=True), find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * 
num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage2_warmup.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\n\nnum_gpus = 8\nbatch_size = 6\nnum_iters_per_epoch = 27968 // (num_gpus * batch_size)\nnum_epochs = 4\nnum_epochs_interval = num_epochs\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=True,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=500,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        history_steps=4,\n        use_grid_mask=True,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n                        dict(\n     
                       type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                        )\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    # operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                    #                 'norm', 'ffn',)\n                    operation_order=('self_attn', 
'norm', 'cross_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 
'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',   \n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'img_backbone': dict(lr_mult=0.1),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=0.95) # only slightly decay\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n# evaluation = dict(interval=1)  # uncomment for quick debugging\nfind_unused_parameters = True  # NOTE: when using gradient checkpointing (with_cp=True), find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = \"work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage1_bev_pretrain/latest.pth\"\n"
  },
  {
    "path": "plugin/configs/maptracker/nuscenes_oldsplit/maptracker_nusc_oldsplit_5frame_span10_stage3_joint_finetune.py",
    "content": "_base_ = [\n    '../../_base_/default_runtime.py'\n]\n\n# model type\ntype = 'Mapper'\nplugin = True\n\n# plugin code dir\nplugin_dir = 'plugin/'\n[]\n# img configs\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\nimg_h = 480\nimg_w = 800\nimg_size = (img_h, img_w)\n\nnum_gpus = 8\nbatch_size = 2\nnum_iters_per_epoch = 27968 // (num_gpus * batch_size)\nnum_epochs = 48\nnum_epochs_interval = num_epochs // 8\ntotal_iters = num_epochs * num_iters_per_epoch\nnum_queries = 100\n\n# category configs\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\nnum_class = max(list(cat2id.values())) + 1\n\n# bev configs\nroi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis\nbev_h = 50\nbev_w = 100\npc_range = [-roi_size[0]/2, -roi_size[1]/2, -3, roi_size[0]/2, roi_size[1]/2, 5]\n\n# vectorize params\ncoords_dim = 2\nsample_dist = -1\nsample_num = -1\nsimplify = True\n\n# rasterize params (for temporal matching use)\ncanvas_size = (200, 100) # bev feature size\nthickness = 3 # thickness of rasterized polylines\n\n# meta info for submission pkl\nmeta = dict(\n    use_lidar=False,\n    use_camera=True,\n    use_radar=False,\n    use_map=False,\n    use_external=False,\n    output_format='vector')\n\n# model configs\nbev_embed_dims = 256\nembed_dims = 512\nnum_feat_levels = 3\nnorm_cfg = dict(type='BN2d')\nnum_class = max(list(cat2id.values()))+1\nnum_points = 20\npermute = True\n\nmodel = dict(\n    type='MapTracker',\n    roi_size=roi_size,\n    bev_h=bev_h,\n    bev_w=bev_w,\n    history_steps=4,\n    test_time_history_steps=20,\n    mem_select_dist_ranges=[1, 5, 10, 15],\n    skip_vector_head=False,\n    freeze_bev=False,\n    track_fp_aug=False,\n    use_memory=True,\n    mem_len=4,\n    mem_warmup_iters=-1,\n    backbone_cfg=dict(\n        type='BEVFormerBackbone',\n        roi_size=roi_size,\n        bev_h=bev_h,\n        bev_w=bev_w,\n        history_steps=4,\n        use_grid_mask=True,\n        img_backbone=dict(\n            type='ResNet',\n            with_cp=False,\n            # pretrained='./resnet50_checkpoint.pth',\n            pretrained='open-mmlab://detectron2/resnet50_caffe',\n            depth=50,\n            num_stages=4,\n            out_indices=(1, 2, 3),\n            frozen_stages=-1,\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            style='caffe',\n            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n            stage_with_dcn=(False, False, True, True)\n            ),\n        img_neck=dict(\n            type='FPN',\n            in_channels=[512, 1024, 2048],\n            out_channels=bev_embed_dims,\n            start_level=0,\n            add_extra_convs=True,\n            num_outs=num_feat_levels,\n            norm_cfg=norm_cfg,\n            relu_before_extra_convs=True),\n        transformer=dict(\n            type='PerceptionTransformer',\n            embed_dims=bev_embed_dims,\n            encoder=dict(\n                type='BEVFormerEncoder',\n                num_layers=2,\n                pc_range=pc_range,\n                num_points_in_pillar=4,\n                return_intermediate=False,\n                transformerlayers=dict(\n                    type='BEVFormerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='TemporalSelfAttention',\n                            embed_dims=bev_embed_dims,\n                            num_levels=1),\n                        
dict(\n                            type='SpatialCrossAttention',\n                            deformable_attention=dict(\n                                type='MSDeformableAttention3D',\n                                embed_dims=bev_embed_dims,\n                                num_points=8,\n                                num_levels=num_feat_levels),\n                            embed_dims=bev_embed_dims,\n                        )\n                    ],\n                    feedforward_channels=bev_embed_dims*2,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                    'ffn', 'norm')\n                )\n            ),\n        ),\n        positional_encoding=dict(\n            type='LearnedPositionalEncoding',\n            num_feats=bev_embed_dims//2,\n            row_num_embed=bev_h,\n            col_num_embed=bev_w,\n            ),\n    ),\n    head_cfg=dict(\n        type='MapDetectorHead',\n        num_queries=num_queries,\n        embed_dims=embed_dims,\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        num_points=num_points,\n        roi_size=roi_size,\n        coord_dim=2,\n        different_heads=False,\n        predict_refine=False,\n        sync_cls_avg_factor=True,\n        trans_loss_weight=0.1,\n        transformer=dict(\n            type='MapTransformer',\n            num_feature_levels=1,\n            num_points=num_points,\n            coord_dim=2,\n            encoder=dict(\n                type='PlaceHolderEncoder',\n                embed_dims=embed_dims,\n            ),\n            decoder=dict(\n                type='MapTransformerDecoder_new',\n                num_layers=6,\n                prop_add_stage=1,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='MapTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                        dict(\n                            type='CustomMSDeformableAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            num_levels=1,\n                            num_points=num_points,\n                            dropout=0.1,\n                        ),\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=embed_dims,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                        ),\n                    ],\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=embed_dims,\n                        feedforward_channels=embed_dims*2,\n                        num_fcs=2,\n                        ffn_drop=0.1,\n                        act_cfg=dict(type='ReLU', inplace=True),        \n                    ),\n                    feedforward_channels=embed_dims*2,\n                    ffn_dropout=0.1,\n                    ## an additional cross attention for vector memory fusion\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'cross_attn', 
'norm',\n                                    'ffn', 'norm')\n                )\n            )\n        ),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=5.0\n        ),\n        loss_reg=dict(\n            type='LinesL1Loss',\n            loss_weight=50.0,\n            beta=0.01,\n        ),\n        assigner=dict(\n            type='HungarianLinesAssigner',\n                cost=dict(\n                    type='MapQueriesCost',\n                    cls_cost=dict(type='FocalLossCost', weight=5.0),\n                    reg_cost=dict(type='LinesL1Cost', weight=50.0, beta=0.01, permute=permute),\n                    ),\n                ),\n        ),\n    seg_cfg=dict(\n        type='MapSegHead',\n        num_classes=num_class,\n        in_channels=bev_embed_dims,\n        embed_dims=bev_embed_dims,\n        bev_size=(bev_w, bev_h),\n        canvas_size=canvas_size,\n        loss_seg=dict(\n            type='MaskFocalLoss',\n            use_sigmoid=True,\n            loss_weight=10.0,\n        ),\n        loss_dice=dict(\n            type='MaskDiceLoss',\n            loss_weight=1.0,\n        )\n    ),\n    model_name='SingleStage'\n)\n\n# data processing pipelines\ntrain_pipeline = [\n    dict(\n        type='VectorizeMap',\n        coords_dim=coords_dim,\n        roi_size=roi_size,\n        sample_num=num_points,\n        normalize=True,\n        permute=permute,\n    ),\n    dict(\n        type='RasterizeMap',   \n        roi_size=roi_size,\n        coords_dim=coords_dim,\n        canvas_size=canvas_size,\n        thickness=thickness,\n        semantic_mask=True,\n    ),\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='PhotoMetricDistortionMultiViewImage'),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img', 'vectors', 'semantic_mask'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# data processing pipelines\ntest_pipeline = [\n    dict(type='LoadMultiViewImagesFromFiles', to_float32=True),\n    dict(type='ResizeMultiViewImages',\n         size=img_size, # H, W\n         change_intrinsics=True,\n         ),\n    dict(type='Normalize3D', **img_norm_cfg),\n    dict(type='PadMultiViewImages', size_divisor=32),\n    dict(type='FormatBundleMap'),\n    dict(type='Collect3D', keys=['img'], meta_keys=(\n        'token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'))\n]\n\n# configs for evaluation code\n# DO NOT CHANGE\neval_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=True,\n            normalize=False,\n            roi_size=roi_size\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors',], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    
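# frame sampling interval (1 = evaluate every frame)\n    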
interval=1,\n)\n\n\nmatch_config = dict(\n    type='NuscDataset',\n    data_root='./datasets/nuscenes',\n    ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n    meta=meta,\n    roi_size=roi_size,\n    cat2id=cat2id,\n    pipeline=[\n        dict(\n            type='VectorizeMap',\n            coords_dim=coords_dim,\n            simplify=False,\n            normalize=True,\n            roi_size=roi_size,\n            sample_num=num_points,\n        ),\n        dict(\n            type='RasterizeMap',\n            roi_size=roi_size,\n            coords_dim=coords_dim,\n            canvas_size=canvas_size,\n            thickness=thickness,\n        ),\n        dict(type='FormatBundleMap'),\n        dict(type='Collect3D', keys=['vectors', 'semantic_mask'], meta_keys=['token', 'ego2img', 'sample_idx', 'ego2global_translation',\n        'ego2global_rotation', 'img_shape', 'scene_name'])\n    ],\n    interval=1,\n)\n\n# dataset configs\ndata = dict(\n    samples_per_gpu=batch_size,\n    workers_per_gpu=8,\n    train=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_train.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=train_pipeline,\n        seq_split_num=-2,\n        matching=True,\n        multi_frame=5,\n        sampling_span=10,\n    ),\n    val=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    test=dict(\n        type='NuscDataset',\n        data_root='./datasets/nuscenes',\n        ann_file='./datasets/nuscenes/nuscenes_map_infos_val.pkl',\n        meta=meta,\n        roi_size=roi_size,\n        cat2id=cat2id,\n        pipeline=test_pipeline,\n        eval_config=eval_config,\n        test_mode=True,\n        seq_split_num=1,\n    ),\n    shuffler_sampler=dict(type='DistributedGroupSampler'),\n    nonshuffler_sampler=dict(type='DistributedSampler')\n)\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=5e-4,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone.img_backbone': dict(lr_mult=0.1),\n            'backbone.img_neck': dict(lr_mult=0.5),\n            'backbone.transformer': dict(lr_mult=0.5),\n            'backbone.positional_encoding': dict(lr_mult=0.5),\n            'seg_decoder': dict(lr_mult=0.5),\n        }),\n    weight_decay=1e-2)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy & schedule\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    min_lr_ratio=3e-3)\n\nevaluation = dict(interval=num_epochs_interval*num_iters_per_epoch)\n#evaluation = dict(interval=1) # for debugging use\nfind_unused_parameters = True #### when using checkpointing, find_unused_parameters must be False\ncheckpoint_config = dict(interval=num_epochs_interval*num_iters_per_epoch)\n\nrunner = dict(\n    type='MyRunnerWrapper', max_iters=num_epochs * num_iters_per_epoch)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='TensorboardLoggerHook')\n    ])\n\nSyncBN = True\n\nload_from = \"work_dirs/maptracker_nusc_oldsplit_5frame_span10_stage2_warmup/latest.pth\"\n"
  },
  {
    "path": "plugin/core/apis/__init__.py",
    "content": "from .train import custom_train_model\nfrom .mmdet_train import custom_train_detector\n# from .test import custom_multi_gpu_test"
  },
  {
    "path": "plugin/core/apis/mmdet_train.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\nimport random\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, IterBasedRunner, RUNNERS,\n                         Fp16OptimizerHook, OptimizerHook, build_optimizer,\n                         build_runner, get_dist_info)\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core import EvalHook\n\nfrom mmdet.datasets import (build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.utils import get_root_logger\nimport time\nimport os.path as osp\nfrom ...datasets.builder import build_dataloader\nfrom ..evaluation.eval_hooks import CustomDistEvalHook\n\n\n@RUNNERS.register_module()\nclass MyRunnerWrapper(IterBasedRunner):\n    def train(self, data_loader, **kwargs):\n        self.model.module.num_iter = self._iter\n        self.model.train()\n        self.mode = 'train'\n        self.data_loader = data_loader\n        self._epoch = data_loader.epoch\n        self.model.module.num_epoch = self._epoch\n        data_batch = next(data_loader)\n        self.call_hook('before_train_iter')\n        outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)\n        if not isinstance(outputs, dict):\n            raise TypeError('model.train_step() must return a dict')\n        if 'log_vars' in outputs:\n            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])\n        self.outputs = outputs\n        self.call_hook('after_train_iter')\n        self._inner_iter += 1\n        self._iter += 1\n\n\ndef custom_train_detector(model,\n                   dataset,\n                   cfg,\n                   distributed=False,\n                   validate=False,\n                   timestamp=None,\n                   eval_model=None,\n                   meta=None):\n    logger = get_root_logger(cfg.log_level)\n\n    # prepare data loaders\n   \n    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]\n    #assert len(dataset)==1s\n    if 'imgs_per_gpu' in cfg.data:\n        logger.warning('\"imgs_per_gpu\" is deprecated in MMDet V2.0. 
'\n                       'Please use \"samples_per_gpu\" instead')\n        if 'samples_per_gpu' in cfg.data:\n            logger.warning(\n                f'Got \"imgs_per_gpu\"={cfg.data.imgs_per_gpu} and '\n                f'\"samples_per_gpu\"={cfg.data.samples_per_gpu}, \"imgs_per_gpu\"'\n                f'={cfg.data.imgs_per_gpu} is used in this experiment')\n        else:\n            logger.warning(\n                'Automatically set \"samples_per_gpu\"=\"imgs_per_gpu\"='\n                f'{cfg.data.imgs_per_gpu} in this experiment')\n        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu\n\n    data_loaders = [\n        build_dataloader(\n            ds,\n            cfg.data.samples_per_gpu,\n            cfg.data.workers_per_gpu,\n            # cfg.gpus will be ignored if distributed\n            len(cfg.gpu_ids),\n            dist=distributed,\n            seed=cfg.seed,\n            shuffler_sampler=cfg.data.shuffler_sampler,  # dict(type='DistributedGroupSampler'),\n            nonshuffler_sampler=cfg.data.nonshuffler_sampler,  # dict(type='DistributedSampler'),\n            runner_type=cfg.runner,\n        ) for ds in dataset\n    ]\n\n    # put model on gpus\n    if distributed:\n        find_unused_parameters = cfg.get('find_unused_parameters', False)\n        # Sets the `find_unused_parameters` parameter in\n        # torch.nn.parallel.DistributedDataParallel\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False,\n            find_unused_parameters=find_unused_parameters)\n        if eval_model is not None:\n            eval_model = MMDistributedDataParallel(\n                eval_model.cuda(),\n                device_ids=[torch.cuda.current_device()],\n                broadcast_buffers=False,\n                find_unused_parameters=find_unused_parameters)\n    else:\n        model = MMDataParallel(\n            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)\n        if eval_model is not None:\n            eval_model = MMDataParallel(\n                eval_model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)\n\n\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n\n    if 'runner' not in cfg:\n        cfg.runner = {\n            'type': 'EpochBasedRunner',\n            'max_epochs': cfg.total_epochs\n        }\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your config.', UserWarning)\n    else:\n        if 'total_epochs' in cfg:\n            assert cfg.total_epochs == cfg.runner.max_epochs\n    if eval_model is not None:\n        runner = build_runner(\n            cfg.runner,\n            default_args=dict(\n                model=model,\n                eval_model=eval_model,\n                optimizer=optimizer,\n                work_dir=cfg.work_dir,\n                logger=logger,\n                meta=meta))\n    else:\n        runner = build_runner(\n            cfg.runner,\n            default_args=dict(\n                model=model,\n                optimizer=optimizer,\n                work_dir=cfg.work_dir,\n                logger=logger,\n                meta=meta))\n\n    # an ugly workaround to make .log and .log.json filenames the same\n    runner.timestamp = timestamp\n\n    # fp16 setting\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        optimizer_config = Fp16OptimizerHook(\n            
**cfg.optimizer_config, **fp16_cfg, distributed=distributed)\n    elif distributed and 'type' not in cfg.optimizer_config:\n        optimizer_config = OptimizerHook(**cfg.optimizer_config)\n    else:\n        optimizer_config = cfg.optimizer_config\n\n    # register hooks\n    runner.register_training_hooks(cfg.lr_config, optimizer_config,\n                                   cfg.checkpoint_config, cfg.log_config,\n                                   cfg.get('momentum_config', None))\n    \n    # register profiler hook\n    #trace_config = dict(type='tb_trace', dir_name='work_dir')\n    #profiler_config = dict(on_trace_ready=trace_config)\n    #runner.register_profiler_hook(profiler_config)\n    \n    if distributed:\n        if isinstance(runner, EpochBasedRunner):\n            runner.register_hook(DistSamplerSeedHook())\n\n    # register eval hooks\n    if validate:\n        # Support batch_size > 1 in validation\n        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)\n        if val_samples_per_gpu > 1:\n            assert False\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.val.pipeline = replace_ImageToTensor(\n                cfg.data.val.pipeline)\n        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))\n        \n        val_dataloader = build_dataloader(\n            val_dataset,\n            samples_per_gpu=val_samples_per_gpu,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=distributed,\n            shuffle=False,\n            shuffler_sampler=cfg.data.shuffler_sampler,  # dict(type='DistributedGroupSampler'),\n            nonshuffler_sampler=cfg.data.nonshuffler_sampler,  # dict(type='DistributedSampler'),\n        )\n        eval_cfg = cfg.get('evaluation', {})\n        #eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'\n        eval_cfg['by_epoch'] = not isinstance(runner, IterBasedRunner)\n        eval_cfg['jsonfile_prefix'] = osp.join('val', cfg.work_dir, time.ctime().replace(' ','_').replace(':','_'))\n        eval_hook = CustomDistEvalHook if distributed else EvalHook\n\n        runner.register_hook(eval_hook(val_dataloader, **eval_cfg), priority='LOW')\n\n    # user-defined hooks\n    if cfg.get('custom_hooks', None):\n        custom_hooks = cfg.custom_hooks\n        assert isinstance(custom_hooks, list), \\\n            f'custom_hooks expect list type, but got {type(custom_hooks)}'\n        for hook_cfg in cfg.custom_hooks:\n            assert isinstance(hook_cfg, dict), \\\n                'Each item in custom_hooks expects dict type, but got ' \\\n                f'{type(hook_cfg)}'\n            hook_cfg = hook_cfg.copy()\n            priority = hook_cfg.pop('priority', 'NORMAL')\n            hook = build_from_cfg(hook_cfg, HOOKS)\n            runner.register_hook(hook, priority=priority)\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow)\n\n"
  },
  {
    "path": "plugin/core/apis/test.py",
    "content": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# ---------------------------------------------\r\n#  Modified by Zhiqi Li\r\n# ---------------------------------------------\r\nimport os.path as osp\r\nimport pickle\r\nimport shutil\r\nimport tempfile\r\nimport time\r\n\r\nimport mmcv\r\nimport torch\r\nimport torch.distributed as dist\r\nfrom mmcv.image import tensor2imgs\r\nfrom mmcv.runner import get_dist_info\r\n\r\nfrom mmdet.core import encode_mask_results\r\n\r\n\r\nimport mmcv\r\nimport numpy as np\r\nimport pycocotools.mask as mask_util\r\n\r\ndef custom_encode_mask_results(mask_results):\r\n    \"\"\"Encode bitmap mask to RLE code. Semantic Masks only\r\n    Args:\r\n        mask_results (list | tuple[list]): bitmap mask results.\r\n            In mask scoring rcnn, mask_results is a tuple of (segm_results,\r\n            segm_cls_score).\r\n    Returns:\r\n        list | tuple: RLE encoded mask.\r\n    \"\"\"\r\n    cls_segms = mask_results\r\n    num_classes = len(cls_segms)\r\n    encoded_mask_results = []\r\n    for i in range(len(cls_segms)):\r\n        encoded_mask_results.append(\r\n            mask_util.encode(\r\n                np.array(\r\n                    cls_segms[i][:, :, np.newaxis], order='F',\r\n                        dtype='uint8'))[0])  # encoded with RLE\r\n    return [encoded_mask_results]\r\n\r\ndef custom_multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):\r\n    \"\"\"Test model with multiple gpus.\r\n    This method tests model with multiple gpus and collects the results\r\n    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'\r\n    it encodes results to gpu tensors and use gpu communication for results\r\n    collection. 
On cpu mode it saves the results on different gpus to 'tmpdir'\r\n    and collects them by the rank 0 worker.\r\n    Args:\r\n        model (nn.Module): Model to be tested.\r\n        data_loader (nn.Dataloader): Pytorch data loader.\r\n        tmpdir (str): Path of directory to save the temporary results from\r\n            different gpus under cpu mode.\r\n        gpu_collect (bool): Option to use either gpu or cpu to collect results.\r\n    Returns:\r\n        list: The prediction results.\r\n    \"\"\"\r\n    model.eval()\r\n    bbox_results = []\r\n    mask_results = []\r\n    dataset = data_loader.dataset\r\n    rank, world_size = get_dist_info()\r\n    if rank == 0:\r\n        prog_bar = mmcv.ProgressBar(len(dataset))\r\n    time.sleep(2)  # This line can prevent deadlock problem in some cases.\r\n    have_mask = False\r\n    for i, data in enumerate(data_loader):\r\n        with torch.no_grad():\r\n            result = model(return_loss=False, rescale=True, **data)\r\n            # encode mask results\r\n            if isinstance(result, dict):\r\n                if 'bbox_results' in result.keys():\r\n                    bbox_result = result['bbox_results']\r\n                    batch_size = len(result['bbox_results'])\r\n                    bbox_results.extend(bbox_result)\r\n                if 'mask_results' in result.keys() and result['mask_results'] is not None:\r\n                    mask_result = custom_encode_mask_results(result['mask_results'])\r\n                    mask_results.extend(mask_result)\r\n                    have_mask = True\r\n            else:\r\n                batch_size = len(result)\r\n                bbox_results.extend(result)\r\n\r\n        if rank == 0:\r\n            \r\n            for _ in range(batch_size * world_size):\r\n                prog_bar.update()\r\n\r\n    # collect results from all ranks\r\n    if gpu_collect:\r\n        bbox_results = collect_results_gpu(bbox_results, len(dataset))\r\n        if have_mask:\r\n            mask_results = collect_results_gpu(mask_results, len(dataset))\r\n        else:\r\n            mask_results = None\r\n    else:\r\n        bbox_results = collect_results_cpu(bbox_results, len(dataset), tmpdir)\r\n        tmpdir = tmpdir+'_mask' if tmpdir is not None else None\r\n        if have_mask:\r\n            mask_results = collect_results_cpu(mask_results, len(dataset), tmpdir)\r\n        else:\r\n            mask_results = None\r\n\r\n    if mask_results is None:\r\n        return bbox_results\r\n    return {'bbox_results': bbox_results, 'mask_results': mask_results}\r\n\r\n\r\ndef collect_results_cpu(result_part, size, tmpdir=None):\r\n    rank, world_size = get_dist_info()\r\n    # create a tmp dir if it is not specified\r\n    if tmpdir is None:\r\n        MAX_LEN = 512\r\n        # 32 is whitespace\r\n        dir_tensor = torch.full((MAX_LEN, ),\r\n                                32,\r\n                                dtype=torch.uint8,\r\n                                device='cuda')\r\n        if rank == 0:\r\n            mmcv.mkdir_or_exist('.dist_test')\r\n            tmpdir = tempfile.mkdtemp(dir='.dist_test')\r\n            tmpdir = torch.tensor(\r\n                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')\r\n            dir_tensor[:len(tmpdir)] = tmpdir\r\n        dist.broadcast(dir_tensor, 0)\r\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\r\n    else:\r\n        mmcv.mkdir_or_exist(tmpdir)\r\n    # dump the part result to the dir\r\n    
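# (rank 0 will load and merge all ranks' shards after the barrier below)\r\n    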
mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))\r\n    dist.barrier()\r\n    # collect all parts\r\n    if rank != 0:\r\n        return None\r\n    else:\r\n        # load results of all parts from tmp dir\r\n        part_list = []\r\n        for i in range(world_size):\r\n            part_file = osp.join(tmpdir, f'part_{i}.pkl')\r\n            part_list.append(mmcv.load(part_file))\r\n        # sort the results\r\n        ordered_results = []\r\n        '''\r\n        Because we changed the evaluation-stage sampler so that each gpu handles a contiguous\r\n        chunk of samples, the per-rank results are concatenated in rank order here instead of\r\n        being interleaved (the original interleaved ordering is kept below for reference).\r\n        '''\r\n        #for res in zip(*part_list):\r\n        for res in part_list:\r\n            ordered_results.extend(list(res))\r\n        # the dataloader may pad some samples\r\n        print(f'\\ntruncate {size} samples from {len(ordered_results)}')\r\n        ordered_results = ordered_results[:size]\r\n        # remove tmp dir\r\n        shutil.rmtree(tmpdir)\r\n        return ordered_results\r\n\r\n\r\ndef collect_results_gpu(result_part, size):\r\n    # fall back to CPU-based collection; the merged results must be returned to the caller\r\n    return collect_results_cpu(result_part, size)"
  },
  {
    "path": "plugin/core/apis/train.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\n\nfrom .mmdet_train import custom_train_detector\nfrom mmseg.apis import train_segmentor\nfrom mmdet.apis import train_detector\n\ndef custom_train_model(model,\n                dataset,\n                cfg,\n                distributed=False,\n                validate=False,\n                timestamp=None,\n                eval_model=None,\n                meta=None):\n    \"\"\"A function wrapper for launching model training according to cfg.\n\n    Because we need different eval_hook in runner. Should be deprecated in the\n    future.\n    \"\"\"\n    if cfg.model.type in ['EncoderDecoder3D']:\n        assert False\n    else:\n        custom_train_detector(\n            model,\n            dataset,\n            cfg,\n            distributed=distributed,\n            validate=validate,\n            timestamp=timestamp,\n            eval_model=eval_model,\n            meta=meta)\n\n\ndef train_model(model,\n                dataset,\n                cfg,\n                distributed=False,\n                validate=False,\n                timestamp=None,\n                meta=None):\n    \"\"\"A function wrapper for launching model training according to cfg.\n\n    Because we need different eval_hook in runner. Should be deprecated in the\n    future.\n    \"\"\"\n    if cfg.model.type in ['EncoderDecoder3D']:\n        train_segmentor(\n            model,\n            dataset,\n            cfg,\n            distributed=distributed,\n            validate=validate,\n            timestamp=timestamp,\n            meta=meta)\n    else:\n        train_detector(\n            model,\n            dataset,\n            cfg,\n            distributed=distributed,\n            validate=validate,\n            timestamp=timestamp,\n            meta=meta)\n"
  },
  {
    "path": "plugin/core/evaluation/__init__.py",
    "content": "from .eval_hooks import CustomDistEvalHook"
  },
  {
    "path": "plugin/core/evaluation/eval_hooks.py",
    "content": "\n# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,\n# in order to avoid strong version dependency, we did not directly\n# inherit EvalHook but BaseDistEvalHook.\n\nimport bisect\nimport os.path as osp\n\nimport mmcv\nimport torch.distributed as dist\nfrom mmcv.runner import DistEvalHook as BaseDistEvalHook\nfrom mmcv.runner import EvalHook as BaseEvalHook\nfrom torch.nn.modules.batchnorm import _BatchNorm\nfrom mmdet.core.evaluation.eval_hooks import DistEvalHook\n\n\ndef _calc_dynamic_intervals(start_interval, dynamic_interval_list):\n    assert mmcv.is_list_of(dynamic_interval_list, tuple)\n\n    dynamic_milestones = [0]\n    dynamic_milestones.extend(\n        [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])\n    dynamic_intervals = [start_interval]\n    dynamic_intervals.extend(\n        [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])\n    return dynamic_milestones, dynamic_intervals\n\n\nclass CustomDistEvalHook(BaseDistEvalHook):\n\n    def __init__(self, *args, dynamic_intervals=None,  **kwargs):\n        super(CustomDistEvalHook, self).__init__(*args, **kwargs)\n        self.use_dynamic_intervals = dynamic_intervals is not None\n        if self.use_dynamic_intervals:\n            self.dynamic_milestones, self.dynamic_intervals = \\\n                _calc_dynamic_intervals(self.interval, dynamic_intervals)\n\n    def _decide_interval(self, runner):\n        if self.use_dynamic_intervals:\n            progress = runner.epoch if self.by_epoch else runner.iter\n            step = bisect.bisect(self.dynamic_milestones, (progress + 1))\n            # Dynamically modify the evaluation interval\n            self.interval = self.dynamic_intervals[step - 1]\n\n    def before_train_epoch(self, runner):\n        \"\"\"Evaluate the model only at the start of training by epoch.\"\"\"\n        self._decide_interval(runner)\n        super().before_train_epoch(runner)\n\n    def before_train_iter(self, runner):\n        self._decide_interval(runner)\n        super().before_train_iter(runner)\n\n    def _do_evaluate(self, runner):\n        \"\"\"perform evaluation and save ckpt.\"\"\"\n        # Synchronization of BatchNorm's buffer (running_mean\n        # and running_var) is not supported in the DDP of pytorch,\n        # which may cause the inconsistent performance of models in\n        # different ranks, so we broadcast BatchNorm's buffers\n        # of rank 0 to other ranks to avoid this.\n        if self.broadcast_bn_buffer:\n            model = runner.model\n            for name, module in model.named_modules():\n                if isinstance(module,\n                              _BatchNorm) and module.track_running_stats:\n                    dist.broadcast(module.running_var, 0)\n                    dist.broadcast(module.running_mean, 0)\n\n        if not self._should_evaluate(runner):\n            return\n\n        tmpdir = self.tmpdir\n        if tmpdir is None:\n            tmpdir = osp.join(runner.work_dir, '.eval_hook')\n\n        from ..apis.test import custom_multi_gpu_test # to solve circlur  import\n\n        results = custom_multi_gpu_test(\n            runner.model,\n            self.dataloader,\n            tmpdir=tmpdir,\n            gpu_collect=self.gpu_collect)\n        \n        if runner.rank == 0:\n            print('\\n')\n            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)\n\n            key_score = self.evaluate(runner, results)\n\n            if 
self.save_best:\n                self._save_ckpt(runner, key_score)\n  \n"
  },
  {
    "path": "plugin/datasets/__init__.py",
    "content": "from .pipelines import *\nfrom .argo_dataset import AV2Dataset\nfrom .nusc_dataset import NuscDataset\n"
  },
  {
    "path": "plugin/datasets/argo_dataset.py",
    "content": "from .base_dataset import BaseMapDataset\nfrom .map_utils.av2map_extractor import AV2MapExtractor\nfrom mmdet.datasets import DATASETS\nimport numpy as np\nfrom .visualize.renderer import Renderer\nfrom time import time\nimport mmcv\nfrom pyquaternion import Quaternion\n\nimport pickle\nimport os\n\n\n@DATASETS.register_module()\nclass AV2Dataset(BaseMapDataset):\n    \"\"\"Argoverse2 map dataset class.\n\n    Args:\n        ann_file (str): annotation file path\n        cat2id (dict): category to class id\n        roi_size (tuple): bev range\n        eval_config (Config): evaluation config\n        meta (dict): meta information\n        pipeline (Config): data processing pipeline config,\n        interval (int): annotation load interval\n        work_dir (str): path to work dir\n        test_mode (bool): whether in test mode\n    \"\"\"\n\n    def __init__(self, **kwargs,):\n        super().__init__(**kwargs)\n        self.map_extractor = AV2MapExtractor(self.roi_size, self.id2map)\n\n        self.renderer = Renderer(self.cat2id, self.roi_size, 'av2')\n    \n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        \n        start_time = time()\n        ann = mmcv.load(ann_file)\n        self.id2map = ann['id2map']\n        samples = ann['samples']\n\n        if 'newsplit' not in ann_file:\n            if 'val' in ann_file:\n                # For the old split testing, we make sure that the test set matches exactly with the MapTR codebase\n                # NOTE: simply sort&sampling will produce slightly different results compared to MapTR's samples\n                # so we have to directly use the saved meta information from MapTR codebase to get the samples\n                maptr_meta_path = os.path.join(os.path.dirname(ann_file), 'maptrv2_val_samples_info.pkl')\n                with open(maptr_meta_path, 'rb') as f:\n                    maptr_meta = pickle.load(f)\n                maptr_unique_tokens = [x['token'] for x in maptr_meta['samples_meta']]\n\n                unique_token2samples = {}\n                for sample in samples:\n                    unique_token2samples[f'{sample[\"log_id\"]}_{sample[\"token\"]}'] = sample\n\n                samples = [unique_token2samples[x] for x in maptr_unique_tokens]\n            else:\n                # For the old split training, we follow MapTR's data loading, which\n                # sorts the samples based on the token, then do sub-sampling\n                samples = list(sorted(samples, key=lambda e: e['token']))\n                samples = samples[::self.interval]\n        else:\n            # For the new split, we simply follow StreamMapNet, do not sort based on the token\n            # In this way, the intervals between consecutive frames are uniform...\n            samples = samples[::self.interval]\n\n        # Since the sorted order copied from MapTR does not strictly enforce that\n        # samples of the same scene are consecutive, need to re-arrange\n        scene_name2idx = {}\n        for idx, sample in enumerate(samples):\n            scene = sample['log_id']\n            if scene not in scene_name2idx:\n                scene_name2idx[scene] = []\n            scene_name2idx[scene].append(idx)\n\n        samples_rearrange = []\n        for scene_name in scene_name2idx:\n            scene_sample_ids = 
scene_name2idx[scene_name]\n            for sample_id in scene_sample_ids:\n                samples_rearrange.append(samples[sample_id])\n        \n        samples = samples_rearrange\n\n        print(f'collected {len(samples)} samples in {(time() - start_time):.2f}s')\n        self.samples = samples\n\n    def load_matching(self, matching_file):\n        with open(matching_file, 'rb') as pf:\n            data = pickle.load(pf)\n        total_samples = 0\n        for scene_name, info in data.items():\n            total_samples += len(info['sample_ids'])\n\n        assert total_samples == len(self.samples), 'Matching info not matched with data samples'\n        self.matching_meta = data\n        print(f'loaded matching meta for {len(data)} scenes')\n\n    def get_sample(self, idx):\n        \"\"\"Get data sample. For each sample, map extractor will be applied to extract \n        map elements. \n\n        Args:\n            idx (int): data index\n\n        Returns:\n            result (dict): dict of input\n        \"\"\"\n\n        sample = self.samples[idx]\n        log_id = sample['log_id']\n        map_geoms = self.map_extractor.get_map_geom(log_id, sample['e2g_translation'], \n                sample['e2g_rotation'])\n\n        map_label2geom = {}\n        for k, v in map_geoms.items():\n            if k in self.cat2id.keys():\n                map_label2geom[self.cat2id[k]] = v\n        \n        ego2img_rts = []\n        for c in sample['cams'].values():\n            extrinsic, intrinsic = np.array(\n                c['extrinsics']), np.array(c['intrinsics'])\n            ego2cam_rt = extrinsic\n            viewpad = np.eye(4)\n            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\n            ego2cam_rt = (viewpad @ ego2cam_rt)\n            ego2img_rts.append(ego2cam_rt)\n\n        input_dict = {\n            'token': sample['token'],\n            'img_filenames': [c['img_fpath'] for c in sample['cams'].values()],\n            # intrinsics are 3x3 Ks\n            'cam_intrinsics': [c['intrinsics'] for c in sample['cams'].values()],\n            # extrinsics are 4x4 transform matrix, NOTE: **ego2cam**\n            'cam_extrinsics': [c['extrinsics'] for c in sample['cams'].values()],\n            'ego2img': ego2img_rts,\n            'map_geoms': map_label2geom, # {0: List[ped_crossing(LineString)], 1: ...}\n            'ego2global_translation': sample['e2g_translation'], \n            'ego2global_rotation': sample['e2g_rotation'].tolist(),\n            'sample_idx': sample['modified_sample_idx'],\n            'scene_name': sample['scene_name'],\n            'lidar_path': sample['lidar_fpath']\n        }\n\n        return input_dict"
  },
  {
    "path": "plugin/datasets/base_dataset.py",
    "content": "import numpy as np\nimport os\nimport os.path as osp\nimport mmcv\nfrom .evaluation.raster_eval import RasterEvaluate\nfrom .evaluation.vector_eval import VectorEvaluate\nfrom mmdet3d.datasets.pipelines import Compose\nfrom mmdet.datasets import DATASETS\nfrom torch.utils.data import Dataset\nfrom mmcv.parallel import DataContainer as DC\nimport warnings\nimport pickle\n\n\nwarnings.filterwarnings(\"ignore\")\n\n@DATASETS.register_module()\nclass BaseMapDataset(Dataset):\n    \"\"\"Map dataset base class.\n\n    Args:\n        ann_file (str): annotation file path\n        cat2id (dict): category to class id\n        roi_size (tuple): bev range\n        eval_config (Config): evaluation config\n        meta (dict): meta information\n        pipeline (Config): data processing pipeline config,\n        interval (int): annotation load interval\n        work_dir (str): path to work dir\n        test_mode (bool): whether in test mode\n    \"\"\"\n    def __init__(self, \n                 ann_file,\n                 cat2id,\n                 roi_size,\n                 meta,\n                 pipeline,\n                 interval=1,\n                 seq_split_num=1,\n                 work_dir=None,\n                 eval_config=None,\n                 test_mode=False,\n                 multi_frame=False,\n                 sampling_span=10,\n                 matching=False,\n                 eval_semantic=False,\n        ):\n        super().__init__()\n        self.ann_file = ann_file\n        self.multi_frame = multi_frame\n        self.sampling_span = sampling_span\n        self.matching = matching\n        \n        self.meta = meta\n        self.classes = list(cat2id.keys())\n        self.num_classes = len(self.classes)\n        self.cat2id = cat2id\n        self.interval = interval\n        self.seq_split_num = seq_split_num\n        self.eval_semantic = eval_semantic\n\n        self.load_annotations(self.ann_file)\n\n        if matching:\n            assert self.multi_frame, 'The matching info has to loaded under the multi-frame setting'\n            self.matching_file = ann_file[:-4] + '_gt_tracks.pkl'\n            assert os.path.isfile(self.matching_file)\n            self.load_matching(self.matching_file)\n        \n        self.idx2token = {}\n        for i, s in enumerate(self.samples):\n            self.idx2token[i] = s['token']\n        self.token2idx = {v: k for k, v in self.idx2token.items()}\n\n        if pipeline is not None:\n            self.pipeline = Compose(pipeline)\n        else:\n            self.pipeline = None\n        \n        # dummy flags to fit with mmdet dataset\n        self.flag = np.zeros(len(self), dtype=np.uint8)\n\n        self.roi_size = roi_size\n        \n        self.work_dir = work_dir\n        self.eval_config = eval_config\n        if self.eval_config is not None:\n            assert test_mode, \"eval_config is valid only in test_mode\"\n\n        # record the sequence information, prepare for two-frame data loading\n        self._set_sequence_info()\n        \n        self._set_sequence_group_flag()\n        \n    \n    def _set_sequence_info(self):\n        \"\"\"Compute and record the sequence id and local index of each sample\n        \"\"\"\n        scene_name2idx = {}\n        for idx, sample in enumerate(self.samples):\n            self.samples[idx]['modified_sample_idx'] = idx\n            scene = sample['scene_name']\n            if scene not in scene_name2idx:\n                scene_name2idx[scene] = []\n                
self.samples[idx]['prev'] = -1\n\n            scene_name2idx[scene].append(idx)\n        self.scene_name2idx = scene_name2idx\n\n        print('Prepare sequence information for {}'.format(self.ann_file))\n        idx2scene = {}\n        for scene_name, scene_info in scene_name2idx.items():\n            for local_idx, global_idx in enumerate(scene_info):\n                idx2scene[global_idx] = (scene_name, local_idx, len(scene_info))\n        self.idx2scene = idx2scene\n\n    def _set_sequence_group_flag(self):\n        \"\"\"\n        Set each sequence to be a different group\n        \"\"\"\n        if self.seq_split_num == -1:\n            self.flag = np.arange(len(self.samples))\n            return\n        elif self.seq_split_num == -2:\n            return\n        \n        res = []\n\n        curr_sequence = -1\n        for idx in range(len(self.samples)):\n            if self.samples[idx]['prev'] == -1:\n                # new sequence\n                curr_sequence += 1\n            res.append(curr_sequence)\n\n        self.flag = np.array(res, dtype=np.int64)\n\n        if self.seq_split_num != 1:\n            bin_counts = np.bincount(self.flag)\n            new_flags = []\n            curr_new_flag = 0\n            for curr_flag in range(len(bin_counts)):\n                seq_length = int(round(bin_counts[curr_flag] / self.seq_split_num))\n                curr_sequence_length = list(range(0, bin_counts[curr_flag], seq_length)) + [bin_counts[curr_flag]]\n                \n                # if left one sample, put it into the last sequence\n                if curr_sequence_length[-1] - curr_sequence_length[-2] <= 1:\n                    curr_sequence_length = curr_sequence_length[:-2] + [curr_sequence_length[-1]]\n                \n                curr_sequence_length = np.array(curr_sequence_length)\n\n                for sub_seq_idx in (curr_sequence_length[1:] - curr_sequence_length[:-1]):\n                    for _ in range(sub_seq_idx):\n                        new_flags.append(curr_new_flag)\n                    curr_new_flag += 1\n\n            assert len(new_flags) == len(self.flag)\n            # assert len(np.bincount(new_flags)) == len(np.bincount(self.flag)) * self.seq_split_num\n            self.flag = np.array(new_flags, dtype=np.int64)\n\n    def load_annotations(self, ann_file):\n        raise NotImplementedError\n    \n    def load_matching(self, matching_file):\n        raise NotImplementedError\n\n    def get_sample(self, idx):\n        raise NotImplementedError\n\n    def format_results(self, results, denormalize=True, prefix=None, save_semantic=False):\n        '''Format prediction result to submission format.\n        \n        Args:\n            results (list[Tensor]): List of prediction results.\n            denormalize (bool): whether to denormalize prediction from (0, 1) \\\n                to bev range. 
Default: True\n            prefix (str): work dir prefix to save submission file.\n\n        Returns:\n            dict: Evaluation results\n        '''\n\n        meta = self.meta\n        output_format = meta['output_format']\n        submissions = {\n            'meta': meta,\n            'results': {},\n        }\n\n        if output_format == 'raster':\n            for pred in results:\n                single_case = {}\n                token = pred['token']\n                pred_map = pred['semantic_mask']\n                pred_bool = pred_map > 0\n                single_case['semantic_mask'] = pred_bool.bool()\n                submissions['results'][token] = single_case\n            \n            # Use pickle format to minimize submission file size.\n            out_path = osp.join(prefix, 'submission_raster.pkl')\n            print(f'saving submissions results to {out_path}')\n            os.makedirs(os.path.dirname(out_path), exist_ok=True)\n            mmcv.dump(submissions, out_path)\n            return out_path\n\n        elif output_format == 'vector':\n            all_pos_results = []\n            for pred in results:\n                '''\n                For each case, the result should be formatted as Dict{'vectors': [], 'scores': [], 'labels': []}\n                'vectors': List of vector, each vector is an array([[x1, y1], [x2, y2] ...]),\n                    contains all vectors predicted in this sample.\n                'scores': List of score(float),\n                    contains scores of all instances in this sample.\n                'labels': List of label(int),\n                    contains labels of all instances in this sample.\n                '''\n                if pred is None: # empty prediction\n                    continue\n                \n                single_case = {'vectors': [], 'scores': [], 'labels': [], 'props': [],\n                        'track_vectors': [], 'track_scores': [], 'track_labels': []}\n                token = pred['token']\n                roi_size = np.array(self.roi_size)\n                origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])\n                \n                # save the extra semantic info\n                if save_semantic:\n                    single_case['semantic_mask'] = pred['semantic_mask'].tolist()\n\n                if 'scores' in pred:\n                    for i in range(len(pred['scores'])):\n                        score = pred['scores'][i]\n                        label = pred['labels'][i]\n                        vector = pred['vectors'][i]\n                        prop = pred['props'][i]\n\n                        # A line should have >=2 points\n                        if len(vector) < 2:\n                            continue\n                        \n                        if denormalize:\n                            eps = 1e-5\n                            vector = vector * (roi_size + eps) + origin\n\n                        single_case['vectors'].append(vector)\n                        single_case['scores'].append(score)\n                        single_case['labels'].append(label)\n                        single_case['props'].append(prop)\n                \n                if 'track_scores' in pred:\n                    # also save the tracking information for analysis\n                    for i in range(len(pred['track_scores'])):\n                        score = pred['track_scores'][i]\n                        label = pred['track_labels'][i]\n                        vector = 
pred['track_vectors'][i]\n                        if denormalize:\n                            eps = 1e-5\n                            vector = vector * (roi_size + eps) + origin\n                        single_case['track_vectors'].append(vector)\n                        single_case['track_scores'].append(score)\n                        single_case['track_labels'].append(label)\n\n                submissions['results'][token] = single_case\n                \n                if not self.eval_semantic:\n                    pos_results = pred['pos_results']\n                    pos_vectors = pos_results['vectors']\n                    if denormalize and len(pos_vectors) > 0:\n                        pos_vectors = pos_vectors.reshape(pos_vectors.shape[0], -1, 2)\n                        pos_vectors = (pos_vectors * roi_size + origin).reshape(pos_vectors.shape[0], -1)\n                    save_pos_results = {\n                        'vectors': pos_vectors,\n                        'labels': pos_results['labels'],\n                        'scores': pos_results['scores'],\n                        'scene_name': pos_results['scene_name'],\n                        'local_idx': pos_results['local_idx'],\n                        'global_ids': pos_results['global_ids'],\n                        'meta': pred['meta']\n                    }\n                    all_pos_results.append(save_pos_results)\n                \n            out_path = osp.join(prefix, 'submission_vector.json')\n            print(f'saving submissions results to {out_path}')\n            os.makedirs(os.path.dirname(out_path), exist_ok=True)\n            mmcv.dump(submissions, out_path)\n\n            if not self.eval_semantic:\n                out_path_pos = osp.join(prefix, 'pos_predictions.pkl')\n                with open(out_path_pos, 'wb') as f:\n                    pickle.dump(all_pos_results, f, protocol=pickle.HIGHEST_PROTOCOL)\n            \n            return out_path\n\n        else:\n            raise ValueError(\"output format must be either \\'raster\\' or \\'vector\\'\")\n\n    def evaluate(self, results, logger=None, **kwargs):\n        '''Evaluate prediction result based on `output_format` specified by dataset.\n\n        Args:\n            results (list[Tensor]): List of prediction results.\n            logger (logger): logger to print evaluation results.\n\n        Returns:\n            dict: Evaluation results.\n        '''\n        print('len of the results', len(results))\n\n        eval_semantic = True if (hasattr(self, 'eval_semantic') and self.eval_semantic) else False\n        save_semantic = True if 'save_semantic' in kwargs and kwargs['save_semantic'] or eval_semantic \\\n                            else False\n        \n        result_path = self.format_results(results, denormalize=True, \n                        prefix=self.work_dir, save_semantic=save_semantic)\n\n        return self._evaluate(result_path, logger=logger, eval_semantic=eval_semantic)\n    \n    def _evaluate(self, result_path, logger=None, eval_semantic=False):\n        if not eval_semantic:\n            self.evaluator = VectorEvaluate(self.eval_config)\n        else:\n            self.evaluator = RasterEvaluate(self.eval_config)\n        result_dict = self.evaluator.evaluate(result_path, logger=logger)\n        return result_dict\n\n    def show_gt(self, idx, out_dir='demo/'):\n        '''Visualize ground-truth.\n\n        Args:\n            idx (int): index of sample.\n            out_dir (str): output directory.\n        '''\n\n     
   from mmcv.parallel import DataContainer\n        from copy import deepcopy\n        sample = self.get_sample(idx)\n        sample = deepcopy(sample)\n        data = self.pipeline(sample)\n\n        #imgs = [mmcv.imread(i) for i in sample['img_filenames']]\n        #cam_extrinsics = sample['cam_extrinsics']\n        #cam_intrinsics = sample['cam_intrinsics']\n\n        if 'vectors' in data:\n            vectors = data['vectors']\n            if isinstance(vectors, DataContainer):\n                vectors = vectors.data\n\n            self.renderer.render_bev_from_vectors(vectors, out_dir)\n            #self.renderer.render_camera_views_from_vectors(vectors, imgs, \n            #    cam_extrinsics, cam_intrinsics, 2, out_dir)\n\n        if 'semantic_mask' in data:\n            semantic_mask = data['semantic_mask']\n            if isinstance(semantic_mask, DataContainer):\n                semantic_mask = semantic_mask.data\n            \n            self.renderer.render_bev_from_mask(semantic_mask, out_dir, flip=True)\n\n    def show_result(self, submission, idx, score_thr=0, draw_score=False, show_semantic=False, out_dir='demo/'):\n        '''Visualize prediction result.\n\n        Args:\n            idx (int): index of sample.\n            submission (dict): prediction results.\n            score_thr (float): threshold to filter prediction results.\n            out_dir (str): output directory.\n        '''\n\n        meta = submission['meta']\n        output_format = meta['output_format']\n        token = self.idx2token[idx]\n        results = submission['results'][token]\n        sample = self.get_sample(idx)\n\n\n        if 'semantic_mask' in results and show_semantic:\n            semantic_mask = np.array(results['semantic_mask'])\n            self.renderer.render_bev_from_mask(semantic_mask, out_dir, flip=False)\n        \n        if output_format == 'vector' and 'scores' in results:\n            vectors = {label: [] for label in self.cat2id.values()}\n            for i in range(len(results['labels'])):\n                score = results['scores'][i]\n                label = results['labels'][i]\n                prop = results['props'][i]\n                v = results['vectors'][i]\n\n                if score > score_thr:\n                    if draw_score:\n                        vectors[label].append((v, score, prop))\n                    else:\n                        vectors[label].append(v)\n\n            self.renderer.render_bev_from_vectors(vectors, out_dir, draw_scores=draw_score)\n\n            # For projecting and visualizing results on perspective images\n            #imgs = [mmcv.imread(i) for i in sample['img_filenames']]\n            #cam_extrinsics = sample['cam_extrinsics']\n            #cam_intrinsics = sample['cam_intrinsics']\n            # self.renderer.render_camera_views_from_vectors(vectors, imgs, \n            #         cam_extrinsics, cam_intrinsics, 2, out_dir)\n    \n\n    def show_track(self, submission, idx, out_dir='demo/'):\n        '''Visualize prediction result.\n\n        Args:\n            idx (int): index of sample.\n            submission (dict): prediction results.\n            score_thr (float): threshold to filter prediction results.\n            out_dir (str): output directory.\n        '''\n\n        meta = submission['meta']\n        token = self.idx2token[idx]\n        results = submission['results'][token]\n\n        vectors = {label: [] for label in self.cat2id.values()}\n        for i in range(len(results['track_labels'])):\n            
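# each entry is (vector, score, 1); the constant 1 fills the prop slot that the renderer expects\n            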
score = results['track_scores'][i]\n            label = results['track_labels'][i]\n            v = results['track_vectors'][i]\n            vectors[label].append((v, score, 1))\n        \n        self.renderer.render_bev_from_vectors(vectors, out_dir, draw_scores=True)\n\n    def __len__(self):\n        \"\"\"Return the length of data infos.\n\n        Returns:\n            int: Length of data infos.\n        \"\"\"\n        return len(self.samples)\n        \n    def _rand_another(self, idx):\n        \"\"\"Randomly get another item.\n\n        Returns:\n            int: Another index of item.\n        \"\"\"\n        # pass the dataset length (an int), not the bound __len__ method\n        return np.random.choice(len(self))\n\n    def __getitem__(self, idx):\n        \"\"\"Get item from infos according to the given index.\n\n        Returns:\n            dict: Data dictionary of the corresponding index.\n        \"\"\"\n        input_dict = self.get_sample(idx)\n        data = self.pipeline(input_dict)\n\n        # prepare the local sequence index info\n        seq_info = self.idx2scene[idx]\n        data['seq_info'] = DC(seq_info, cpu_only=True)\n\n        if self.multi_frame: # used when sampling multi-frame training data\n            scene_name = input_dict['scene_name']\n            scene_seq_info = self.scene_name2idx[scene_name]\n            local_idx_curr = input_dict['sample_idx'] - scene_seq_info[0]\n\n            span = max(self.sampling_span, self.multi_frame)\n            min_idx = local_idx_curr - span\n            sampled_indices = np.random.choice(span, self.multi_frame-1, replace=False).tolist()\n            sampled_indices = sorted(sampled_indices)\n            local_indices_prev = [min_idx + x for x in sampled_indices]\n            local_indices_prev = [x if x>=0 else 0 for x in local_indices_prev]\n            \n            data['img_metas'].data['local_idx'] = local_idx_curr\n            global_indices_prev = [local_idx + scene_seq_info[0] for local_idx in local_indices_prev]\n\n            all_prev_data = []\n            for idx, global_idx_prev in enumerate(global_indices_prev):\n                input_dict_prev = self.get_sample(global_idx_prev)\n                data_prev = self.pipeline(input_dict_prev)\n                local_idx_prev = local_indices_prev[idx]\n                data_prev['img_metas'].data['local_idx'] = local_idx_prev\n                all_prev_data.append(data_prev)\n\n            all_local2global_info = []\n            if self.matching:\n                scene_matching_info = self.matching_meta[scene_name]\n                for local_idx_prev in local_indices_prev:\n                    prev_local2global = DC(scene_matching_info['instance_ids'][local_idx_prev], cpu_only=True)\n                    all_local2global_info.append(prev_local2global)\n                # the current frame's mapping is only available when matching info is loaded,\n                # so it must stay inside this branch to avoid a NameError\n                curr_local2global = DC(scene_matching_info['instance_ids'][local_idx_curr], cpu_only=True)\n                all_local2global_info.append(curr_local2global)\n                    \n            data['all_prev_data'] = all_prev_data\n            data['all_local2global_info'] = all_local2global_info\n        \n        return data\n\n"
  },
  {
    "path": "plugin/datasets/builder.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\nimport copy\nimport platform\nimport random\nfrom functools import partial\n\nimport numpy as np\nfrom mmcv.parallel import collate\nfrom mmcv.runner import get_dist_info\nfrom mmcv.utils import Registry, build_from_cfg\nfrom torch.utils.data import DataLoader\n\nfrom mmdet.datasets.samplers import GroupSampler\nfrom .samplers.group_sampler import DistributedGroupSampler\nfrom .samplers.distributed_sampler import DistributedSampler\nfrom .samplers.group_sampler import InfiniteGroupEachSampleInBatchSampler\nfrom .samplers.sampler import build_sampler\n\ndef build_dataloader(dataset,\n                     samples_per_gpu,\n                     workers_per_gpu,\n                     num_gpus=1,\n                     dist=True,\n                     shuffle=True,\n                     seed=None,\n                     shuffler_sampler=None,\n                     nonshuffler_sampler=None,\n                     runner_type=dict(type='EpochBasedRunner'),\n                     **kwargs):\n    \"\"\"Build PyTorch DataLoader.\n    In distributed training, each GPU/process has a dataloader.\n    In non-distributed training, there is only one dataloader for all GPUs.\n    Args:\n        dataset (Dataset): A PyTorch dataset.\n        samples_per_gpu (int): Number of training samples on each GPU, i.e.,\n            batch size of each GPU.\n        workers_per_gpu (int): How many subprocesses to use for data loading\n            for each GPU.\n        num_gpus (int): Number of GPUs. Only used in non-distributed training.\n        dist (bool): Distributed training/test or not. 
Default: True.\n        shuffle (bool): Whether to shuffle the data at every epoch.\n            Default: True.\n        kwargs: any keyword argument to be used to initialize DataLoader\n    Returns:\n        DataLoader: A PyTorch dataloader.\n    \"\"\"\n    rank, world_size = get_dist_info()\n\n    if dist:\n        # DistributedGroupSampler will definitely shuffle the data to satisfy\n        # that images on each GPU are in the same group\n        if shuffle:\n            sampler = build_sampler(shuffler_sampler if shuffler_sampler is not None else dict(type='DistributedGroupSampler'),\n                                     dict(\n                                         dataset=dataset,\n                                         samples_per_gpu=samples_per_gpu,\n                                         num_replicas=world_size,\n                                         rank=rank,\n                                         seed=seed)\n                                     )\n        else:\n            sampler = build_sampler(nonshuffler_sampler if nonshuffler_sampler is not None else dict(type='DistributedSampler'),\n                                     dict(\n                                         dataset=dataset,\n                                         num_replicas=world_size,\n                                         rank=rank,\n                                         shuffle=shuffle,\n                                         seed=seed)\n                                     )\n\n        batch_size = samples_per_gpu\n        num_workers = workers_per_gpu\n        batch_sampler = None\n\n\n    else:\n        # assert False, 'not support in bevformer'\n        # print('WARNING: only to be used for measuring inference speed!')\n        sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None\n        batch_size = num_gpus * samples_per_gpu\n        num_workers = num_gpus * workers_per_gpu\n        batch_sampler = None\n\n    # This is the branch actually taken here: iter-based runner with the infinite group-aware batch sampler\n    if runner_type['type'] == 'IterBasedRunner' and shuffler_sampler['type'] == 'InfiniteGroupEachSampleInBatchSampler':\n        # TODO: original has more options, but I'm not using them \n        # https://github.com/open-mmlab/mmdetection/blob/3b72b12fe9b14de906d1363982b9fba05e7d47c1/mmdet/datasets/builder.py#L145-L157\n        batch_sampler = build_sampler(shuffler_sampler, dict(\n                                         dataset=dataset,\n                                         samples_per_gpu=samples_per_gpu,\n                                         num_replicas=world_size,\n                                         rank=rank,\n                                         seed=seed)\n                                     )\n        batch_size = 1  # when a batch_sampler is provided, DataLoader requires batch_size=1\n        sampler = None\n\n\n    init_fn = partial(\n        worker_init_fn, num_workers=num_workers, rank=rank,\n        seed=seed) if seed is not None else None\n\n    data_loader = DataLoader(\n        dataset,\n        batch_size=batch_size,\n        sampler=sampler,\n        batch_sampler=batch_sampler,\n        num_workers=num_workers,\n        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),\n        pin_memory=False,\n        worker_init_fn=init_fn,\n        **kwargs)\n\n    return data_loader\n\n\ndef worker_init_fn(worker_id, num_workers, rank, seed):\n    # The seed of each worker equals to\n    # num_worker * rank + worker_id + user_seed\n    worker_seed = num_workers * rank + worker_id + seed\n    
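# seed numpy and python RNGs per worker so augmentations differ across workers but stay reproducible\n    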
np.random.seed(worker_seed)\n    random.seed(worker_seed)\n\n\n# Copyright (c) OpenMMLab. All rights reserved.\n# import platform\n# from mmcv.utils import Registry, build_from_cfg\n\n# from mmdet.datasets import DATASETS\n# from mmdet.datasets.builder import _concat_dataset\n\n# if platform.system() != 'Windows':\n#     # https://github.com/pytorch/pytorch/issues/973\n#     import resource\n#     rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)\n#     base_soft_limit = rlimit[0]\n#     hard_limit = rlimit[1]\n#     soft_limit = min(max(4096, base_soft_limit), hard_limit)\n#     resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))\n\n# OBJECTSAMPLERS = Registry('Object sampler')\n\n\n# def custom_build_dataset(cfg, default_args=None):\n#     from mmdet3d.datasets.dataset_wrappers import CBGSDataset\n#     from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,\n#                                                  ConcatDataset, RepeatDataset)\n#     if isinstance(cfg, (list, tuple)):\n#         dataset = ConcatDataset([custom_build_dataset(c, default_args) for c in cfg])\n#     elif cfg['type'] == 'ConcatDataset':\n#         dataset = ConcatDataset(\n#             [custom_build_dataset(c, default_args) for c in cfg['datasets']],\n#             cfg.get('separate_eval', True))\n#     elif cfg['type'] == 'RepeatDataset':\n#         dataset = RepeatDataset(\n#             custom_build_dataset(cfg['dataset'], default_args), cfg['times'])\n#     elif cfg['type'] == 'ClassBalancedDataset':\n#         dataset = ClassBalancedDataset(\n#             custom_build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])\n#     elif cfg['type'] == 'CBGSDataset':\n#         dataset = CBGSDataset(custom_build_dataset(cfg['dataset'], default_args))\n#     elif isinstance(cfg.get('ann_file'), (list, tuple)):\n#         dataset = _concat_dataset(cfg, default_args)\n#     else:\n#         dataset = build_from_cfg(cfg, DATASETS, default_args)\n\n#     return dataset"
  },
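  {
    "path": "plugin/datasets/examples/builder_worker_seed_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# A minimal, self-contained sketch of the per-worker seeding scheme used by\n# build_dataloader in plugin/datasets/builder.py: each dataloader worker is\n# seeded with num_workers * rank + worker_id + user_seed, so augmentations\n# differ across workers and ranks but remain reproducible. Assumes a single\n# process (rank 0); only torch and numpy are required.\nimport random\nfrom functools import partial\n\nimport numpy as np\nimport torch\nfrom torch.utils.data import DataLoader, TensorDataset\n\n\ndef worker_init_fn(worker_id, num_workers, rank, seed):\n    # same formula as the builder: a unique, reproducible seed per worker\n    worker_seed = num_workers * rank + worker_id + seed\n    np.random.seed(worker_seed)\n    random.seed(worker_seed)\n\n\nif __name__ == '__main__':\n    dataset = TensorDataset(torch.arange(16).float())\n    num_workers = 2\n    init_fn = partial(worker_init_fn, num_workers=num_workers, rank=0, seed=42)\n    loader = DataLoader(dataset, batch_size=4, num_workers=num_workers,\n                        worker_init_fn=init_fn, shuffle=False)\n    for (batch,) in loader:\n        print(batch.tolist())\n"
  },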
  {
    "path": "plugin/datasets/evaluation/AP.py",
    "content": "import numpy as np\nfrom .distance import chamfer_distance, frechet_distance, chamfer_distance_batch\nfrom typing import List, Tuple, Union\nfrom numpy.typing import NDArray\nimport torch\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision. \n\n    Args:\n        recalls (ndarray): shape (num_dets, )\n        precisions (ndarray): shape (num_dets, )\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float: calculated average precision\n    \"\"\"\n\n    recalls = recalls[np.newaxis, :]\n    precisions = precisions[np.newaxis, :]\n\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = 0.\n\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        \n        ind = np.where(mrec[0, 1:] != mrec[0, :-1])[0]\n        ap = np.sum(\n            (mrec[0, ind + 1] - mrec[0, ind]) * mpre[0, ind + 1])\n    \n    elif mode == '11points':\n        for thr in np.arange(0, 1 + 1e-3, 0.1):\n            precs = precisions[0, recalls[i, :] >= thr]\n            prec = precs.max() if precs.size > 0 else 0\n            ap += prec\n        ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    \n    return ap\n\ndef instance_match(pred_lines: NDArray, \n                   scores: NDArray, \n                   gt_lines: NDArray, \n                   thresholds: Union[Tuple, List], \n                   metric: str='chamfer') -> List:\n    \"\"\"Compute whether detected lines are true positive or false positive.\n\n    Args:\n        pred_lines (array): Detected lines of a sample, of shape (M, INTERP_NUM, 2 or 3).\n        scores (array): Confidence score of each line, of shape (M, ).\n        gt_lines (array): GT lines of a sample, of shape (N, INTERP_NUM, 2 or 3).\n        thresholds (list of tuple): List of thresholds.\n        metric (str): Distance function for lines matching. Default: 'chamfer'.\n\n    Returns:\n        list_of_tp_fp (list): tp-fp matching result at all thresholds\n    \"\"\"\n\n    if metric == 'chamfer':\n        distance_fn = chamfer_distance\n\n    elif metric == 'frechet':\n        distance_fn = frechet_distance\n    \n    else:\n        raise ValueError(f'unknown distance function {metric}')\n\n    num_preds = pred_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n\n    # tp and fp\n    tp_fp_list = []\n    tp = np.zeros((num_preds), dtype=np.float32)\n    fp = np.zeros((num_preds), dtype=np.float32)\n\n    # if there is no gt lines in this sample, then all pred lines are false positives\n    if num_gts == 0:\n        fp[...] 
= 1\n        for thr in thresholds:\n            tp_fp_list.append((tp.copy(), fp.copy()))\n        return tp_fp_list\n    \n    if num_preds == 0:\n        for thr in thresholds:\n            tp_fp_list.append((tp.copy(), fp.copy()))\n        return tp_fp_list\n\n    assert pred_lines.shape[1] == gt_lines.shape[1], \\\n        \"sample points num should be the same\"\n\n    # distance matrix: M x N\n    matrix = np.zeros((num_preds, num_gts))\n\n    # for i in range(num_preds):\n    #     for j in range(num_gts):\n    #         matrix[i, j] = distance_fn(pred_lines[i], gt_lines[j])\n    \n    matrix = chamfer_distance_batch(pred_lines, gt_lines)\n    # for each det, the min distance with all gts\n    matrix_min = matrix.min(axis=1)\n\n    # for each det, which gt is the closest to it\n    matrix_argmin = matrix.argmin(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-scores)\n\n    # match under different thresholds\n    for thr in thresholds:\n        tp = np.zeros((num_preds), dtype=np.float32)\n        fp = np.zeros((num_preds), dtype=np.float32)\n\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        for i in sort_inds:\n            if matrix_min[i] <= thr:\n                matched_gt = matrix_argmin[i]\n                if not gt_covered[matched_gt]:\n                    gt_covered[matched_gt] = True\n                    tp[i] = 1\n                else:\n                    fp[i] = 1\n            else:\n                fp[i] = 1\n        \n        tp_fp_list.append((tp, fp))\n\n    return tp_fp_list"
  },
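  {
    "path": "plugin/datasets/examples/ap_matching_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# Sketch of how instance_match and average_precision compose, mirroring the\n# per-class loop in vector_eval.py: match predictions to GT at a threshold,\n# sort by score, accumulate tp/fp, then integrate the PR curve. Assumes the\n# repo root is on PYTHONPATH so the package import resolves.\nimport numpy as np\n\nfrom plugin.datasets.evaluation.AP import average_precision, instance_match\n\nif __name__ == '__main__':\n    rng = np.random.default_rng(0)\n    # two GT polylines and three predictions, each with 10 interpolated 2D points\n    gt_lines = rng.uniform(-5, 5, size=(2, 10, 2))\n    pred_lines = np.concatenate(\n        [gt_lines + 0.05, rng.uniform(-5, 5, size=(1, 10, 2))])\n    scores = np.array([0.9, 0.8, 0.3])\n\n    # one threshold -> one (tp, fp) pair\n    (tp, fp), = instance_match(pred_lines, scores, gt_lines, thresholds=[0.5])\n    order = np.argsort(-scores)\n    tp_cum = np.cumsum(tp[order])\n    fp_cum = np.cumsum(fp[order])\n    recalls = tp_cum / len(gt_lines)\n    precisions = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float32).eps)\n    print('AP@0.5 =', average_precision(recalls, precisions, mode='area'))\n"
  },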
  {
    "path": "plugin/datasets/evaluation/__init__.py",
    "content": ""
  },
  {
    "path": "plugin/datasets/evaluation/distance.py",
    "content": "from scipy.spatial import distance\nfrom numpy.typing import NDArray\nimport torch\n\ndef chamfer_distance(line1: NDArray, line2: NDArray) -> float:\n    ''' Calculate chamfer distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (array): coordinates of line1\n        line2 (array): coordinates of line2\n    \n    Returns:\n        distance (float): chamfer distance\n    '''\n    \n    dist_matrix = distance.cdist(line1, line2, 'euclidean')\n    dist12 = dist_matrix.min(-1).sum() / len(line1)\n    dist21 = dist_matrix.min(-2).sum() / len(line2)\n\n    return (dist12 + dist21) / 2\n\ndef frechet_distance(line1: NDArray, line2: NDArray) -> float:\n    ''' Calculate frechet distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (array): coordinates of line1\n        line2 (array): coordinates of line2\n    \n    Returns:\n        distance (float): frechet distance\n    '''\n    \n    raise NotImplementedError\n\ndef chamfer_distance_batch(pred_lines, gt_lines):\n    ''' Calculate chamfer distance between two group of lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        pred_lines (array or tensor): shape (m, num_pts, 2 or 3)\n        gt_lines (array or tensor): shape (n, num_pts, 2 or 3)\n    \n    Returns:\n        distance (array): chamfer distance\n    '''\n    _, num_pts, coord_dims = pred_lines.shape\n    \n    if not isinstance(pred_lines, torch.Tensor):\n        pred_lines = torch.tensor(pred_lines)\n    if not isinstance(gt_lines, torch.Tensor):\n        gt_lines = torch.tensor(gt_lines)\n    dist_mat = torch.cdist(pred_lines.view(-1, coord_dims), \n                    gt_lines.view(-1, coord_dims), p=2) \n    # (num_query*num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts)) \n    # (num_query, num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=-1)) \n    # (num_gt, num_q, num_pts, num_pts)\n\n    dist1 = dist_mat.min(-1)[0].sum(-1)\n    dist2 = dist_mat.min(-2)[0].sum(-1)\n\n    dist_matrix = (dist1 + dist2).transpose(0, 1) / (2 * num_pts)\n    \n    return dist_matrix.numpy()"
  },
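  {
    "path": "plugin/datasets/examples/chamfer_check_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# Quick numerical check that chamfer_distance_batch agrees with the pairwise\n# chamfer_distance on small random polylines. Assumes the repo root is on\n# PYTHONPATH; requires numpy, scipy and torch.\nimport numpy as np\n\nfrom plugin.datasets.evaluation.distance import (chamfer_distance,\n                                                 chamfer_distance_batch)\n\nif __name__ == '__main__':\n    rng = np.random.default_rng(0)\n    preds = rng.normal(size=(3, 20, 2))\n    gts = rng.normal(size=(2, 20, 2))\n\n    batched = chamfer_distance_batch(preds, gts)  # (3, 2) distance matrix\n    looped = np.array([[chamfer_distance(p, g) for g in gts] for p in preds])\n    assert np.allclose(batched, looped, atol=1e-6)\n    print(batched)\n"
  },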
  {
    "path": "plugin/datasets/evaluation/raster_eval.py",
    "content": "import torch\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport mmcv\nfrom functools import cached_property\nimport prettytable\nfrom numpy.typing import NDArray\nfrom typing import Dict, Optional\nfrom logging import Logger\nfrom mmcv import Config\nfrom copy import deepcopy\n\nN_WORKERS = 16\n\nclass RasterEvaluate(object):\n    \"\"\"Evaluator for rasterized map.\n\n    Args:\n        dataset_cfg (Config): dataset cfg for gt\n        n_workers (int): num workers to parallel\n    \"\"\"\n\n    def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS):\n        self.dataset = build_dataset(dataset_cfg)\n        self.dataloader = build_dataloader(\n            self.dataset, samples_per_gpu=1, workers_per_gpu=n_workers, shuffle=False, dist=False)\n        self.cat2id = self.dataset.cat2id\n        self.id2cat = {v: k for k, v in self.cat2id.items()}\n        self.n_workers = n_workers\n\n    @cached_property\n    def gts(self) -> Dict[str, NDArray]:\n        print('collecting gts...')\n        gts = {}\n        for data in mmcv.track_iter_progress(self.dataloader):\n            token = deepcopy(data['img_metas'].data[0][0]['token'])\n            gt = deepcopy(data['semantic_mask'].data[0][0])\n            gts[token] = gt\n            del data # avoid dataloader memory crash\n        \n        return gts\n\n    def evaluate(self, \n                 result_path: str, \n                 logger: Optional[Logger]=None) -> Dict[str, float]:\n        ''' Do evaluation for a submission file and print evalution results to `logger` if specified.\n        The submission will be aligned by tokens before evaluation. \n        \n        Args:\n            result_path (str): path to submission file\n            logger (Logger): logger to print evaluation result, Default: None\n        \n        Returns:\n            result_dict (Dict): evaluation results. 
IoU by categories.\n        '''\n        \n        results = mmcv.load(result_path)\n        meta = results['meta']\n        results = results['results']\n\n        result_dict = {}\n\n        gts = []\n        preds = []\n        for token, gt in self.gts.items():\n            gts.append(gt)\n            pred = torch.zeros((len(self.cat2id), gt.shape[1], gt.shape[2])).bool()\n            if token in results:\n                semantic_mask = torch.tensor(results[token]['semantic_mask'])\n                for label_i in range(gt.shape[0]):\n                    pred[label_i] = (semantic_mask == label_i+1)\n            preds.append(pred)\n        \n        preds = torch.stack(preds).bool()\n        gts = torch.stack(gts).bool()\n\n        # TODO: flip the gt\n        gts = torch.flip(gts, [2,])\n\n        # for every label\n        total = 0\n        for i in range(gts.shape[1]):\n            category = self.id2cat[i]\n            pred = preds[:, i]\n            gt = gts[:, i]\n            intersect = (pred & gt).sum().float().item()\n            union = (pred | gt).sum().float().item()\n            result_dict[category] = intersect / (union + 1e-7)\n            total += result_dict[category]\n        \n        mIoU = total / gts.shape[1]\n        result_dict['mIoU'] = mIoU\n        \n        categories = list(self.cat2id.keys())\n        table = prettytable.PrettyTable([' ', *categories, 'mean'])\n        table.add_row(['IoU', \n            *[round(result_dict[cat], 4) for cat in categories], \n            round(mIoU, 4)])\n        \n        if logger:\n            from mmcv.utils import print_log\n            print_log('\\n'+str(table), logger=logger)\n            print_log(f'mIoU = {mIoU:.4f}\\n', logger=logger)\n\n        return result_dict\n"
  },
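  {
    "path": "plugin/datasets/examples/raster_iou_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# A torch-only sketch of the per-category IoU computation at the core of\n# RasterEvaluate.evaluate: stack boolean masks for predictions and GT, then\n# IoU = |pred & gt| / |pred | gt| per category, averaged into mIoU.\nimport torch\n\nif __name__ == '__main__':\n    torch.manual_seed(0)\n    num_samples, num_classes, h, w = 4, 3, 50, 100\n    preds = torch.rand(num_samples, num_classes, h, w) > 0.5\n    gts = torch.rand(num_samples, num_classes, h, w) > 0.5\n\n    ious = []\n    for c in range(num_classes):\n        intersect = (preds[:, c] & gts[:, c]).sum().float()\n        union = (preds[:, c] | gts[:, c]).sum().float()\n        ious.append((intersect / (union + 1e-7)).item())\n\n    print('IoU per class:', [round(v, 4) for v in ious])\n    print('mIoU:', round(sum(ious) / num_classes, 4))\n"
  },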
  {
    "path": "plugin/datasets/evaluation/vector_eval.py",
    "content": "from functools import partial\nimport numpy as np\nfrom multiprocessing import Pool\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport mmcv\nfrom .AP import instance_match, average_precision\nimport prettytable\nfrom time import time\nfrom functools import cached_property\nfrom shapely.geometry import LineString\nfrom numpy.typing import NDArray\nfrom typing import Dict, List, Optional\nfrom logging import Logger\nfrom mmcv import Config\nfrom copy import deepcopy\nimport os\n\nINTERP_NUM = 200 # number of points to interpolate during evaluation\nTHRESHOLDS = [0.5, 1.0, 1.5] # AP thresholds\nN_WORKERS = 16 # num workers to parallel\nSAMPLE_DIST = 0.15\n\n\nclass VectorEvaluate(object):\n    \"\"\"Evaluator for vectorized map.\n\n    Args:\n        dataset_cfg (Config): dataset cfg for gt\n        n_workers (int): num workers to parallel\n    \"\"\"\n\n    def __init__(self, dataset_cfg: Config, n_workers: int=N_WORKERS) -> None:\n        self.dataset = build_dataset(dataset_cfg)\n        self.cat2id = self.dataset.cat2id\n        self.id2cat = {v: k for k, v in self.cat2id.items()}\n        self.n_workers = n_workers\n        self.new_split = 'newsplit' in self.dataset.ann_file\n        self.roi_size = self.dataset.roi_size\n        if self.roi_size == (60, 30):\n            self.thresholds = [0.5, 1.0, 1.5]\n        elif self.roi_size == (100, 50):\n            self.thresholds = [1.0, 1.5, 2.0]\n        \n    @cached_property\n    def gts(self) -> Dict[str, Dict[int, List[NDArray]]]:\n        roi_size = self.dataset.roi_size\n        if 'av2' in self.dataset.ann_file:\n            dataset = 'av2'\n        else:\n            dataset = 'nusc'\n        if self.new_split:\n            tmp_file = f'./tmp_gts_{dataset}_{roi_size[0]}x{roi_size[1]}_newsplit.pkl'\n        else:\n            tmp_file = f'./tmp_gts_{dataset}_{roi_size[0]}x{roi_size[1]}.pkl'\n        if os.path.exists(tmp_file):\n            print(f'loading cached gts from {tmp_file}')\n            gts = mmcv.load(tmp_file)\n            return gts\n        \n        print('collecting gts...')\n        gts = {}\n        self.dataloader = build_dataloader(\n            self.dataset, samples_per_gpu=1, workers_per_gpu=self.n_workers, shuffle=False, dist=False)\n        pbar = mmcv.ProgressBar(len(self.dataloader))\n        for data in self.dataloader:\n            token = deepcopy(data['img_metas'].data[0][0]['token'])\n            gt = deepcopy(data['vectors'].data[0][0])\n            gts[token] = gt\n            pbar.update()\n            del data # avoid dataloader memory crash\n        \n        if not os.path.exists(tmp_file):\n            print(f\"saving gt to {tmp_file}\")\n            mmcv.dump(gts, tmp_file)\n        return gts\n    \n    def interp_fixed_num(self, \n                         vector: NDArray, \n                         num_pts: int) -> NDArray:\n        ''' Interpolate a polyline.\n        \n        Args:\n            vector (array): line coordinates, shape (M, 2)\n            num_pts (int): \n        \n        Returns:\n            sampled_points (array): interpolated coordinates\n        '''\n        line = LineString(vector)\n        distances = np.linspace(0, line.length, num_pts)\n        sampled_points = np.array([list(line.interpolate(distance).coords) \n            for distance in distances]).squeeze()\n        \n        return sampled_points\n    \n    def interp_fixed_dist(self, \n                          vector: NDArray,\n                          sample_dist: 
float) -> NDArray:\n        ''' Interpolate a line at a fixed interval.\n        \n        Args:\n            vector (array): line coordinates, shape (M, 2)\n            sample_dist (float): sample interval\n        \n        Returns:\n            points (array): interpolated points, shape (N, 2)\n        '''\n        line = LineString(vector)\n        distances = list(np.arange(sample_dist, line.length, sample_dist))\n        # make sure to sample at least two points when sample_dist > line.length\n        distances = [0,] + distances + [line.length,] \n        \n        sampled_points = np.array([list(line.interpolate(distance).coords)\n                                for distance in distances]).squeeze()\n        \n        return sampled_points\n\n    def _evaluate_single(self, \n                         pred_vectors: List, \n                         scores: List, \n                         groundtruth: List, \n                         thresholds: List, \n                         metric: str='chamfer') -> Dict[int, NDArray]:\n        ''' Do single-frame matching for one class.\n        \n        Args:\n            pred_vectors (List): List[vector(ndarray) (different length)], \n            scores (List): List[score(float)]\n            groundtruth (List): List of vectors\n            thresholds (List): List of thresholds\n            metric (str): distance metric. Default: 'chamfer'\n        \n        Returns:\n            tp_fp_score_by_thr (Dict): matching results at different thresholds\n                e.g. {0.5: (M, 3), 1.0: (M, 3), 1.5: (M, 3)}\n        '''\n\n        pred_lines = []\n\n        # interpolate predictions\n        for vector in pred_vectors:\n            vector = np.array(vector)\n            vector_interp = self.interp_fixed_num(vector, INTERP_NUM)\n            pred_lines.append(vector_interp)\n        if pred_lines:\n            pred_lines = np.stack(pred_lines)\n        else:\n            pred_lines = np.zeros((0, INTERP_NUM, 2))\n\n        # interpolate groundtruth\n        gt_lines = []\n        for vector in groundtruth:\n            vector_interp = self.interp_fixed_num(vector, INTERP_NUM)\n            gt_lines.append(vector_interp)\n        if gt_lines:\n            gt_lines = np.stack(gt_lines)\n        else:\n            gt_lines = np.zeros((0, INTERP_NUM, 2))\n        \n        scores = np.array(scores)\n        tp_fp_list = instance_match(pred_lines, scores, gt_lines, thresholds, metric) # List[(tp, fp)] per threshold\n        tp_fp_score_by_thr = {}\n        for i, thr in enumerate(thresholds):\n            tp, fp = tp_fp_list[i]\n            tp_fp_score = np.hstack([tp[:, None], fp[:, None], scores[:, None]])\n            tp_fp_score_by_thr[thr] = tp_fp_score\n        \n        return tp_fp_score_by_thr # {0.5: (M, 3), 1.0: (M, 3), 1.5: (M, 3)}\n        \n    def evaluate(self, \n                 result_path: str, \n                 metric: str='chamfer', \n                 logger: Optional[Logger]=None) -> Dict[str, float]:\n        ''' Do evaluation for a submission file and print evaluation results to `logger` if specified.\n        The submission will be aligned by tokens before evaluation. We use multiple workers to speed up.\n        \n        Args:\n            result_path (str): path to submission file\n            metric (str): distance metric. Default: 'chamfer'\n            logger (Logger): logger to print evaluation result, Default: None\n        \n        Returns:\n            new_result_dict (Dict): evaluation results. AP by categories.\n        '''\n        \n        results = mmcv.load(result_path)\n        results = results['results']\n        \n        # re-group samples and gt by label\n        samples_by_cls = {label: [] for label in self.id2cat.keys()}\n        num_gts = {label: 0 for label in self.id2cat.keys()}\n        num_preds = {label: 0 for label in self.id2cat.keys()}\n\n        # align by token\n        for token, gt in self.gts.items():\n            if token in results.keys():\n                pred = results[token]\n            else:\n                pred = {'vectors': [], 'scores': [], 'labels': []}\n            \n            # for every sample\n            vectors_by_cls = {label: [] for label in self.id2cat.keys()}\n            scores_by_cls = {label: [] for label in self.id2cat.keys()}\n\n            for i in range(len(pred['labels'])):\n                # i-th pred line in sample\n                label = pred['labels'][i]\n                vector = pred['vectors'][i]\n                score = pred['scores'][i]\n\n                vectors_by_cls[label].append(vector)\n                scores_by_cls[label].append(score)\n\n            for label in self.id2cat.keys():\n                new_sample = (vectors_by_cls[label], scores_by_cls[label], gt[label])\n                num_gts[label] += len(gt[label])\n                num_preds[label] += len(scores_by_cls[label])\n                samples_by_cls[label].append(new_sample)\n\n        result_dict = {}\n\n        print(f'\\nevaluating {len(self.id2cat)} categories...')\n        start = time()\n        if self.n_workers > 0:\n            pool = Pool(self.n_workers)\n        \n        sum_mAP = 0\n        pbar = mmcv.ProgressBar(len(self.id2cat))\n        for label in self.id2cat.keys():\n            samples = samples_by_cls[label] # List[(pred_lines, scores, gts)]\n            result_dict[self.id2cat[label]] = {\n                'num_gts': num_gts[label],\n                'num_preds': num_preds[label]\n            }\n            sum_AP = 0\n\n            fn = partial(self._evaluate_single, thresholds=self.thresholds, metric=metric)\n            if self.n_workers > 0:\n                tpfp_score_list = pool.starmap(fn, samples)\n            else:\n                tpfp_score_list = []\n                for sample in samples:\n                    tpfp_score_list.append(fn(*sample))\n            \n            for thr in self.thresholds:\n                tp_fp_score = [i[thr] for i in tpfp_score_list]\n                tp_fp_score = np.vstack(tp_fp_score) # (num_dets, 3)\n                sort_inds = np.argsort(-tp_fp_score[:, -1])\n\n                tp = tp_fp_score[sort_inds, 0] # (num_dets,)\n                fp = tp_fp_score[sort_inds, 1] # (num_dets,)\n                tp = np.cumsum(tp, axis=0)\n                fp = np.cumsum(fp, axis=0)\n                eps = np.finfo(np.float32).eps\n                recalls = tp / np.maximum(num_gts[label], eps)\n                precisions = tp / np.maximum((tp + fp), eps)\n\n                AP = average_precision(recalls, precisions, 'area')\n                sum_AP += AP\n                result_dict[self.id2cat[label]].update({f'AP@{thr}': AP})\n\n            pbar.update()\n            \n            AP = sum_AP / len(self.thresholds)\n            sum_mAP += AP\n\n            result_dict[self.id2cat[label]].update({'AP': AP})\n        \n        if self.n_workers > 0:\n            pool.close()\n        \n        mAP = sum_mAP / len(self.id2cat.keys())\n        result_dict.update({'mAP': mAP})\n        \n        print(f\"finished in {time() - start:.2f}s\")\n\n        # print results\n        table = prettytable.PrettyTable(['category', 'num_preds', 'num_gts'] + \n                [f'AP@{thr}' for thr in self.thresholds] + ['AP'])\n        for label in self.id2cat.keys():\n            table.add_row([\n                self.id2cat[label], \n                result_dict[self.id2cat[label]]['num_preds'],\n                result_dict[self.id2cat[label]]['num_gts'],\n                *[round(result_dict[self.id2cat[label]][f'AP@{thr}'], 4) for thr in self.thresholds],\n                round(result_dict[self.id2cat[label]]['AP'], 4),\n            ])\n        \n        from mmcv.utils import print_log\n        print_log('\\n'+str(table), logger=logger)\n        # average over all (category, threshold) pairs\n        mAP_normal = 0\n        for label in self.id2cat.keys():\n            for thr in self.thresholds:\n                mAP_normal += result_dict[self.id2cat[label]][f'AP@{thr}']\n        mAP_normal = mAP_normal / (len(self.id2cat) * len(self.thresholds))\n\n        print_log(f'mAP_normal = {mAP_normal:.4f}\\n', logger=logger)\n\n        new_result_dict = {}\n        for name in self.cat2id:\n            new_result_dict[name] = result_dict[name]['AP']\n\n        return new_result_dict"
  },
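  {
    "path": "plugin/datasets/examples/interp_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# Sketch of the two polyline resampling strategies used by VectorEvaluate:\n# a fixed number of equally spaced points (INTERP_NUM points are used before\n# matching) versus a fixed sampling interval. Requires numpy and shapely.\nimport numpy as np\nfrom shapely.geometry import LineString\n\n\ndef interp_fixed_num(vector, num_pts):\n    # equally spaced arc-length samples, as in VectorEvaluate.interp_fixed_num\n    line = LineString(vector)\n    distances = np.linspace(0, line.length, num_pts)\n    return np.array([list(line.interpolate(d).coords)[0] for d in distances])\n\n\ndef interp_fixed_dist(vector, sample_dist):\n    # fixed-interval samples, always keeping both endpoints\n    line = LineString(vector)\n    distances = [0.0] + list(np.arange(sample_dist, line.length, sample_dist)) + [line.length]\n    return np.array([list(line.interpolate(d).coords)[0] for d in distances])\n\n\nif __name__ == '__main__':\n    polyline = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 2.0]])  # length 3.0\n    print(interp_fixed_num(polyline, 5).shape)     # (5, 2)\n    print(interp_fixed_dist(polyline, 0.5).shape)  # (7, 2): 0.0, 0.5, ..., 3.0\n"
  },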
  {
    "path": "plugin/datasets/map_utils/av2map_extractor.py",
    "content": "from av2.map.map_api import ArgoverseStaticMap\nfrom pathlib import Path\nfrom shapely.geometry import LineString, box, Polygon\nfrom shapely import ops\nimport numpy as np\nfrom .utils import split_collections, get_drivable_area_contour, \\\n        get_ped_crossing_contour, remove_repeated_lines, transform_from, \\\n        connect_lines, remove_boundary_dividers, remove_repeated_lanesegment, reassign_graph_attribute\nfrom numpy.typing import NDArray\nfrom typing import Dict, List, Tuple, Union\n\nfrom av2.geometry.se3 import SE3\nfrom nuscenes.map_expansion.map_api import NuScenesMapExplorer\nimport networkx as nx\n\nfrom nuscenes.eval.common.utils import quaternion_yaw, Quaternion\n\nfrom shapely.geometry import Polygon, LineString, box, MultiPolygon, MultiLineString\nfrom shapely.strtree import STRtree\n\nfrom shapely.geometry import CAP_STYLE, JOIN_STYLE\n\n\nclass AV2MapExtractor(object):\n    \"\"\"Argoverse 2 map ground-truth extractor.\n\n    Args:\n        roi_size (tuple or list): bev range\n        id2map (dict): log id to map json path\n    \"\"\"\n    def __init__(self, roi_size: Union[Tuple, List], id2map: Dict) -> None:\n        self.roi_size = roi_size\n        self.id2map = {}\n\n        for log_id, path in id2map.items():\n            self.id2map[log_id] = ArgoverseStaticMap.from_json(Path(path))\n        \n    def generate_nearby_dividers(self,avm, e2g_translation, e2g_rotation,patch):\n        def get_path(ls_dict):\n            pts_G = nx.DiGraph()\n            junction_pts_list = []\n            tmp=ls_dict\n            for key, value in tmp.items():\n                centerline_geom = LineString(value['polyline'].xyz)\n                centerline_pts = np.array(centerline_geom.coords).round(3)\n                start_pt = centerline_pts[0]\n                end_pt = centerline_pts[-1]\n\n                for idx, pts in enumerate(centerline_pts[:-1]):\n                    pts_G.add_edge(tuple(centerline_pts[idx]),tuple(centerline_pts[idx+1]))\n\n                valid_incoming_num = 0\n                for idx, pred in enumerate(value['predecessors']):\n                    if pred in tmp.keys():\n                        valid_incoming_num += 1\n                        pred_geom = LineString(tmp[pred]['polyline'].xyz)\n                        pred_pt = np.array(pred_geom.coords).round(3)[-1]\n\n                        if pred_pt[0] == start_pt[0] and pred_pt[1] == start_pt[1] and pred_pt[2] == start_pt[2]:\n                            pass\n                        else:\n                            pts_G.add_edge(tuple(pred_pt), tuple(start_pt))\n\n                if valid_incoming_num > 1:\n                    junction_pts_list.append(tuple(start_pt))\n                \n                valid_outgoing_num = 0\n                for idx, succ in enumerate(value['successors']):\n                    if succ in tmp.keys():\n                        valid_outgoing_num += 1\n                        succ_geom = LineString(tmp[succ]['polyline'].xyz)\n                        succ_pt = np.array(succ_geom.coords).round(3)[0]\n\n                        if end_pt[0] == succ_pt[0] and end_pt[1] == succ_pt[1] and end_pt[2] == succ_pt[2]:\n                            pass\n                        else:\n                            pts_G.add_edge(tuple(end_pt), tuple(succ_pt))\n\n                if valid_outgoing_num > 1:\n                    junction_pts_list.append(tuple(end_pt))\n            \n            roots = (v for v, d in pts_G.in_degree() if d == 0)\n            
leaves = [v for v, d in pts_G.out_degree() if d == 0]\n            # find paths from each root to each leaf\n            all_paths = []\n            for root in roots:\n                for leave in leaves:\n                    paths = nx.all_simple_paths(pts_G, root, leave)\n                    all_paths.extend(paths)\n\n            final_centerline_paths = []\n            for path in all_paths:\n                merged_line = LineString(path)\n                merged_line = merged_line.simplify(0.2, preserve_topology=True)\n                final_centerline_paths.append(merged_line)\n\n            return final_centerline_paths\n        \n        scene_ls_list = avm.get_scenario_lane_segments()\n        scene_ls_dict = dict()\n        for ls in scene_ls_list:\n            scene_ls_dict[ls.id] = dict(\n                ls=ls,\n                polygon = Polygon(ls.polygon_boundary),\n                predecessors=ls.predecessors,\n                successors=ls.successors\n            )\n        \n        nearby_ls_dict = dict()\n        for key, value in scene_ls_dict.items():\n            polygon = value['polygon']\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n                if not new_polygon.is_empty:\n                    nearby_ls_dict[key] = value['ls']\n\n        ls_dict = nearby_ls_dict\n        divider_ls_dict = dict()\n        for key, value in ls_dict.items():\n            if not value.is_intersection:\n                divider_ls_dict[key] = value\n\n        left_lane_dict = {}\n        right_lane_dict = {}\n        for key, value in divider_ls_dict.items():\n            if value.left_neighbor_id is not None:\n                left_lane_dict[key] = dict(\n                    polyline=value.left_lane_boundary,\n                    predecessors = value.predecessors,\n                    successors = value.successors,\n                    left_neighbor_id = value.left_neighbor_id,\n                )\n            if value.right_neighbor_id is not None:\n                right_lane_dict[key] = dict(\n                    polyline = value.right_lane_boundary,\n                    predecessors = value.predecessors,\n                    successors = value.successors,\n                    right_neighbor_id = value.right_neighbor_id,\n                )\n\n        for key, value in left_lane_dict.items():\n            if value['left_neighbor_id'] in right_lane_dict.keys():\n                del right_lane_dict[value['left_neighbor_id']]\n\n        for key, value in right_lane_dict.items():\n            if value['right_neighbor_id'] in left_lane_dict.keys():\n                del left_lane_dict[value['right_neighbor_id']]\n\n        left_lane_dict = remove_repeated_lanesegment(left_lane_dict)\n        right_lane_dict = remove_repeated_lanesegment(right_lane_dict)\n\n        left_lane_dict = reassign_graph_attribute(left_lane_dict)\n        right_lane_dict = reassign_graph_attribute(right_lane_dict)\n\n        left_paths = 
get_path(left_lane_dict)\n        right_paths = get_path(right_lane_dict)\n        local_dividers = left_paths + right_paths\n\n        return local_dividers\n\n    def proc_polygon(self, polygon, ego_SE3_city):\n        # transform a polygon from city frame to ego frame\n        interiors = []\n        exterior_cityframe = np.array(list(polygon.exterior.coords))\n        exterior_egoframe = ego_SE3_city.transform_point_cloud(exterior_cityframe)\n        for inter in polygon.interiors:\n            inter_cityframe = np.array(list(inter.coords))\n            inter_egoframe = ego_SE3_city.transform_point_cloud(inter_cityframe)\n            interiors.append(inter_egoframe[:, :3])\n\n        new_polygon = Polygon(exterior_egoframe[:, :3], interiors)\n        return new_polygon\n    \n    def proc_line(self, line, ego_SE3_city):\n        # transform a line from city frame to ego frame\n        new_line_pts_cityframe = np.array(list(line.coords))\n        new_line_pts_egoframe = ego_SE3_city.transform_point_cloud(new_line_pts_cityframe)\n        line = LineString(new_line_pts_egoframe[:, :3])\n        return line\n\n    def extract_local_divider(self, nearby_dividers, ego_SE3_city, patch_box, patch_angle, patch_size):\n        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)\n        line_list = []\n        for line in nearby_dividers:\n            if line.is_empty:  # Skip lines without nodes.\n                continue\n            new_line = line.intersection(patch)\n            if not new_line.is_empty:\n                if new_line.geom_type == 'MultiLineString':\n                    for single_line in new_line.geoms:\n                        if single_line.is_empty:\n                            continue\n                        single_line = self.proc_line(single_line, ego_SE3_city)\n                        line_list.append(single_line)\n                else:\n                    new_line = self.proc_line(new_line, ego_SE3_city)\n                    line_list.append(new_line)\n        centerlines = line_list\n        \n        # deduplicate nearly identical dividers via IoU of their buffered polygons\n        poly_centerlines = [line.buffer(0.1,\n                    cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre) for line in centerlines]\n        index_by_id = dict((id(pt), i) for i, pt in enumerate(poly_centerlines))\n        tree = STRtree(poly_centerlines)\n        final_pgeom = []\n        remain_idx = [i for i in range(len(centerlines))]\n        for i, pline in enumerate(poly_centerlines):\n            if i not in remain_idx:\n                continue\n            remain_idx.pop(remain_idx.index(i))\n\n            final_pgeom.append(centerlines[i])\n            for o in tree.query(pline):\n                o_idx = index_by_id[id(o)]\n                if o_idx not in remain_idx:\n                    continue\n                inter = o.intersection(pline).area\n                union = o.union(pline).area\n                iou = inter / union\n                if iou >= 0.90:\n                    remain_idx.pop(remain_idx.index(o_idx))\n\n        final_pgeom = connect_lines(final_pgeom)\n        return final_pgeom\n\n    def extract_local_boundary(self, avm, ego_SE3_city, patch_box, patch_angle, patch_size):\n        boundary_list = []\n        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)\n        for da in avm.get_scenario_vector_drivable_areas():\n            boundary_list.append(da.xyz)\n\n        polygon_list = []\n        for da in boundary_list:\n            
exterior_coords = da\n            interiors = []\n            polygon = Polygon(exterior_coords, interiors)\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n                if not new_polygon.is_empty:\n                    if new_polygon.geom_type == 'Polygon':\n                        if not new_polygon.is_valid:\n                            continue\n                        new_polygon = self.proc_polygon(new_polygon, ego_SE3_city)\n                        if not new_polygon.is_valid:\n                            continue\n                    elif new_polygon.geom_type == 'MultiPolygon':\n                        polygons = []\n                        for single_polygon in new_polygon.geoms:\n                            if not single_polygon.is_valid or single_polygon.is_empty:\n                                continue\n                            new_single_polygon = self.proc_polygon(single_polygon, ego_SE3_city)\n                            if not new_single_polygon.is_valid:\n                                continue\n                            polygons.append(new_single_polygon)\n                        if len(polygons) == 0:\n                            continue\n                        new_polygon = MultiPolygon(polygons)\n                        if not new_polygon.is_valid:\n                            continue\n                    else:\n                        raise ValueError('{} is not valid'.format(new_polygon.geom_type))\n\n                    if new_polygon.geom_type == 'Polygon':\n                        new_polygon = MultiPolygon([new_polygon])\n                    polygon_list.append(new_polygon)\n\n        union_segments = ops.unary_union(polygon_list)\n        max_x = patch_size[1] / 2\n        max_y = patch_size[0] / 2\n        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        boundary_lines = []\n        for line in results:\n            if not line.is_empty:\n                if line.geom_type == 'MultiLineString':\n                    for single_line in line.geoms:\n                        boundary_lines.append(single_line)\n                elif line.geom_type == 'LineString':\n                    boundary_lines.append(line)\n                else:\n                    raise NotImplementedError\n        return boundary_lines\n\n    def get_scene_dividers(self, avm, patch_box, patch_angle):\n        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)\n        
scene_ls_list = avm.get_scenario_lane_segments()\n        scene_ls_dict = dict()\n        for ls in scene_ls_list:\n            scene_ls_dict[ls.id] = dict(\n                ls=ls,\n                polygon = Polygon(ls.polygon_boundary),\n                predecessors=ls.predecessors,\n                successors=ls.successors\n            )\n        nearby_ls_dict = dict()\n        for key, value in scene_ls_dict.items():\n            polygon = value['polygon']\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n                if not new_polygon.is_empty:\n                    nearby_ls_dict[key] = value['ls']\n\n        ls_dict = nearby_ls_dict\n        divider_ls_dict = dict()\n        for key, value in ls_dict.items():\n            if not value.is_intersection:\n                divider_ls_dict[key] = value\n\n        return divider_ls_dict\n\n    def get_scene_ped_crossings(self, avm, e2g_translation, e2g_rotation, polygon_ped=True):\n\n        g2e_translation = e2g_rotation.T.dot(-e2g_translation)\n        g2e_rotation = e2g_rotation.T\n\n        roi_x, roi_y = self.roi_size[:2]\n        local_patch = box(-roi_x / 2, -roi_y / 2, roi_x / 2, roi_y / 2)\n        ped_crossings = []\n        for _, pc in avm.vector_pedestrian_crossings.items():\n            edge1_xyz = pc.edge1.xyz\n            edge2_xyz = pc.edge2.xyz\n            ego1_xyz = transform_from(edge1_xyz, g2e_translation, g2e_rotation)\n            ego2_xyz = transform_from(edge2_xyz, g2e_translation, g2e_rotation)\n\n            # if True, organize each ped crossing as closed polylines. \n            if polygon_ped:\n                vertices = np.concatenate([ego1_xyz, ego2_xyz[::-1, :]])\n                p = Polygon(vertices)\n                line = get_ped_crossing_contour(p, local_patch)\n                if line is not None:\n                    if len(line.coords) < 3 or Polygon(line).area < 1:\n                        continue\n                    ped_crossings.append(line)\n            # Otherwise organize each ped crossing as two parallel polylines.\n            else:\n                line1 = LineString(ego1_xyz)\n                line2 = LineString(ego2_xyz)\n                line1_local = line1.intersection(local_patch)\n                line2_local = line2.intersection(local_patch)\n\n                # keep the whole ped crossing only if both edges are in roi range\n                if not line1_local.is_empty and not line2_local.is_empty:\n                    ped_crossings.append(line1_local)\n                    ped_crossings.append(line2_local)\n\n        return ped_crossings\n    \n    def get_map_geom(self,\n                     log_id: str, \n                     e2g_translation: NDArray, \n                     e2g_rotation: NDArray,\n                     polygon_ped=True) -> Dict[str, List[Union[LineString, Polygon]]]:\n        ''' Extract geometries given `log_id` and ego pose.\n        \n        Args:\n            log_id (str): log id\n            e2g_translation (array): ego2global translation, shape (3,)\n            e2g_rotation (array): ego2global rotation matrix, shape (3, 3)\n            polygon_ped: if True, organize each ped crossing as closed polylines. \\\n                Otherwise organize each ped crossing as two parallel polylines. 
\\\n                Default: True\n        \n        Returns:\n            geometries (Dict): extracted geometries by category.\n        '''\n\n        avm = self.id2map[log_id]\n        \n        patch_h = self.roi_size[1]\n        patch_w = self.roi_size[0]\n        patch_size = (patch_h, patch_w)\n        map_pose = e2g_translation[:2]\n        rotation = Quaternion(matrix=e2g_rotation)\n        patch_box = (map_pose[0], map_pose[1], patch_size[0], patch_size[1])\n        patch_angle = quaternion_yaw(rotation) / np.pi * 180\n\n        city_SE3_ego = SE3(e2g_rotation, e2g_translation)\n        ego_SE3_city = city_SE3_ego.inverse()\n        \n        patch = NuScenesMapExplorer.get_patch_coord(patch_box, patch_angle)\n        nearby_dividers = self.generate_nearby_dividers(avm, e2g_translation, e2g_rotation, patch)\n        map_anno = dict(\n            divider=[],\n            ped_crossing=[],\n            boundary=[],\n            drivable_area=[],\n        )\n        map_anno['ped_crossing'] = self.get_scene_ped_crossings(avm, e2g_translation, e2g_rotation, polygon_ped=polygon_ped)\n        map_anno['boundary'] = self.extract_local_boundary(avm, ego_SE3_city, patch_box, patch_angle, patch_size)\n        all_dividers = self.extract_local_divider(nearby_dividers, ego_SE3_city, patch_box, patch_angle, patch_size)\n        map_anno['divider'] = remove_boundary_dividers(all_dividers, map_anno['boundary'])\n\n        return map_anno"
  },
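  {
    "path": "plugin/datasets/examples/patch_clip_example.py",
    "content": "# NOTE: hypothetical example file, not part of the original repo.\n# Minimal sketch of the patch-clipping pattern used throughout the map\n# extractors: build a rectangular BEV patch, intersect a polyline with it,\n# and handle both LineString and MultiLineString results. Requires only\n# numpy and shapely.\nimport numpy as np\nfrom shapely.geometry import LineString, box\n\nif __name__ == '__main__':\n    # 60m x 30m ego-centred patch, as in the (60, 30) roi_size configs\n    patch = box(-30.0, -15.0, 30.0, 15.0)\n    line = LineString([(-40.0, 0.0), (0.0, 5.0), (40.0, 0.0)])\n\n    clipped = line.intersection(patch)\n    pieces = []\n    if not clipped.is_empty:\n        if clipped.geom_type == 'MultiLineString':\n            # a line may leave and re-enter the patch\n            pieces.extend(clipped.geoms)\n        else:\n            pieces.append(clipped)\n    for p in pieces:\n        print(np.asarray(p.coords).round(2))\n"
  },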
  {
    "path": "plugin/datasets/map_utils/nuscmap_extractor.py",
    "content": "from shapely.geometry import LineString, box, Polygon\nfrom shapely import ops, strtree\n\nimport numpy as np\nfrom nuscenes.map_expansion.map_api import NuScenesMap, NuScenesMapExplorer\nfrom nuscenes.eval.common.utils import quaternion_yaw\nfrom pyquaternion import Quaternion\nfrom .utils import split_collections, get_drivable_area_contour, get_ped_crossing_contour\nfrom numpy.typing import NDArray\nfrom typing import Dict, List, Tuple, Union\n\nfrom shapely.geometry import Polygon, MultiPolygon, LineString, Point, box, MultiLineString\nfrom shapely import affinity, ops\nimport networkx as nx\n\n\nclass NuscMapExtractor(object):\n    \"\"\"NuScenes map ground-truth extractor.\n\n    Args:\n        data_root (str): path to nuScenes dataset\n        roi_size (tuple or list): bev range\n    \"\"\"\n    def __init__(self, data_root: str, roi_size: Union[List, Tuple]) -> None:\n        self.roi_size = roi_size\n        self.MAPS = ['boston-seaport', 'singapore-hollandvillage',\n                     'singapore-onenorth', 'singapore-queenstown']\n        \n        self.nusc_maps = {}\n        self.map_explorer = {}\n        for loc in self.MAPS:\n            self.nusc_maps[loc] = NuScenesMap(\n                dataroot=data_root, map_name=loc)\n            self.map_explorer[loc] = CNuScenesMapExplorer(self.nusc_maps[loc])\n    \n    def get_map_geom(self, \n                     location: str, \n                     e2g_translation: Union[List, NDArray],\n                     e2g_rotation: Union[List, NDArray]) -> Dict[str, List[Union[LineString, Polygon]]]:\n        # Borrowed from MapTR's codebase to make sure data are the same\n        # (center_x, center_y, len_y, len_x) in nuscenes format\n        patch_size_ego_coord = (self.roi_size[1], self.roi_size[0])\n        patch_size_lidar_coord = (self.roi_size[0], self.roi_size[1])\n\n        vector_map_maptr = VectorizedLocalMap(self.nusc_maps[location], self.map_explorer[location],\n                                patch_size_lidar_coord, patch_size_ego_coord, map_classes=['divider','ped_crossing','boundary'])\n        map_annos = vector_map_maptr.gen_vectorized_samples(e2g_translation, e2g_rotation)\n        \n        return dict(\n            divider=map_annos['divider'], # List[LineString]\n            ped_crossing=map_annos['ped_crossing'], # List[LineString]\n            boundary=map_annos['boundary'], # List[LineString]\n            drivable_area=[], # List[Polygon],\n        )\n\n\nclass VectorizedLocalMap(object):\n    CLASS2LABEL = {\n        'road_divider': 0,\n        'lane_divider': 0,\n        'ped_crossing': 1,\n        'contours': 2,\n        'others': -1\n    }\n    def __init__(self,\n                 nusc_map,\n                 map_explorer,\n                 patch_size,\n                 roi_size,\n                 map_classes=['divider','ped_crossing','boundary','centerline'],\n                 line_classes=['road_divider', 'lane_divider'],\n                 ped_crossing_classes=['ped_crossing'],\n                 contour_classes=['road_segment', 'lane'],\n                 centerline_classes=['lane_connector','lane'],\n                 use_simplify=True,\n                 ):\n        super().__init__()\n        self.nusc_map = nusc_map\n        self.map_explorer = map_explorer\n        self.vec_classes = map_classes\n        self.line_classes = line_classes\n        self.ped_crossing_classes = ped_crossing_classes\n        self.polygon_classes = contour_classes\n        self.centerline_classes = 
centerline_classes\n        self.patch_size = patch_size\n        self.roi_size = roi_size\n        self.local_patch = box(-self.roi_size[0] / 2, -self.roi_size[1] / 2, \n                self.roi_size[0] / 2, self.roi_size[1] / 2)\n\n\n    def gen_vectorized_samples(self, lidar2global_translation, lidar2global_rotation):\n        '''\n        Use the lidar2global pose to extract the gt map layers.\n        '''\n        \n        map_pose = lidar2global_translation[:2]\n        rotation = Quaternion(lidar2global_rotation)\n        patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1])\n        \n        patch_angle = quaternion_yaw(rotation) / np.pi * 180\n        map_dict = {'divider':[],'ped_crossing':[],'boundary':[],'centerline':[]}\n\n        for vec_class in self.vec_classes:\n            if vec_class == 'divider':\n                line_geom = self.get_map_geom(patch_box, patch_angle, self.line_classes)\n                line_instances_dict = self.line_geoms_to_instances(line_geom)\n                for line_type, instances in line_instances_dict.items():\n                    for instance in instances:\n                        instance = affinity.rotate(instance, -90, origin=(0, 0), use_radians=False)\n                        map_dict[vec_class].append(instance)\n            elif vec_class == 'ped_crossing':\n                ped_geom = self.get_map_geom(patch_box, patch_angle, self.ped_crossing_classes)\n                ped_instance_list = ped_geom['ped_crossing']\n                for instance in ped_instance_list:\n                    instance = affinity.rotate(instance, -90, origin=(0, 0), use_radians=False)\n                    map_dict[vec_class].append(instance)\n            elif vec_class == 'boundary':\n                polygon_geom = self.get_map_geom(patch_box, patch_angle, self.polygon_classes)\n                poly_bound_list = self.poly_geoms_to_instances(polygon_geom)\n                for instance in poly_bound_list:\n                    instance = affinity.rotate(instance, -90, origin=(0, 0), use_radians=False)\n                    map_dict[vec_class].append(instance)\n            elif vec_class == 'centerline':\n                centerline_geom = self.get_centerline_geom(patch_box, patch_angle, self.centerline_classes)\n                centerline_list = self.centerline_geoms_to_instances(centerline_geom)\n                for instance in centerline_list:\n                    instance = affinity.rotate(instance, -90, origin=(0, 0), use_radians=False)\n                    map_dict[vec_class].append(instance)\n            else:\n                raise ValueError(f'WRONG vec_class: {vec_class}')\n        return map_dict\n\n    def get_centerline_geom(self, patch_box, patch_angle, layer_names):\n        map_geom = {}\n        for layer_name in layer_names:\n            if layer_name in self.centerline_classes:\n                return_token = False\n                layer_centerline_dict = self.map_explorer._get_centerline(\n                    patch_box, patch_angle, layer_name, return_token=return_token)\n                if len(layer_centerline_dict.keys()) == 0:\n                    continue\n                map_geom.update(layer_centerline_dict)\n        return map_geom\n\n    def get_map_geom(self, patch_box, patch_angle, layer_names):\n        map_geom = {}\n        for layer_name in layer_names:\n            if layer_name in self.line_classes:\n                geoms = self.get_divider_line(patch_box, patch_angle, layer_name)\n                map_geom[layer_name] = geoms\n            elif layer_name in self.polygon_classes:\n                geoms = self.get_contour_line(patch_box, patch_angle, layer_name)\n                map_geom[layer_name] = geoms\n            elif layer_name in self.ped_crossing_classes:\n                geoms = self.get_ped_crossing_line_stmmapnet(patch_box, patch_angle)\n                map_geom[layer_name] = geoms\n        return map_geom\n\n    def get_divider_line(self, patch_box, patch_angle, layer_name):\n        if layer_name not in self.map_explorer.map_api.non_geometric_line_layers:\n            raise ValueError(\"{} is not a line layer\".format(layer_name))\n\n        if layer_name == 'traffic_light':\n            return None\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer.get_patch_coord(patch_box, patch_angle)\n\n        line_list = []\n        records = getattr(self.map_explorer.map_api, layer_name)\n        for record in records:\n            line = self.map_explorer.map_api.extract_line(record['line_token'])\n            if line.is_empty:  # Skip lines without nodes.\n                continue\n\n            new_line = line.intersection(patch)\n            if not new_line.is_empty:\n                new_line = affinity.rotate(new_line, -patch_angle, origin=(patch_x, patch_y), use_radians=False)\n                new_line = affinity.affine_transform(new_line,\n                                                     [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                line_list.append(new_line)\n\n        return line_list\n\n    def get_contour_line(self, patch_box, patch_angle, layer_name):\n        if layer_name not in self.map_explorer.map_api.non_geometric_polygon_layers:\n            raise ValueError('{} is not a polygonal layer'.format(layer_name))\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer.get_patch_coord(patch_box, patch_angle)\n\n        records = getattr(self.map_explorer.map_api, layer_name)\n\n        polygon_list = []\n        if layer_name == 'drivable_area':\n            for record in records:\n                polygons = [self.map_explorer.map_api.extract_polygon(polygon_token) for polygon_token in record['polygon_tokens']]\n\n                for polygon in polygons:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                        if new_polygon.geom_type == 'Polygon':\n                            
new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        else:\n            for record in records:\n                polygon = self.map_explorer.map_api.extract_polygon(record['polygon_token'])\n\n                if polygon.is_valid:\n                    new_polygon = polygon.intersection(patch)\n                    if not new_polygon.is_empty:\n                        new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                        new_polygon = affinity.affine_transform(new_polygon,\n                                                                [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                        if new_polygon.geom_type == 'Polygon':\n                            new_polygon = MultiPolygon([new_polygon])\n                        polygon_list.append(new_polygon)\n\n        return polygon_list\n\n\n    def get_ped_crossing_line(self, patch_box, patch_angle):\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.map_explorer.get_patch_coord(patch_box, patch_angle)\n        polygon_list = []\n        records = getattr(self.map_explorer.map_api, 'ped_crossing')\n        for record in records:\n            polygon = self.map_explorer.map_api.extract_polygon(record['polygon_token'])\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n                if not new_polygon.is_empty:\n                    new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                                      origin=(patch_x, patch_y), use_radians=False)\n                    new_polygon = affinity.affine_transform(new_polygon,\n                                                            [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n                    if new_polygon.geom_type == 'Polygon':\n                        new_polygon = MultiPolygon([new_polygon])\n                    polygon_list.append(new_polygon)\n\n        return polygon_list\n    \n    def _union_ped_stmmapnet(self, ped_geoms: List[Polygon]) -> List[Polygon]:\n        ''' Merge close ped crossings.\n        \n        Args:\n            ped_geoms (list): list of Polygon\n        \n        Returns:\n            union_ped_geoms (List): merged ped crossings \n        '''\n\n        ped_geoms = sorted(ped_geoms, key=lambda x:x.area, reverse=True)\n\n        def get_rec_direction(geom):\n            # direction and length of the longest edge of the minimum rotated rectangle\n            rect = geom.minimum_rotated_rectangle\n            rect_v_p = np.array(rect.exterior.coords)[:3]\n            rect_v = rect_v_p[1:]-rect_v_p[:-1]\n            v_len = np.linalg.norm(rect_v, axis=-1)\n            longest_v_i = v_len.argmax()\n\n            return rect_v[longest_v_i], v_len[longest_v_i]\n\n        tree = strtree.STRtree(ped_geoms)\n        index_by_id = dict((id(pt), i) for i, pt in enumerate(ped_geoms))\n\n        final_pgeom = []\n        remain_idx = [i for i in range(len(ped_geoms))]\n        for i, pgeom in enumerate(ped_geoms):\n\n            if i not in remain_idx:\n                continue\n            # update\n            remain_idx.pop(remain_idx.index(i))\n            pgeom_v, pgeom_v_norm = get_rec_direction(pgeom)\n            final_pgeom.append(pgeom)\n\n            intersect_pgeom = tree.query(pgeom)\n            intersect_pgeom = sorted(intersect_pgeom, key=lambda x:x.area, 
reverse=True)\n            for o in intersect_pgeom:\n                o_idx = index_by_id[id(o)]\n                if o_idx not in remain_idx:\n                    continue\n\n                o_v, o_v_norm = get_rec_direction(o)\n                cos = pgeom_v.dot(o_v)/(pgeom_v_norm*o_v_norm)\n\n                o_pgeom_union = o.union(pgeom)\n                ch_union = o_pgeom_union.convex_hull\n                ch_area_ratio = o_pgeom_union.area / ch_union.area\n\n                # add an extra criterion for merging here to handle patch-boundary-case\n                if 1 - np.abs(cos) < 0.01 and ch_area_ratio > 0.8:  # theta < 8 degrees.\n                    final_pgeom[-1] =\\\n                        final_pgeom[-1].union(o)\n                    # update\n                    remain_idx.pop(remain_idx.index(o_idx))\n        \n        final_pgeom = self._handle_small_peds(final_pgeom)\n\n        results = []\n        for p in final_pgeom:\n            results.extend(split_collections(p))\n        return results\n    \n    def _handle_small_peds(self, ped_geoms):\n        def get_two_rec_directions(geom):\n            rect = geom.minimum_rotated_rectangle\n            rect_v_p = np.array(rect.exterior.coords)[:3]\n            rect_v = rect_v_p[1:]-rect_v_p[:-1]\n            v_len = np.linalg.norm(rect_v, axis=-1)\n            return rect_v, v_len\n\n        tree = strtree.STRtree(ped_geoms)\n        index_by_id = dict((id(pt), i) for i, pt in enumerate(ped_geoms))\n\n        final_pgeom = []\n        remain_idx = [i for i in range(len(ped_geoms))]\n\n        for i, pgeom in enumerate(ped_geoms):\n            if i not in remain_idx:\n                continue\n            # update\n            remain_idx.pop(remain_idx.index(i))\n            final_pgeom.append(pgeom)\n\n            pgeom_v, pgeom_v_norm = get_two_rec_directions(pgeom)\n            \n            intersect_pgeom = tree.query(pgeom)\n            intersect_pgeom = sorted(intersect_pgeom, key=lambda x:x.area, reverse=True)\n            for o in intersect_pgeom:\n                o_idx = index_by_id[id(o)]\n                if o_idx not in remain_idx:\n                    continue\n\n                if o.area >= pgeom.area:\n                    continue\n\n                o_pgeom_union = o.union(pgeom)\n                o_v, o_v_norm = get_two_rec_directions(o_pgeom_union)\n\n                ch_union = o_pgeom_union.convex_hull\n                ch_area_ratio = o_pgeom_union.area / ch_union.area\n                #mrr_union = o_pgeom_union.minimum_rotated_rectangle\n                #mrr_area_ratio = o_pgeom_union.area / mrr_union.area\n\n                cos_00 = pgeom_v[0].dot(o_v[0])/(pgeom_v_norm[0]*o_v_norm[0])\n                cos_01 = pgeom_v[0].dot(o_v[1])/(pgeom_v_norm[0]*o_v_norm[1])\n                cos_10 = pgeom_v[1].dot(o_v[0])/(pgeom_v_norm[1]*o_v_norm[0])\n                cos_11 = pgeom_v[1].dot(o_v[1])/(pgeom_v_norm[1]*o_v_norm[1])\n                cos_checks = np.array([(1 - np.abs(cos) < 0.001) for cos in [cos_00, cos_01, cos_10, cos_11]])\n                # add an extra criterion for merging here to handle patch-boundary-case\n\n                if cos_checks.sum() == 2 and ch_area_ratio > 0.8:\n                    final_pgeom[-1] =\\\n                        final_pgeom[-1].union(o)\n                    # update\n                    remain_idx.pop(remain_idx.index(o_idx))\n\n        return final_pgeom\n\n\n    def get_ped_crossing_line_stmmapnet(self, patch_box, patch_angle):\n        # get ped crossings\n        
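# _union_ped_stmmapnet merges fragments whose minimum-rotated-rectangle\n        # directions are near-parallel and whose union fills >80% of its convex\n        # hull; _handle_small_peds then absorbs smaller overlapping leftovers.\n        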
ped_crossings = []\n        ped = self.map_explorer._get_layer_polygon(\n                    patch_box, patch_angle, 'ped_crossing')\n                \n        for p in ped:\n            ped_crossings += split_collections(p)\n        # some ped crossings are split into several small parts\n        # we need to merge them\n        ped_crossings = self._union_ped_stmmapnet(ped_crossings)\n\n        # NOTE: clean-up noisy ped-crossing instances (for our cleaned training data only, maybe need to still\n        # use the original version when evaluation...)\n        # 1). filter too small ped_crossing merging results \n        #areas = [p.area for p in ped_crossings]\n        #print('Ped areas\\n', areas)\n        updated_ped_crossings = []\n        for p_idx, p in enumerate(ped_crossings):\n            area = p.area\n            if area < 1:\n                continue\n            elif area < 20:\n                covered = False\n                for other_idx, p_other in enumerate(ped_crossings):\n                    if other_idx != p_idx and p.covered_by(p_other):\n                        covered = True\n                        break\n                if not covered:\n                    updated_ped_crossings.append(p)\n            else:\n                updated_ped_crossings.append(p)\n\n        ped_crossing_lines = []\n        for p in updated_ped_crossings:\n            # extract exteriors to get a closed polyline                        \n            line = get_ped_crossing_contour(p, self.local_patch)\n            if line is not None:\n                ped_crossing_lines.append(line)\n    \n        return ped_crossing_lines\n\n    def line_geoms_to_instances(self, line_geom):\n        line_instances_dict = dict()\n        for line_type, a_type_of_lines in line_geom.items():\n            one_type_instances = self._one_type_line_geom_to_instances(a_type_of_lines)\n            line_instances_dict[line_type] = one_type_instances\n\n        return line_instances_dict\n\n    def _one_type_line_geom_to_instances(self, line_geom):\n        line_instances = []\n        \n        for line in line_geom:\n            if not line.is_empty:\n                if line.geom_type == 'MultiLineString':\n                    for single_line in line.geoms:\n                        line_instances.append(single_line)\n                elif line.geom_type == 'LineString':\n                    line_instances.append(line)\n                else:\n                    raise NotImplementedError\n        return line_instances\n\n    def ped_poly_geoms_to_instances(self, ped_geom):\n        # ped = ped_geom[0][1]\n        # import ipdb;ipdb.set_trace()\n        ped = ped_geom['ped_crossing']\n        union_segments = ops.unary_union(ped)\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        local_patch = box(-max_x - 0.2, -max_y - 0.2, max_x + 0.2, max_y + 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            
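# linemerge can still return a MultiLineString when pieces stay disjoint;\n            # _one_type_line_geom_to_instances splits those back into LineStrings\n            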
results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        return self._one_type_line_geom_to_instances(results)\n\n\n    def poly_geoms_to_instances(self, polygon_geom):\n        roads = polygon_geom['road_segment']\n        lanes = polygon_geom['lane']\n        # import ipdb;ipdb.set_trace()\n        union_roads = ops.unary_union(roads)\n        union_lanes = ops.unary_union(lanes)\n        union_segments = ops.unary_union([union_roads, union_lanes])\n        max_x = self.patch_size[1] / 2\n        max_y = self.patch_size[0] / 2\n        local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n        exteriors = []\n        interiors = []\n        if union_segments.geom_type != 'MultiPolygon':\n            union_segments = MultiPolygon([union_segments])\n        for poly in union_segments.geoms:\n            exteriors.append(poly.exterior)\n            for inter in poly.interiors:\n                interiors.append(inter)\n\n        results = []\n        for ext in exteriors:\n            if ext.is_ccw:\n                ext.coords = list(ext.coords)[::-1]\n            lines = ext.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        for inter in interiors:\n            if not inter.is_ccw:\n                inter.coords = list(inter.coords)[::-1]\n            lines = inter.intersection(local_patch)\n            if isinstance(lines, MultiLineString):\n                lines = ops.linemerge(lines)\n            results.append(lines)\n\n        return self._one_type_line_geom_to_instances(results)\n\n    def centerline_geoms_to_instances(self,geoms_dict):\n        centerline_geoms_list,pts_G = self.union_centerline(geoms_dict)\n        # vectors_dict = self.centerline_geoms2vec(centerline_geoms_list)\n        # import ipdb;ipdb.set_trace()\n        return self._one_type_line_geom_to_instances(centerline_geoms_list)\n\n\n    def centerline_geoms2vec(self, centerline_geoms_list):\n        vector_dict = {}\n        # import ipdb;ipdb.set_trace()\n        # centerline_geoms_list = [line.simplify(0.2, preserve_topology=True) \\\n        #                         for line in centerline_geoms_list]\n        vectors = self._geom_to_vectors(\n            centerline_geoms_list)\n        vector_dict.update({'centerline': ('centerline', vectors)})\n        return vector_dict\n\n    def union_centerline(self, centerline_geoms):\n        # import ipdb;ipdb.set_trace()\n        pts_G = nx.DiGraph()\n        junction_pts_list = []\n        for key, value in centerline_geoms.items():\n            centerline_geom = value['centerline']\n            if centerline_geom.geom_type == 'MultiLineString':\n                start_pt = np.array(centerline_geom.geoms[0].coords).round(3)[0]\n                end_pt = np.array(centerline_geom.geoms[-1].coords).round(3)[-1]\n                for single_geom in centerline_geom.geoms:\n                    single_geom_pts = np.array(single_geom.coords).round(3)\n                    for idx, pt in enumerate(single_geom_pts[:-1]):\n                        pts_G.add_edge(tuple(single_geom_pts[idx]),tuple(single_geom_pts[idx+1]))\n            elif centerline_geom.geom_type 
== 'LineString':\n                centerline_pts = np.array(centerline_geom.coords).round(3)\n                start_pt = centerline_pts[0]\n                end_pt = centerline_pts[-1]\n                for idx, pts in enumerate(centerline_pts[:-1]):\n                    pts_G.add_edge(tuple(centerline_pts[idx]),tuple(centerline_pts[idx+1]))\n            else:\n                raise NotImplementedError\n            valid_incoming_num = 0\n            for idx, pred in enumerate(value['incoming_tokens']):\n                if pred in centerline_geoms.keys():\n                    valid_incoming_num += 1\n                    pred_geom = centerline_geoms[pred]['centerline']\n                    if pred_geom.geom_type == 'MultiLineString':\n                        pred_pt = np.array(pred_geom.geoms[-1].coords).round(3)[-1]\n                        pts_G.add_edge(tuple(pred_pt), tuple(start_pt))\n                    else:\n                        pred_pt = np.array(pred_geom.coords).round(3)[-1]\n                        pts_G.add_edge(tuple(pred_pt), tuple(start_pt))\n            if valid_incoming_num > 1:\n                junction_pts_list.append(tuple(start_pt))\n\n            valid_outgoing_num = 0\n            for idx, succ in enumerate(value['outgoing_tokens']):\n                if succ in centerline_geoms.keys():\n                    valid_outgoing_num += 1\n                    succ_geom = centerline_geoms[succ]['centerline']\n                    if succ_geom.geom_type == 'MultiLineString':\n                        succ_pt = np.array(succ_geom.geoms[0].coords).round(3)[0]\n                        pts_G.add_edge(tuple(end_pt), tuple(succ_pt))\n                    else:\n                        succ_pt = np.array(succ_geom.coords).round(3)[0]\n                        pts_G.add_edge(tuple(end_pt), tuple(succ_pt))\n            if valid_outgoing_num > 1:\n                junction_pts_list.append(tuple(end_pt))\n\n        roots = (v for v, d in pts_G.in_degree() if d == 0)\n        leaves = [v for v, d in pts_G.out_degree() if d == 0]\n        all_paths = []\n        for root in roots:\n            paths = nx.all_simple_paths(pts_G, root, leaves)\n            all_paths.extend(paths)\n\n        final_centerline_paths = []\n        for path in all_paths:\n            merged_line = LineString(path)\n            merged_line = merged_line.simplify(0.2, preserve_topology=True)\n            final_centerline_paths.append(merged_line)\n        return final_centerline_paths, pts_G\n\n\nclass CNuScenesMapExplorer(NuScenesMapExplorer):\n    def __init__(self, *args, **kwargs):\n        super(CNuScenesMapExplorer, self).__init__(*args, **kwargs)\n\n    def _get_centerline(self,\n                           patch_box: Tuple[float, float, float, float],\n                           patch_angle: float,\n                           layer_name: str,\n                           return_token: bool = False) -> dict:\n        \"\"\"Retrieve the centerlines of a particular layer within the specified patch.\n\n        :param patch_box: Patch box defined as [x_center, y_center, height, width].\n        :param patch_angle: Patch orientation in degrees.\n        :param layer_name: name of map layer to be extracted.\n        :return: dict(token:record_dict, token:record_dict,...)\n        \"\"\"\n        if layer_name not in ['lane','lane_connector']:\n            raise ValueError('{} is 
not a centerline layer'.format(layer_name))\n\n        patch_x = patch_box[0]\n        patch_y = patch_box[1]\n\n        patch = self.get_patch_coord(patch_box, patch_angle)\n\n        records = getattr(self.map_api, layer_name)\n\n        centerline_dict = dict()\n        for record in records:\n            if record['polygon_token'] is None:\n                continue\n            polygon = self.map_api.extract_polygon(record['polygon_token'])\n\n            if polygon.is_valid:\n                new_polygon = polygon.intersection(patch)\n\n                if not new_polygon.is_empty:\n                    centerline = list(self.map_api.discretize_lanes([record['token']], 0.5).values())[0]\n                    centerline = LineString(np.array(centerline)[:,:2].round(3))\n                    if centerline.is_empty:\n                        continue\n                    centerline = centerline.intersection(patch)\n                    if not centerline.is_empty:\n                        centerline = \\\n                            to_patch_coord(centerline, patch_angle, patch_x, patch_y)\n\n                        record_dict = dict(\n                            centerline=centerline,\n                            token=record['token'],\n                            incoming_tokens=self.map_api.get_incoming_lane_ids(record['token']),\n                            outgoing_tokens=self.map_api.get_outgoing_lane_ids(record['token']),\n                        )\n                        centerline_dict.update({record['token']: record_dict})\n        return centerline_dict\n\ndef to_patch_coord(new_polygon, patch_angle, patch_x, patch_y):\n    new_polygon = affinity.rotate(new_polygon, -patch_angle,\n                                  origin=(patch_x, patch_y), use_radians=False)\n    new_polygon = affinity.affine_transform(new_polygon,\n                                            [1.0, 0.0, 0.0, 1.0, -patch_x, -patch_y])\n    return new_polygon"
  },
  {
    "path": "plugin/datasets/map_utils/utils.py",
    "content": "from shapely.geometry import LineString, box, Polygon, LinearRing\nfrom shapely.geometry.base import BaseGeometry\nfrom shapely import ops\nimport numpy as np\nfrom scipy.spatial import distance\nfrom typing import List, Optional, Tuple\nfrom numpy.typing import NDArray\n\ndef split_collections(geom: BaseGeometry) -> List[Optional[BaseGeometry]]:\n    ''' Split Multi-geoms to list and check is valid or is empty.\n        \n    Args:\n        geom (BaseGeometry): geoms to be split or validate.\n    \n    Returns:\n        geometries (List): list of geometries.\n    '''\n    assert geom.geom_type in ['MultiLineString', 'LineString', 'MultiPolygon', \n        'Polygon', 'GeometryCollection'], f\"got geom type {geom.geom_type}\"\n    if 'Multi' in geom.geom_type:\n        outs = []\n        for g in geom.geoms:\n            if g.is_valid and not g.is_empty:\n                outs.append(g)\n        return outs\n    else:\n        if geom.is_valid and not geom.is_empty:\n            return [geom,]\n        else:\n            return []\n\ndef get_drivable_area_contour(drivable_areas: List[Polygon], \n                              roi_size: Tuple) -> List[LineString]:\n    ''' Extract drivable area contours to get list of boundaries.\n\n    Args:\n        drivable_areas (list): list of drivable areas.\n        roi_size (tuple): bev range size\n    \n    Returns:\n        boundaries (List): list of boundaries.\n    '''\n    max_x = roi_size[0] / 2\n    max_y = roi_size[1] / 2\n\n    # a bit smaller than roi to avoid unexpected boundaries on edges\n    local_patch = box(-max_x + 0.2, -max_y + 0.2, max_x - 0.2, max_y - 0.2)\n    \n    exteriors = []\n    interiors = []\n    \n    for poly in drivable_areas:\n        exteriors.append(poly.exterior)\n        for inter in poly.interiors:\n            interiors.append(inter)\n    \n    results = []\n    for ext in exteriors:\n        # NOTE: we make sure all exteriors are clock-wise\n        # such that each boundary's right-hand-side is drivable area\n        # and left-hand-side is walk way\n        \n        if ext.is_ccw:\n            ext = LinearRing(list(ext.coords)[::-1])\n        lines = ext.intersection(local_patch)\n        if lines.geom_type == 'GeometryCollection' and len(lines) == 0:\n            continue\n        if lines.geom_type == 'MultiLineString':\n            lines = ops.linemerge(lines)\n        assert lines.geom_type in ['MultiLineString', 'LineString']\n        \n        results.extend(split_collections(lines))\n\n    for inter in interiors:\n        # NOTE: we make sure all interiors are counter-clock-wise\n        if not inter.is_ccw:\n            inter = LinearRing(list(inter.coords)[::-1])\n        lines = inter.intersection(local_patch)\n        if lines.geom_type == 'GeometryCollection' and len(lines) == 0:\n            continue\n        if lines.geom_type == 'MultiLineString':\n            lines = ops.linemerge(lines)\n        assert lines.geom_type in ['MultiLineString', 'LineString']\n        \n        results.extend(split_collections(lines))\n\n    return results\n\ndef get_ped_crossing_contour(polygon: Polygon, \n                             local_patch: box) -> Optional[LineString]:\n    ''' Extract ped crossing contours to get a closed polyline.\n    Different from `get_drivable_area_contour`, this function ensures a closed polyline.\n\n    Args:\n        polygon (Polygon): ped crossing polygon to be extracted.\n        local_patch (tuple): local patch params\n    \n    Returns:\n        line 
(LineString): a closed line\n    '''\n\n    ext = polygon.exterior\n    if not ext.is_ccw:\n        ext = LinearRing(list(ext.coords)[::-1])\n    lines = ext.intersection(local_patch)\n    if lines.geom_type != 'LineString':\n        # remove points in intersection results\n        lines = [l for l in lines.geoms if l.geom_type != 'Point']\n        lines = ops.linemerge(lines)\n        \n        # same instance but not connected.\n        if lines.geom_type != 'LineString':\n            ls = []\n            for l in lines.geoms:\n                ls.append(np.array(l.coords))\n            \n            lines = np.concatenate(ls, axis=0)\n            lines = LineString(lines)\n\n    if not lines.is_empty:\n        start = list(lines.coords[0])\n        end = list(lines.coords[-1])\n        if not np.allclose(start, end, atol=1e-3):\n            new_line = list(lines.coords)\n            new_line.append(start)\n            lines = LineString(new_line) # make ped crossing closed\n        return lines\n    \n    return None\n\ndef remove_repeated_lines(lines: List[LineString]) -> List[LineString]:\n    ''' Remove repeated dividers since each divider in argoverse2 is mentioned twice\n    by both left lane and right lane.\n\n    Args:\n        lines (List): list of dividers\n\n    Returns:\n        new_lines (List): list of deduplicated dividers\n    '''\n\n    new_lines = []\n    for line in lines:\n        repeated = False\n        for l in new_lines:\n            # buffered-IoU rule to check overlap\n            area1 = line.buffer(0.1)\n            area2 = l.buffer(0.1)\n            inter = area1.intersection(area2).area\n            union = area1.union(area2).area\n            iou = inter / union\n            if iou >= 0.90:\n                repeated = True\n                break\n        \n        if not repeated:\n            new_lines.append(line)\n    \n    return new_lines\n\ndef remove_repeated_lanesegment(lane_dict):\n    ''' Remove repeated lane segments: each divider in argoverse2 is referenced by\n    both its left and right lane, so near-identical polylines are deduplicated by\n    a buffered-IoU test.\n\n    Args:\n        lane_dict (Dict): lane segments keyed by lane id\n\n    Returns:\n        new_lane_dict (Dict): deduplicated lane segments\n    '''\n\n    new_lane_dict = {}\n    for key, value in lane_dict.items():\n        repeated = False\n        for new_key, new_value in new_lane_dict.items():\n            line = LineString(value['polyline'].xyz)\n            l = LineString(new_value['polyline'].xyz)\n            \n            area1 = line.buffer(0.01)\n            area2 = l.buffer(0.01)\n            inter = area1.intersection(area2).area\n            union = area1.union(area2).area\n            iou = inter / union\n            if iou >= 0.90:\n                repeated = True\n                break\n        \n        if not repeated:\n            new_lane_dict[key] = value\n    \n    return new_lane_dict\n\n\ndef reassign_graph_attribute(lane_dict):\n    for key, value in lane_dict.items():\n        if len(value['predecessors']) > 0:\n            if value['predecessors'][0] not in lane_dict.keys() or value['predecessors'][0]==key:\n                value['predecessors'] = []\n            else:\n                
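# keep the lane graph consistent: the surviving predecessor points back here\n                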
lane_dict[value['predecessors'][0]]['successors'] = [key]\n    for key, value in lane_dict.items():\n        if len(value['successors']) > 0:\n            if value['successors'][0] not in lane_dict.keys() or value['successors'][0]==key:\n                value['successors'] = []\n            else:\n                lane_dict[value['successors'][0]]['predecessors'] = [key]\n\n    return lane_dict\n\n\ndef remove_boundary_dividers(dividers: List[LineString], \n                             boundaries: List[LineString]) -> List[LineString]:\n    ''' Some dividers overlap with boundaries in the argoverse2 dataset, so\n    we need to remove these dividers.\n\n    Args:\n        dividers (list): list of dividers\n        boundaries (list): list of boundaries\n\n    Returns:\n        dividers (list): dividers with boundary-overlapping ones removed\n    '''\n\n    for idx in range(len(dividers))[::-1]:\n        divider = dividers[idx]\n        \n        for bound in boundaries:\n            length = min(divider.length, bound.length)\n\n            # hand-crafted rule to check overlap\n            if divider.buffer(0.3).intersection(bound.buffer(0.3)).area \\\n                    > 0.2 * length:\n                # the divider overlaps boundary\n                dividers.pop(idx)\n                break\n\n    return dividers\n\ndef connect_lines(lines: List[LineString]) -> List[LineString]:\n    ''' Some dividers are split into multiple small parts,\n    so we need to connect these lines.\n\n    Args:\n        lines (list): list of line fragments\n\n    Returns:\n        new_lines (list): list of connected lines\n    '''\n\n    new_lines = []\n    eps = 0.1 # threshold to identify continuous lines\n    while len(lines) > 1:\n        line1 = lines[0]\n        merged_flag = False\n        for i, line2 in enumerate(lines[1:]):\n            # hand-crafted rule\n            begin1 = list(line1.coords)[0]\n            end1 = list(line1.coords)[-1]\n            begin2 = list(line2.coords)[0]\n            end2 = list(line2.coords)[-1]\n\n            dist_matrix = distance.cdist([begin1, end1], [begin2, end2])\n            if dist_matrix[0, 0] < eps:\n                coords = list(line2.coords)[::-1] + list(line1.coords)\n            elif dist_matrix[0, 1] < eps:\n                coords = list(line2.coords) + list(line1.coords)\n            elif dist_matrix[1, 0] < eps:\n                coords = list(line1.coords) + list(line2.coords)\n            elif dist_matrix[1, 1] < eps:\n                coords = list(line1.coords) + list(line2.coords)[::-1]\n            else: continue\n\n            new_line = LineString(coords)\n            lines.pop(i + 1)\n            lines[0] = new_line\n            merged_flag = True\n            break\n        \n        if merged_flag: continue\n\n        new_lines.append(line1)\n        lines.pop(0)\n\n    if len(lines) == 1:\n        new_lines.append(lines[0])\n\n    return new_lines\n\ndef transform_from(xyz: NDArray, \n                   translation: NDArray, \n                   rotation: NDArray) -> NDArray:\n    ''' Transform points between different coordinate systems.\n\n    Args:\n        xyz (array): original point coordinates\n        translation (array): translation\n        rotation (array): rotation matrix\n\n    Returns:\n        new_xyz (array): transformed point coordinates\n    '''\n\n    new_xyz = xyz @ rotation.T + translation\n    return new_xyz\n"
  },
  {
    "path": "plugin/datasets/nusc_dataset.py",
    "content": "from.base_dataset import BaseMapDataset\nfrom .map_utils.nuscmap_extractor import NuscMapExtractor\nfrom mmdet.datasets import DATASETS\nimport numpy as np\nfrom .visualize.renderer import Renderer\nimport mmcv\nfrom time import time\nfrom pyquaternion import Quaternion\nimport pickle\n\n\n@DATASETS.register_module()\nclass NuscDataset(BaseMapDataset):\n    \"\"\"NuScenes map dataset class.\n\n    Args:\n        ann_file (str): annotation file path\n        cat2id (dict): category to class id\n        roi_size (tuple): bev range\n        eval_config (Config): evaluation config\n        meta (dict): meta information\n        pipeline (Config): data processing pipeline config\n        interval (int): annotation load interval\n        work_dir (str): path to work dir\n        test_mode (bool): whether in test mode\n    \"\"\"\n    \n    def __init__(self, data_root, **kwargs):\n        super().__init__(**kwargs)\n        self.map_extractor = NuscMapExtractor(data_root, self.roi_size)\n        self.renderer = Renderer(self.cat2id, self.roi_size, 'nusc')\n    \n    def load_annotations(self, ann_file):\n        \"\"\"Load annotations from ann_file.\n\n        Args:\n            ann_file (str): Path of the annotation file.\n\n        Returns:\n            list[dict]: List of annotations.\n        \"\"\"\n        \n        start_time = time()\n        ann = mmcv.load(ann_file)\n        samples = ann[::self.interval]\n        \n        print(f'collected {len(samples)} samples in {(time() - start_time):.2f}s')\n        self.samples = samples\n    \n    def load_matching(self, matching_file):\n        with open(matching_file, 'rb') as pf:\n            data = pickle.load(pf)\n        total_samples = 0\n        for scene_name, info in data.items():\n            total_samples += len(info['sample_ids'])\n        assert total_samples == len(self.samples), 'Matching info not matched with data samples'\n        self.matching_meta = data\n        print(f'loaded matching meta for {len(data)} scenes')\n\n    def get_sample(self, idx):\n        \"\"\"Get data sample. For each sample, map extractor will be applied to extract \n        map elements. 
\n\n        Args:\n            idx (int): data index\n\n        Returns:\n            result (dict): dict of input\n        \"\"\"\n\n        sample = self.samples[idx]\n        location = sample['location']\n\n        lidar2ego = np.eye(4)\n        lidar2ego[:3,:3] = Quaternion(sample['lidar2ego_rotation']).rotation_matrix\n        lidar2ego[:3, 3] = sample['lidar2ego_translation']\n\n        ego2global = np.eye(4)\n        ego2global[:3,:3] = Quaternion(sample['e2g_rotation']).rotation_matrix\n        ego2global[:3, 3] = sample['e2g_translation']\n\n        # NOTE: The original StreamMapNet uses the ego location to query the map.\n        # To align with the lidar-centered setting in MapTR, we made some modifications\n        # here to switch to the lidar-centered setting.\n        lidar2global = ego2global @ lidar2ego\n        lidar2global_translation = list(lidar2global[:3, 3])\n        lidar2global_translation = [float(x) for x in lidar2global_translation]\n        lidar2global_rotation = list(Quaternion(matrix=lidar2global).q)\n\n        map_geoms = self.map_extractor.get_map_geom(location, lidar2global_translation, \n                lidar2global_rotation)\n        \n        lidar_shifted_e2g_translation = np.array(sample['e2g_translation'])\n        lidar_shifted_e2g_translation[0] = lidar2global_translation[0]\n        lidar_shifted_e2g_translation[1] = lidar2global_translation[1]\n        lidar_shifted_e2g_translation = lidar_shifted_e2g_translation.tolist()\n        e2g_rotation = sample['e2g_rotation']\n\n        lidar2global = np.eye(4)\n        lidar2global[:3,:3] = Quaternion(e2g_rotation).rotation_matrix\n        lidar2global[:3, 3] = lidar_shifted_e2g_translation\n        global2lidar = np.linalg.inv(lidar2global)\n        \n        ego2lidar = global2lidar @ ego2global\n\n        map_label2geom = {}\n        for k, v in map_geoms.items():\n            if k in self.cat2id.keys():\n                map_label2geom[self.cat2id[k]] = v\n        \n        ego2img_rts = []\n        ego2cam_rts = []\n        for c in sample['cams'].values():\n            extrinsic, intrinsic = np.array(\n                c['extrinsics']), np.array(c['intrinsics'])\n\n            # ego coord to cam coord (via the lidar frame)\n            cam2ego_rt = np.linalg.inv(extrinsic)\n            cam2lidar_rt = ego2lidar @ cam2ego_rt\n            lidar2cam_rt = np.linalg.inv(cam2lidar_rt)\n            ego2cam_rt = lidar2cam_rt\n\n            viewpad = np.eye(4)\n            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic\n\n            ego2img_rt = (viewpad @ ego2cam_rt)\n            ego2cam_rts.append(ego2cam_rt)\n            ego2img_rts.append(ego2img_rt)\n\n\n        input_dict = {\n            'location': location,\n            'token': sample['token'],\n            'img_filenames': [c['img_fpath'] for c in sample['cams'].values()],\n            # intrinsics are 3x3 Ks\n            'cam_intrinsics': [c['intrinsics'] for c in sample['cams'].values()],\n            # extrinsics are 4x4 transform matrices, **ego2cam**\n            'cam_extrinsics': [c['extrinsics'] for c in sample['cams'].values()],\n            'ego2img': ego2img_rts,\n            'ego2cam': ego2cam_rts,\n            'map_geoms': map_label2geom, # {0: List[ped_crossing(LineString)], 1: ...}\n            'ego2global_translation': 
lidar_shifted_e2g_translation, \n            'ego2global_rotation': Quaternion(e2g_rotation).rotation_matrix.tolist(),\n            'sample_idx': sample['sample_idx'],\n            'scene_name': sample['scene_name'],\n            'lidar2ego_translation': sample['lidar2ego_translation'],\n            'lidar2ego_rotation': sample['lidar2ego_rotation'],\n        }\n\n        return input_dict"
  },
  {
    "path": "plugin/datasets/pipelines/__init__.py",
    "content": "from .loading import LoadMultiViewImagesFromFiles\nfrom .formating import FormatBundleMap\nfrom .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D, PhotoMetricDistortionMultiViewImage\nfrom .rasterize import RasterizeMap, PV_Map\nfrom .vectorize import VectorizeMap\n\n__all__ = [\n    'LoadMultiViewImagesFromFiles',\n    'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages',\n    'RasterizeMap', 'PV_Map', 'VectorizeMap', 'PhotoMetricDistortionMultiViewImage'\n]"
  },
  {
    "path": "plugin/datasets/pipelines/formating.py",
    "content": "import numpy as np\nfrom mmcv.parallel import DataContainer as DC\n\nfrom mmdet3d.core.points import BasePoints\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.datasets.pipelines import to_tensor\n\n@PIPELINES.register_module()\nclass FormatBundleMap(object):\n    \"\"\"Format data for map tasks and then collect data for model input.\n\n    These fields are formatted as follows.\n\n    - img: (1) transpose, (2) to tensor, (3) to DataContainer (stack=True)\n    - semantic_mask (if exists): (1) to tensor, (2) to DataContainer (stack=True)\n    - vectors (if exists): (1) to DataContainer (cpu_only=True)\n    - img_metas: (1) to DataContainer (cpu_only=True)\n    \"\"\"\n\n    def __init__(self, process_img=True, \n                keys=['img', 'semantic_mask', 'vectors'], \n                meta_keys=['intrinsics', 'extrinsics']):\n        \n        self.process_img = process_img\n        self.keys = keys\n        self.meta_keys = meta_keys\n\n    def __call__(self, results):\n        \"\"\"Call function to transform and format common fields in results.\n\n        Args:\n            results (dict): Result dict contains the data to convert.\n\n        Returns:\n            dict: The result dict contains the data that is formatted with\n                default bundle.\n        \"\"\"\n        # Format 3D data\n        if 'points' in results:\n            assert isinstance(results['points'], BasePoints)\n            results['points'] = DC(results['points'].tensor)\n\n        for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:\n            if key not in results:\n                continue\n            results[key] = DC(to_tensor(results[key]), stack=False)\n\n        if 'img' in results and self.process_img:\n            if isinstance(results['img'], list):\n                # process multiple imgs in single frame\n                imgs = [img.transpose(2, 0, 1) for img in results['img']]\n                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))\n                results['img'] = DC(to_tensor(imgs), stack=True)\n            else:\n                img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))\n                results['img'] = DC(to_tensor(img), stack=True)\n        \n        if 'semantic_mask' in results:\n            #results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True)\n            if isinstance(results['semantic_mask'], np.ndarray):\n                results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True,\n                                              pad_dims=None)\n            else:\n                assert isinstance(results['semantic_mask'], list)\n                results['semantic_mask'] = DC(results['semantic_mask'], stack=False)\n\n        if 'vectors' in results:\n            # vectors may have different sizes\n            vectors = results['vectors']\n            results['vectors'] = DC(vectors, stack=False, cpu_only=True)\n        \n        if 'polys' in results:\n            results['polys'] = DC(results['polys'], stack=False, cpu_only=True)\n        \n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(process_img={self.process_img}, '\n        return repr_str\n"
  },
  {
    "path": "plugin/datasets/pipelines/loading.py",
    "content": "import mmcv\nimport numpy as np\nfrom mmdet.datasets.builder import PIPELINES\n\n@PIPELINES.register_module(force=True)\nclass LoadMultiViewImagesFromFiles(object):\n    \"\"\"Load multi channel images from a list of separate channel files.\n\n    Expects results['img_filename'] to be a list of filenames.\n\n    Args:\n        to_float32 (bool): Whether to convert the img to float32.\n            Defaults to False.\n        color_type (str): Color type of the file. Defaults to 'unchanged'.\n    \"\"\"\n\n    def __init__(self, to_float32=False, color_type='unchanged'):\n        self.to_float32 = to_float32\n        self.color_type = color_type\n\n    def __call__(self, results):\n        \"\"\"Call function to load multi-view image from files.\n\n        Args:\n            results (dict): Result dict containing multi-view image filenames.\n\n        Returns:\n            dict: The result dict containing the multi-view image data. \\\n                Added keys and values are described below.\n\n                - filename (str): Multi-view image filenames.\n                - img (np.ndarray): Multi-view image arrays.\n                - img_shape (tuple[int]): Shape of multi-view image arrays.\n                - ori_shape (tuple[int]): Shape of original image arrays.\n                - pad_shape (tuple[int]): Shape of padded image arrays.\n                - scale_factor (float): Scale factor.\n                - img_norm_cfg (dict): Normalization configuration of images.\n        \"\"\"\n        filename = results['img_filenames']\n        img = [mmcv.imread(name, self.color_type) for name in filename]\n        if self.to_float32:\n            img = [i.astype(np.float32) for i in img]\n        results['img'] = img\n        results['img_shape'] = [i.shape for i in img]\n        results['ori_shape'] = [i.shape for i in img]\n        # Set initial values for default meta_keys\n        results['pad_shape'] = [i.shape for i in img]\n        # results['scale_factor'] = 1.0\n        num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2]\n        results['img_norm_cfg'] = dict(\n            mean=np.zeros(num_channels, dtype=np.float32),\n            std=np.ones(num_channels, dtype=np.float32),\n            to_rgb=False)\n        results['img_fields'] = ['img']\n        return results\n\n    def __repr__(self):\n        \"\"\"str: Return a string that describes the module.\"\"\"\n        return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\\\n            f\"color_type='{self.color_type}')\"\n"
  },
  {
    "path": "plugin/datasets/pipelines/rasterize.py",
    "content": "import numpy as np\nfrom mmdet.datasets.builder import PIPELINES\nfrom shapely.geometry import LineString, Polygon\nfrom shapely import affinity\nimport cv2\nfrom PIL import Image, ImageDraw\nfrom numpy.typing import NDArray\nfrom typing import List, Tuple, Union, Dict\nimport torch\n\nimport pdb\n\n@PIPELINES.register_module(force=True)\nclass RasterizeMap(object):\n    \"\"\"Generate rasterized semantic map and put into \n    `semantic_mask` key.\n\n    Args:\n        roi_size (tuple or list): bev range\n        canvas_size (tuple or list): bev feature size\n        thickness (int): thickness of rasterized lines\n        coords_dim (int): dimension of point coordinates\n    \"\"\"\n\n    def __init__(self, \n                 roi_size: Union[Tuple, List], \n                 canvas_size: Union[Tuple, List], \n                 thickness: int, \n                 coords_dim: int,\n                 semantic_mask=False,\n                 ):\n\n        self.roi_size = roi_size\n        self.canvas_size = canvas_size\n        self.scale_x = self.canvas_size[0] / self.roi_size[0]\n        self.scale_y = self.canvas_size[1] / self.roi_size[1]\n        self.thickness = thickness\n        self.coords_dim = coords_dim\n        self.semantic_mask = semantic_mask\n\n    def line_ego_to_mask(self, \n                         line_ego: LineString, \n                         mask: NDArray, \n                         color: int=1, \n                         thickness: int=3,\n                         fill_poly=False\n                        ) -> None:\n        # \"\"\"Rasterize a single line to mask.\n        # Args:\n        #     line_ego (LineString): line\n        #     mask (array): semantic mask to paint on\n        #     color (int): positive label, default: 1\n        #     thickness (int): thickness of rasterized lines, default: 3\n        # \"\"\"\n\n        trans_x = self.canvas_size[0] / 2\n        trans_y = self.canvas_size[1] / 2\n        line_ego = affinity.scale(line_ego, self.scale_x, self.scale_y, origin=(0, 0))\n        line_ego = affinity.affine_transform(line_ego, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])\n        \n        coords = np.array(list(line_ego.coords), dtype=np.int32)[:, :2]\n        coords = coords.reshape((-1, 2))\n        assert len(coords) >= 2\n        \n        if fill_poly:\n            cv2.fillPoly(mask, np.int32([coords]), color=color)\n        else:\n            cv2.polylines(mask, np.int32([coords]), False, color=color, thickness=thickness)\n\n        \n    def polygons_ego_to_mask(self, \n                             polygons: List[Polygon], \n                             color: int=1) -> NDArray:\n        # ''' Rasterize a polygon to mask.\n        \n        # Args:\n        #     polygons (list): list of polygons\n        #     color (int): positive label, default: 1\n        \n        # Returns:\n        #     mask (array): mask with rasterize polygons\n        # '''\n\n        #mask = Image.new(\"L\", size=(self.canvas_size[0], self.canvas_size[1]), color=0) \n        # Image lib api expect size as (w, h)\n        trans_x = self.canvas_size[0] / 2\n        trans_y = self.canvas_size[1] / 2\n        masks = []\n        for polygon in polygons:\n            mask = Image.new(\"L\", size=(self.canvas_size[0], self.canvas_size[1]), color=0) \n            polygon = affinity.scale(polygon, self.scale_x, self.scale_y, origin=(0, 0))\n            polygon = affinity.affine_transform(polygon, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y])\n            ext = 
np.array(polygon.exterior.coords)[:, :2]\n            vert_list = [(x, y) for x, y in ext]\n\n            ImageDraw.Draw(mask).polygon(vert_list, outline=1, fill=color)\n            masks.append(mask)\n\n        return masks\n    \n    def get_semantic_mask(self, map_geoms: Dict) -> NDArray:\n        ''' Rasterize all map geometries to semantic mask.\n\n        Args:\n            map_geoms (dict): map geoms by class\n\n        Returns:\n            semantic_mask (array): semantic mask\n        '''\n\n        num_classes = len(map_geoms)\n        if self.semantic_mask:\n            semantic_mask = np.zeros((num_classes, self.canvas_size[1], self.canvas_size[0]), dtype=np.uint8)\n        else:\n            instance_masks = []\n\n        for label, geom_list in map_geoms.items():\n            if len(geom_list) == 0:\n                continue\n            if geom_list[0].geom_type == 'LineString':\n                for line in geom_list:\n                    if self.semantic_mask:\n                        fill_poly = True if label == 0 else False\n                        self.line_ego_to_mask(line, semantic_mask[label], color=1,\n                                            thickness=self.thickness, fill_poly=fill_poly)\n                    else:\n                        canvas = np.zeros((self.canvas_size[1], self.canvas_size[0]), dtype=np.uint8)\n                        self.line_ego_to_mask(line, canvas, color=1,\n                            thickness=self.thickness, fill_poly=False)\n                        instance_masks.append([canvas, label])\n            elif geom_list[0].geom_type == 'Polygon':\n                # drivable area \n                polygons = []\n                for polygon in geom_list:\n                    polygons.append(polygon)\n                if self.semantic_mask:\n                    semantic_mask[label] = self.polygons_ego_to_mask(polygons, color=1)\n                else:\n                    polygon_masks = self.polygons_ego_to_mask(polygons, color=1)\n                    for mask in polygon_masks:\n                        instance_masks.append([mask, label])\n            else:\n                raise ValueError('map geoms must be either LineString or Polygon!')\n        \n        if self.semantic_mask:\n            semantic_mask = np.ascontiguousarray(semantic_mask)\n            return semantic_mask\n        else:\n            return instance_masks\n\n    def __call__(self, input_dict: Dict) -> Dict:\n        map_geoms = input_dict['map_geoms'] # {0: List[ped_crossing: LineString], 1: ...}\n\n        semantic_mask = self.get_semantic_mask(map_geoms)\n        input_dict['semantic_mask'] = semantic_mask # (num_class, canvas_size[1], canvas_size[0])\n        return input_dict\n    \n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(roi_size={self.roi_size}, '\n        repr_str += f'canvas_size={self.canvas_size}, '\n        repr_str += f'thickness={self.thickness}, '\n        repr_str += f'coords_dim={self.coords_dim})'\n\n        return repr_str\n\n\n@PIPELINES.register_module(force=True)\nclass PV_Map(object):\n    \"\"\"Generate rasterized perspective-view masks and put them into the \n    `pv_mask` key.\n\n    Args:\n        img_shape (tuple or list): input image size\n        feat_down_sample (int): down-sampling factor of the feature map\n        thickness (int): thickness of rasterized lines\n        coords_dim (int): dimension of point coordinates\n    \"\"\"\n\n    def 
__init__(self,\n                 img_shape: Union[Tuple, List], \n                 feat_down_sample: int,\n                 thickness: int, \n                 coords_dim: int,\n                 pv_mask=False,\n                 num_cams=6,\n                 num_coords=2\n                 ):\n\n        self.num_cams = num_cams\n        self.num_coords = num_coords\n        self.img_shape = img_shape\n        self.feat_down_sample = feat_down_sample\n\n        self.pv_scale_x = self.img_shape[0] // feat_down_sample\n        self.pv_scale_y = self.img_shape[1] // feat_down_sample\n\n        self.thickness = thickness\n        self.coords_dim = coords_dim\n        self.pv_mask = pv_mask\n        \n    def perspective(self,cam_coords, proj_mat):\n        pix_coords = proj_mat @ cam_coords\n        valid_idx = pix_coords[2, :] > 0\n        pix_coords = pix_coords[:, valid_idx]\n        pix_coords = pix_coords[:2, :] / (pix_coords[2, :] + 1e-7)\n        pix_coords = pix_coords.transpose(1, 0)\n        return pix_coords\n\n    @staticmethod\n    def get_valid_pix_coords(pix_coords):\n        valid_idx = pix_coords[:, 2] > 0\n        pix_coords = pix_coords[valid_idx, :]\n        pix_coords = pix_coords[:, :2] / (pix_coords[:, 2:3] + 1e-7)\n        return pix_coords\n\n    def line_ego_to_pvmask(self,\n                          line_ego, \n                          mask, \n                          lidar2feat,\n                          color=1, \n                          thickness=1):\n\n        distances = np.linspace(0, line_ego.length, 200)\n        coords = np.array([np.array(line_ego.interpolate(distance).coords) for distance in distances]).reshape(-1, self.num_coords)\n        if coords.shape[1] == 2:\n            coords = np.concatenate((coords,np.zeros((coords.shape[0],1))),axis=1)\n        \n        pts_num = coords.shape[0]\n        ones = np.ones((pts_num,1))\n        lidar_coords = np.concatenate([coords,ones], axis=1).transpose(1,0)\n        pix_coords = self.perspective(lidar_coords, lidar2feat) // self.feat_down_sample\n        cv2.polylines(mask, np.int32([pix_coords]), False, color=color, thickness=thickness)\n    \n    def lines_ego_to_pv(self, lines_ego, pv_mask, ego2imgs, color=1, thickness=1):\n        lines_coord = []\n        for line_ego in lines_ego:\n            distances = np.linspace(0, line_ego.length, 100)\n            coords = np.array([np.array(line_ego.interpolate(distance).coords) for distance in distances]).reshape(-1, self.num_coords)\n            if coords.shape[1] == 2:\n                coords = np.concatenate((coords,np.zeros((coords.shape[0],1))),axis=1)\n            pts_num = coords.shape[0]\n            ones = np.ones((pts_num,1))\n            lidar_coords = np.concatenate([coords,ones], axis=1)\n            lines_coord.append(lidar_coords)\n        lines_coord = torch.tensor(np.stack(lines_coord, axis=0))\n        for cam_idx in range(len(ego2imgs)):\n            ego2img_i = torch.tensor(ego2imgs[cam_idx])\n            pers_lines_coord = torch.einsum('lk,ijk->ijl', ego2img_i, lines_coord)\n            valid_lines_coord = [self.get_valid_pix_coords(pers_coord) for pers_coord in pers_lines_coord]\n            valid_lines_coord = [x // self.feat_down_sample for x in valid_lines_coord if len(x) > 0]\n            lines_to_draw = [x.numpy().astype(np.int32) for x in valid_lines_coord]\n            cv2.polylines(pv_mask[cam_idx], lines_to_draw, False, color=color, thickness=thickness)\n    \n    def get_pvmask_old(self,map_geoms: Dict,ego2img: List, 
img_filenames: List) -> NDArray:\n        ''' Rasterize all map geometries to per-camera semantic masks (old version).\n\n        Args:\n            map_geoms (dict): map geoms by class\n\n        Returns:\n            gt_pv_mask (array): per-camera semantic masks\n        '''\n        num_classes = len(map_geoms)\n        if self.pv_mask:\n            gt_pv_mask = np.zeros((self.num_cams, num_classes, self.pv_scale_x, self.pv_scale_y), dtype=np.uint8)\n        else:\n            instance_masks = []\n\n        for label, geom_list in map_geoms.items():\n            if len(geom_list) == 0:\n                continue\n            if geom_list[0].geom_type == 'LineString':\n                for line in geom_list:\n                    for cam_index in range(self.num_cams):\n                        self.line_ego_to_pvmask(line,gt_pv_mask[cam_index][label],ego2img[cam_index],color=1,thickness=self.thickness)\n        if self.pv_mask:\n            gt_pv_mask = np.ascontiguousarray(gt_pv_mask)\n            # Visualize to double-check the pv seg is correct\n            #self.visualize_all_pv_masks(gt_pv_mask, img_filenames)\n            return gt_pv_mask\n        else:\n            return instance_masks\n\n    def get_pvmask(self, map_geoms: Dict, ego2img: List, img_filenames: List) -> NDArray:\n        ''' Rasterize all map geometries to per-camera semantic masks.\n\n        Args:\n            map_geoms (dict): map geoms by class\n\n        Returns:\n            gt_pv_mask (array): per-camera semantic masks\n        '''\n        num_classes = len(map_geoms)\n        if self.pv_mask:\n            gt_pv_mask = np.zeros((num_classes, self.num_cams, self.pv_scale_x, self.pv_scale_y), dtype=np.uint8)\n        else:\n            instance_masks = []\n\n        for label, geom_list in map_geoms.items():\n            if len(geom_list) == 0:\n                continue\n            self.lines_ego_to_pv(geom_list, gt_pv_mask[label], ego2img, color=1, thickness=self.thickness)\n\n        gt_pv_mask = gt_pv_mask.transpose(1, 0, 2, 3)\n        if self.pv_mask:\n            gt_pv_mask = np.ascontiguousarray(gt_pv_mask)\n            # Visualize to double-check the pv seg is correct\n            #self.visualize_all_pv_masks(gt_pv_mask, img_filenames)\n            return gt_pv_mask\n        else:\n            return instance_masks\n\n    def __call__(self, input_dict: Dict) -> Dict:\n        map_geoms = input_dict['map_geoms'] # {0: List[ped_crossing: LineString], 1: ...}\n        pv_mask = self.get_pvmask(map_geoms, input_dict['ego2img'], input_dict['img_filenames'])\n        input_dict['pv_mask'] = pv_mask # (num_cams, num_classes, h, w)\n        return input_dict\n    \n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(img_shape={self.img_shape}, '\n        repr_str += f'feat_down_sample={self.feat_down_sample}, '\n        repr_str += f'thickness={self.thickness}, '\n        repr_str += f'coords_dim={self.coords_dim})'\n\n        return repr_str\n    \n    def visualize_all_pv_masks(self, gt_pv_mask, img_filenames):\n        for cam_id in range(gt_pv_mask.shape[0]):\n            viz_img = self._visualize_pv_mask(gt_pv_mask[cam_id])\n            viz_img = viz_img.transpose(1, 2, 0)\n            out_path = './check_pv_seg/viz_{}.png'.format(cam_id)\n            out_raw_path = './check_pv_seg/viz_raw_{}.png'.format(cam_id)\n            filepath = img_filenames[cam_id]\n            pv_img = 
cv2.imread(filepath)\n            #pv_img = cv2.resize(pv_img, (800, 480))\n            #viz_mask = cv2.resize(viz_img, (800, 480))\n            pv_img = cv2.resize(pv_img, (608, 608))\n            viz_mask = cv2.resize(viz_img, (608, 608))\n            mask = (viz_mask == 255).all(-1)[..., None]\n            viz_img = pv_img * mask + viz_mask * (1-mask)\n            cv2.imwrite(out_path, viz_img)\n            cv2.imwrite(out_raw_path, pv_img)\n    \n    def _visualize_pv_mask(self, pv_mask):\n        COLOR_MAPS_BGR = {\n            # bgr colors\n            1: (0, 0, 255),\n            2: (0, 255, 0),\n            0: (255, 0, 0),\n        }\n        num_classes, h, w = pv_mask.shape\n        viz_img = np.ones((num_classes, h, w), dtype=np.uint8) * 255\n        for label in range(num_classes):\n            valid = (pv_mask[label] == 1)\n            viz_img[:, valid] = np.array(COLOR_MAPS_BGR[label]).reshape(3, 1)\n\n        return viz_img"
  },
  {
    "path": "plugin/datasets/pipelines/transform.py",
    "content": "import numpy as np\nimport mmcv\n\nfrom mmdet.datasets.builder import PIPELINES\nfrom numpy import random\n\n@PIPELINES.register_module(force=True)\nclass Normalize3D(object):\n    \"\"\"Normalize the image.\n    Added key is \"img_norm_cfg\".\n    Args:\n        mean (sequence): Mean values of 3 channels.\n        std (sequence): Std values of 3 channels.\n        to_rgb (bool): Whether to convert the image from BGR to RGB,\n            default is true.\n    \"\"\"\n\n    def __init__(self, mean, std, to_rgb=True):\n        self.mean = np.array(mean, dtype=np.float32)\n        self.std = np.array(std, dtype=np.float32)\n        self.to_rgb = to_rgb\n\n    def __call__(self, results):\n        \"\"\"Call function to normalize images.\n        Args:\n            results (dict): Result dict from loading pipeline.\n        Returns:\n            dict: Normalized results, 'img_norm_cfg' key is added into\n                result dict.\n        \"\"\"\n        for key in results.get('img_fields', ['img']):\n            results[key] = [mmcv.imnormalize(\n                img, self.mean, self.std, self.to_rgb) for img in results[key]]\n        results['img_norm_cfg'] = dict(\n            mean=self.mean, std=self.std, to_rgb=self.to_rgb)\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'\n        return repr_str\n\n\n@PIPELINES.register_module(force=True)\nclass PadMultiViewImages(object):\n    \"\"\"Pad multi-view images and change intrinsics\n    There are two padding modes: (1) pad to a fixed size and (2) pad to the\n    minimum size that is divisible by some number.\n    Added keys are \"pad_shape\", \"pad_fixed_size\", \"pad_size_divisor\",\n    If set `change_intrinsics=True`, key 'cam_intrinsics' and 'ego2img' will be changed.\n\n    Args:\n        size (tuple, optional): Fixed padding size, (h, w).\n        size_divisor (int, optional): The divisor of padded size.\n        pad_val (float, optional): Padding value, 0 by default.\n        change_intrinsics (bool): whether to update intrinsics.\n    \"\"\"\n\n    def __init__(self, size=None, size_divisor=None, pad_val=0, change_intrinsics=False):\n        self.size = size\n        self.size_divisor = size_divisor\n        self.pad_val = pad_val\n        # only one of size and size_divisor should be valid\n        assert size is not None or size_divisor is not None\n        assert size is None or size_divisor is None\n\n        self.change_intrinsics = change_intrinsics\n\n    def _pad_img(self, results):\n        \"\"\"Pad images according to ``self.size``.\"\"\"\n        original_shape = [img.shape for img in results['img']]\n\n        for key in results.get('img_fields', ['img']):\n            if self.size is not None:\n                padded_img = [mmcv.impad(\n                    img, shape=self.size, pad_val=self.pad_val) for img in results[key]]\n            elif self.size_divisor is not None:\n                padded_img = [mmcv.impad_to_multiple(\n                    img, self.size_divisor, pad_val=self.pad_val) for img in results[key]]\n            results[key] = padded_img\n\n        if self.change_intrinsics:\n            post_intrinsics, post_ego2imgs = [], []\n            for img, oshape, cam_intrinsic, ego2img in zip(results['img'], \\\n                    original_shape, results['cam_intrinsics'], results['ego2img']):\n                scaleW = img.shape[1] / oshape[1]\n                scaleH 
= img.shape[0] / oshape[0]\n\n                rot_resize_matrix = np.array([ \n                                        [scaleW, 0,      0,    0],\n                                        [0,      scaleH, 0,    0],\n                                        [0,      0,      1,    0],\n                                        [0,      0,      0,    1]])\n                post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic\n                post_ego2img = rot_resize_matrix @ ego2img\n                post_intrinsics.append(post_intrinsic)\n                post_ego2imgs.append(post_ego2img)\n        \n            results.update({\n                'cam_intrinsics': post_intrinsics,\n                'ego2img': post_ego2imgs,\n            })\n\n        results['img_shape'] = [img.shape for img in padded_img]\n        results['img_fixed_size'] = self.size\n        results['img_size_divisor'] = self.size_divisor\n\n    def __call__(self, results):\n        \"\"\"Call function to pad images, masks, semantic segmentation maps.\n        Args:\n            results (dict): Result dict from loading pipeline.\n        Returns:\n            dict: Updated result dict.\n        \"\"\"\n        self._pad_img(results)\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(size={self.size}, '\n        repr_str += f'size_divisor={self.size_divisor}, '\n        repr_str += f'pad_val={self.pad_val}, '\n        repr_str += f'change_intrinsics={self.change_intrinsics})'\n\n        return repr_str\n\n\n@PIPELINES.register_module(force=True)\nclass ResizeMultiViewImages(object):\n    \"\"\"Resize multi-view images and change intrinsics\n    If `change_intrinsics=True`, the keys 'cam_intrinsics' and 'ego2img' will be updated accordingly.\n\n    Args:\n        size (tuple, optional): resize target size, (h, w).\n        scale (float, optional): rescale factor; only one of `size` and `scale` may be set.\n        change_intrinsics (bool): whether to update intrinsics.\n    \"\"\"\n    def __init__(self, size=None, scale=None, change_intrinsics=True):\n        self.size = size\n        self.scale = scale\n        assert size is None or scale is None\n        self.change_intrinsics = change_intrinsics\n\n    def __call__(self, results:dict):\n\n        new_imgs, post_intrinsics, post_ego2imgs = [], [], []\n\n        for img, cam_intrinsic, ego2img in zip(results['img'], \\\n                results['cam_intrinsics'], results['ego2img']):\n            if self.scale is not None:\n                h, w = img.shape[:2]\n                target_h = int(h * self.scale)\n                target_w = int(w * self.scale)\n            else:\n                target_h = self.size[0]\n                target_w = self.size[1]\n            \n            tmp, scaleW, scaleH = mmcv.imresize(img,\n                                                # NOTE: mmcv.imresize expects a (w, h) shape\n                                                (target_w, target_h),\n                                                return_scale=True)\n            new_imgs.append(tmp)\n\n            rot_resize_matrix = np.array([\n                [scaleW, 0,      0,    0],\n                [0,      scaleH, 0,    0],\n                [0,      0,      1,    0],\n                [0,      0,      0,    1]])\n            post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic\n            post_ego2img = rot_resize_matrix @ ego2img\n            post_intrinsics.append(post_intrinsic)\n            post_ego2imgs.append(post_ego2img)\n\n        results['img'] = new_imgs\n        results['img_shape'] = [img.shape for img in 
new_imgs]\n        if self.change_intrinsics:\n            results.update({\n                'cam_intrinsics': post_intrinsics,\n                'ego2img': post_ego2imgs,\n            })\n\n        return results\n    \n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(size={self.size}, '\n        repr_str += f'scale={self.scale}, '\n        repr_str += f'change_intrinsics={self.change_intrinsics})'\n\n        return repr_str\n    \n\n@PIPELINES.register_module()\nclass PhotoMetricDistortionMultiViewImage:\n    \"\"\"Apply photometric distortion to images sequentially; every transformation\n    is applied with a probability of 0.5. Random contrast is applied either\n    second or second to last.\n    1. random brightness\n    2. random contrast (mode 0)\n    3. convert color from BGR to HSV\n    4. random saturation\n    5. random hue\n    6. convert color from HSV to BGR\n    7. random contrast (mode 1)\n    8. randomly swap channels (disabled below)\n    Args:\n        brightness_delta (int): delta of brightness.\n        contrast_range (tuple): range of contrast.\n        saturation_range (tuple): range of saturation.\n        hue_delta (int): delta of hue.\n    \"\"\"\n\n    def __init__(self,\n                 brightness_delta=32,\n                 contrast_range=(0.5, 1.5),\n                 saturation_range=(0.5, 1.5),\n                 hue_delta=18):\n        self.brightness_delta = brightness_delta\n        self.contrast_lower, self.contrast_upper = contrast_range\n        self.saturation_lower, self.saturation_upper = saturation_range\n        self.hue_delta = hue_delta\n\n    def __call__(self, results):\n        \"\"\"Call function to perform photometric distortion on images.\n        Args:\n            results (dict): Result dict from loading pipeline.\n        Returns:\n            dict: Result dict with images distorted.\n        \"\"\"\n        imgs = results['img']\n        new_imgs = []\n        for img in imgs:\n            assert img.dtype == np.float32, \\\n                'PhotoMetricDistortion needs the input image of dtype np.float32,'\\\n                ' please set \"to_float32=True\" in \"LoadImageFromFile\" pipeline'\n            # random brightness\n            if random.randint(2):\n                delta = random.uniform(-self.brightness_delta,\n                                    self.brightness_delta)\n                img += delta\n\n            # mode == 0 --> do random contrast first\n            # mode == 1 --> do random contrast last\n            mode = random.randint(2)\n            if mode == 1:\n                if random.randint(2):\n                    alpha = random.uniform(self.contrast_lower,\n                                        self.contrast_upper)\n                    img *= alpha\n\n            # convert color from BGR to HSV\n            img = mmcv.bgr2hsv(img)\n\n            # random saturation\n            if random.randint(2):\n                img[..., 1] *= random.uniform(self.saturation_lower,\n                                            self.saturation_upper)\n\n            # random hue\n            if random.randint(2):\n                img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)\n                img[..., 0][img[..., 0] > 360] -= 360\n                img[..., 0][img[..., 0] < 0] += 360\n\n            # convert color from HSV to BGR\n            img = mmcv.hsv2bgr(img)\n\n            # random contrast\n            if mode == 0:\n                if random.randint(2):\n                    alpha = random.uniform(self.contrast_lower,\n 
                                       self.contrast_upper)\n                    img *= alpha\n\n            # randomly swap channels\n            # if random.randint(2):\n            #     img = img[..., random.permutation(3)]\n            new_imgs.append(img)\n        results['img'] = new_imgs\n        return results\n\n    def __repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(\\nbrightness_delta={self.brightness_delta},\\n'\n        repr_str += 'contrast_range='\n        repr_str += f'{(self.contrast_lower, self.contrast_upper)},\\n'\n        repr_str += 'saturation_range='\n        repr_str += f'{(self.saturation_lower, self.saturation_upper)},\\n'\n        repr_str += f'hue_delta={self.hue_delta})'\n        return repr_str"
  },
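  {
    "path": "examples/demo_resize_intrinsics.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file, not part of the pipeline): shows how\nResizeMultiViewImages / PadMultiViewImages update camera intrinsics and ego2img\nafter the image size changes. Pure numpy; mmcv/mmdet are not required.\"\"\"\nimport numpy as np\n\n# Toy 3x3 intrinsics and 4x4 ego2img for a 900x1600 (h, w) image.\ncam_intrinsic = np.array([[1266.0, 0.0, 800.0],\n                          [0.0, 1266.0, 450.0],\n                          [0.0, 0.0, 1.0]])\nego2img = np.eye(4)\nego2img[:3, :3] = cam_intrinsic\n\n# Resize 900x1600 -> 450x800, i.e. the same per-axis scales the pipeline computes.\nscaleW, scaleH = 800 / 1600, 450 / 900\nrot_resize_matrix = np.array([[scaleW, 0, 0, 0],\n                              [0, scaleH, 0, 0],\n                              [0, 0, 1, 0],\n                              [0, 0, 0, 1]])\npost_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic\npost_ego2img = rot_resize_matrix @ ego2img\n\n# A pixel at (u, v) in the old image lands at (u*scaleW, v*scaleH) in the new one.\npt_ego = np.array([10.0, 2.0, 1.0, 1.0])\nuvw_old = (ego2img @ pt_ego)[:3]\nuvw_new = (post_ego2img @ pt_ego)[:3]\nassert np.allclose(uvw_new[:2] / uvw_new[2],\n                   (uvw_old[:2] / uvw_old[2]) * np.array([scaleW, scaleH]))\nprint('projected (u, v) after resize:', uvw_new[:2] / uvw_new[2])\n"
  },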
  {
    "path": "plugin/datasets/pipelines/vectorize.py",
    "content": "import numpy as np\nfrom mmdet.datasets.builder import PIPELINES\nfrom shapely.geometry import LineString\nfrom numpy.typing import NDArray\nfrom typing import List, Tuple, Union, Dict\n\n@PIPELINES.register_module(force=True)\nclass VectorizeMap(object):\n    \"\"\"Generate vectoized map and put into `semantic_mask` key.\n    Concretely, shapely geometry objects are converted into sample points (ndarray).\n    We use args `sample_num`, `sample_dist`, `simplify` to specify sampling method.\n\n    Args:\n        roi_size (tuple or list): bev range .\n        normalize (bool): whether to normalize points to range (0, 1).\n        coords_dim (int): dimension of point coordinates.\n        simplify (bool): whether to use simpily function. If true, `sample_num` \\\n            and `sample_dist` will be ignored.\n        sample_num (int): number of points to interpolate from a polyline. Set to -1 to ignore.\n        sample_dist (float): interpolate distance. Set to -1 to ignore.\n    \"\"\"\n\n    def __init__(self, \n                 roi_size: Union[Tuple, List], \n                 normalize: bool,\n                 coords_dim: int,\n                 simplify: bool=False, \n                 sample_num: int=-1, \n                 sample_dist: float=-1, \n                 permute: bool=False\n        ):\n        self.coords_dim = coords_dim\n        self.sample_num = sample_num\n        self.sample_dist = sample_dist\n        self.roi_size = np.array(roi_size)\n        self.normalize = normalize\n        self.simplify = simplify\n        self.permute = permute\n\n        if sample_dist > 0:\n            assert sample_num < 0 and not simplify\n            self.sample_fn = self.interp_fixed_dist\n        elif sample_num > 0:\n            assert sample_dist < 0 and not simplify\n            self.sample_fn = self.interp_fixed_num\n        else:\n            assert simplify\n\n    def interp_fixed_num(self, line: LineString) -> NDArray:\n        ''' Interpolate a line to fixed number of points.\n        \n        Args:\n            line (LineString): line\n        \n        Returns:\n            points (array): interpolated points, shape (N, 2)\n        '''\n\n        distances = np.linspace(0, line.length, self.sample_num)\n        sampled_points = np.array([list(line.interpolate(distance).coords) \n            for distance in distances]).squeeze()\n\n        return sampled_points\n\n    def interp_fixed_dist(self, line: LineString) -> NDArray:\n        ''' Interpolate a line at fixed interval.\n        \n        Args:\n            line (LineString): line\n        \n        Returns:\n            points (array): interpolated points, shape (N, 2)\n        '''\n\n        distances = list(np.arange(self.sample_dist, line.length, self.sample_dist))\n        # make sure to sample at least two points when sample_dist > line.length\n        distances = [0,] + distances + [line.length,] \n        \n        sampled_points = np.array([list(line.interpolate(distance).coords)\n                                for distance in distances]).squeeze()\n        \n        return sampled_points\n    \n    def get_vectorized_lines(self, map_geoms: Dict) -> Dict:\n        ''' Vectorize map elements. 
Iterate over the input dict and apply the\n        specified sampling function.\n        \n        Args:\n            map_geoms (Dict): dict mapping each label to a list of geometries\n        \n        Returns:\n            vectors (Dict): dict of vectorized map elements.\n        '''\n\n        vectors = {}\n        for label, geom_list in map_geoms.items():\n            vectors[label] = []\n            for geom in geom_list:\n                if geom.geom_type == 'LineString':\n                    if self.simplify:\n                        line = geom.simplify(0.2, preserve_topology=True)\n                        line = np.array(line.coords)\n                    else:\n                        line = self.sample_fn(geom)\n                    line = line[:, :self.coords_dim]\n\n                    if self.normalize:\n                        line = self.normalize_line(line)\n                    if self.permute:\n                        line = self.permute_line(line)\n                    vectors[label].append(line)\n\n                elif geom.geom_type == 'Polygon':\n                    # polygon objects will not be vectorized\n                    continue\n                \n                else:\n                    raise ValueError('map geoms must be either LineString or Polygon!')\n        return vectors\n    \n    def normalize_line(self, line: NDArray) -> NDArray:\n        ''' Convert points to range (0, 1).\n        \n        Args:\n            line (array): points, shape (N, coords_dim)\n        \n        Returns:\n            normalized (array): normalized points.\n        '''\n\n        origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])\n\n        line[:, :2] = line[:, :2] - origin\n\n        # map from range [0, roi_size] to [0, 1); eps keeps the upper bound open\n        eps = 1e-5\n        line[:, :2] = line[:, :2] / (self.roi_size + eps)\n\n        return line\n    \n    def permute_line(self, line: np.ndarray, padding=1e5):\n        '''\n        (num_pts, 2) -> (num_permute, num_pts, 2)\n        where num_permute = 2 * (num_pts - 1)\n        '''\n        is_closed = np.allclose(line[0], line[-1], atol=1e-3)\n        num_points = len(line)\n        permute_num = num_points - 1\n        permute_lines_list = []\n        if is_closed:\n            pts_to_permute = line[:-1, :] # drop the duplicated end point\n            for shift_i in range(permute_num):\n                permute_lines_list.append(np.roll(pts_to_permute, shift_i, axis=0))\n            flip_pts_to_permute = np.flip(pts_to_permute, axis=0)\n            for shift_i in range(permute_num):\n                permute_lines_list.append(np.roll(flip_pts_to_permute, shift_i, axis=0))\n        else:\n            permute_lines_list.append(line)\n            permute_lines_list.append(np.flip(line, axis=0))\n\n        permute_lines_array = np.stack(permute_lines_list, axis=0)\n\n        if is_closed:\n            tmp = np.zeros((permute_num * 2, num_points, self.coords_dim))\n            tmp[:, :-1, :] = permute_lines_array\n            tmp[:, -1, :] = permute_lines_array[:, 0, :] # restore the duplicated end point\n            permute_lines_array = tmp\n\n        else:\n            # pad open lines so all entries share the closed-loop permutation count\n            padding = np.full([permute_num * 2 - 2, num_points, self.coords_dim], padding)\n            permute_lines_array = np.concatenate((permute_lines_array, padding), axis=0)\n        \n        return permute_lines_array\n    \n    def __call__(self, input_dict):\n        map_geoms = input_dict['map_geoms']\n\n        input_dict['vectors'] = self.get_vectorized_lines(map_geoms)\n        return input_dict\n\n    def 
__repr__(self):\n        repr_str = self.__class__.__name__\n        repr_str += f'(simplify={self.simplify}, '\n        repr_str += f'sample_num={self.sample_num}, '\n        repr_str += f'sample_dist={self.sample_dist}, '\n        repr_str += f'roi_size={self.roi_size}, '\n        repr_str += f'normalize={self.normalize}, '\n        repr_str += f'coords_dim={self.coords_dim})'\n\n        return repr_str"
  },
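  {
    "path": "examples/demo_vectorize.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): fixed-number polyline sampling as in\nVectorizeMap.interp_fixed_num, plus the ordering counts behind permute_line\n(2 orderings for an open polyline, 2*(num_pts-1) for a closed loop).\nRequires numpy and shapely only.\"\"\"\nimport numpy as np\nfrom shapely.geometry import LineString\n\nline = LineString([(0, 0), (4, 0), (4, 3)])  # open polyline, arc length 7\nsample_num = 5\n\n# Interpolate at evenly spaced arc-length positions; endpoints are preserved.\ndistances = np.linspace(0, line.length, sample_num)\npoints = np.array([list(line.interpolate(d).coords)\n                   for d in distances]).squeeze()\nprint(points.shape)  # (5, 2)\n\n# An open polyline has exactly two valid orderings: forward and reversed.\norderings = [points, np.flip(points, axis=0)]\nassert len(orderings) == 2\n\n# A closed loop with num_pts points (first == last) has 2 * (num_pts - 1)\n# orderings: every cyclic shift of the deduplicated ring, in both directions.\nnum_pts = 20\nprint('closed-loop orderings:', 2 * (num_pts - 1))\n"
  },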
  {
    "path": "plugin/datasets/samplers/__init__.py",
    "content": "from .group_sampler import DistributedGroupSampler, InfiniteGroupEachSampleInBatchSampler\nfrom .distributed_sampler import DistributedSampler\nfrom .sampler import SAMPLER, build_sampler\n\n"
  },
  {
    "path": "plugin/datasets/samplers/distributed_sampler.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\nimport math\nimport torch\nfrom torch.utils.data import DistributedSampler as _DistributedSampler\nfrom .sampler import SAMPLER\nimport numpy as np\n\n@SAMPLER.register_module()\nclass DistributedSampler(_DistributedSampler):\n\n    def __init__(self,\n                 dataset=None,\n                 num_replicas=None,\n                 rank=None,\n                 shuffle=True,\n                 seed=0):\n        super().__init__(\n            dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)\n        # for the compatibility from PyTorch 1.3+\n        self.seed = seed if seed is not None else 0\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n        self.groups_num = len(self.group_sizes)\n        self.groups = list(set(self.flag))\n        assert self.groups == list(range(self.groups_num))\n\n        # Now, for efficiency, make a dict {group_idx: List[dataset sample_idxs]}\n        self.group_idx_to_sample_idxs = {\n            group_idx: np.where(self.flag == group_idx)[0].tolist()\n            for group_idx in range(self.groups_num)}  \n\n        num_groups_per_gpu = math.ceil(len(self.groups) / self.num_replicas)\n        # assign groups (continuous videos) to each gpu rank\n        # self.sample_group_idx = self.groups[self.rank*num_groups_per_gpu: min(len(self.groups), (self.rank+1)*num_groups_per_gpu)]\n        self.sample_group_idx = self.groups[self.rank::self.num_replicas]\n        \n        \n        self.sample_idxs = []\n        for i in self.sample_group_idx:\n            self.sample_idxs.extend(self.group_idx_to_sample_idxs[i])\n\n        #print('Rank', rank, 'Num samples', len(self.sample_idxs), 'Samples', self.sample_idxs)\n        self.num_samples = len(self.sample_idxs)\n        self.total_size = len(self.dataset)\n\n    def __iter__(self):\n        # only used for validation/testing \n        # only support batchsize = 1\n        if self.shuffle:\n            assert False\n        # else:\n        #     indices = torch.arange(len(self.dataset)).tolist()\n\n        # # add extra samples to make it evenly divisible\n        # # in case that indices is shorter than half of total_size\n        # indices = (indices *\n        #            math.ceil(self.total_size / len(indices)))[:self.total_size]\n        # assert len(indices) == self.total_size\n\n        # # subsample\n        # per_replicas = self.total_size//self.num_replicas\n        # # indices = indices[self.rank:self.total_size:self.num_replicas]\n        # indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas]\n        # assert len(indices) == self.num_samples\n\n        return iter(self.sample_idxs)\n"
  },
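  {
    "path": "examples/demo_group_split.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): how the validation DistributedSampler\nabove assigns whole sequences (groups) to GPU ranks with the strided slice\ngroups[rank::num_replicas], so every rank iterates complete videos in order.\nPure numpy.\"\"\"\nimport numpy as np\n\n# Fake `flag`: sample i belongs to group flag[i] (3 sequences of lengths 4/2/3).\nflag = np.array([0, 0, 0, 0, 1, 1, 2, 2, 2])\ngroups = list(range(len(np.bincount(flag))))\ngroup_idx_to_sample_idxs = {g: np.where(flag == g)[0].tolist() for g in groups}\n\nnum_replicas = 2\nfor rank in range(num_replicas):\n    # Strided assignment: rank 0 gets groups [0, 2], rank 1 gets group [1].\n    sample_group_idx = groups[rank::num_replicas]\n    sample_idxs = []\n    for g in sample_group_idx:\n        sample_idxs.extend(group_idx_to_sample_idxs[g])\n    print(f'rank {rank}: groups {sample_group_idx}, samples {sample_idxs}')\n"
  },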
  {
    "path": "plugin/datasets/samplers/group_sampler.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n#  Modified by Shihao Wang\n# ---------------------------------------------\nimport math\nimport itertools\nimport copy\nimport torch.distributed as dist\nimport numpy as np\nimport torch\nfrom mmcv.runner import get_dist_info\nfrom torch.utils.data import Sampler\nfrom .sampler import SAMPLER\nimport random\n\nclass GroupSampler(Sampler):\n\n    def __init__(self, dataset, samples_per_gpu=1):\n        assert hasattr(dataset, 'flag')\n        self.dataset = dataset\n        self.samples_per_gpu = samples_per_gpu\n        self.flag = dataset.flag.astype(np.int64)\n        self.group_sizes = np.bincount(self.flag)\n        self.num_samples = 0\n        for i, size in enumerate(self.group_sizes):\n            self.num_samples += int(np.ceil(\n                size / self.samples_per_gpu)) * self.samples_per_gpu\n        \n        print('Warning!!! Only used for testing!')\n\n    def __iter__(self):\n        for i, size in enumerate(self.group_sizes):\n            if size == 0:\n                continue\n            indice = np.where(self.flag == i)[0]\n            assert len(indice) == size\n            yield from indice\n            \n    def __len__(self):\n        return self.num_samples\n\n@SAMPLER.register_module()\nclass DistributedGroupSampler(Sampler):\n    \"\"\"Sampler that restricts data loading to a subset of the dataset.\n    It is especially useful in conjunction with\n    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each\n    process can pass a DistributedSampler instance as a DataLoader sampler,\n    and load a subset of the original dataset that is exclusive to it.\n    .. note::\n        Dataset is assumed to be of constant size.\n    Arguments:\n        dataset: Dataset used for sampling.\n        num_replicas (optional): Number of processes participating in\n            distributed training.\n        rank (optional): Rank of the current process within num_replicas.\n        seed (int, optional): random seed used to shuffle the sampler if\n            ``shuffle=True``. This number should be identical across all\n            processes in the distributed group. 
Default: 0.\n    \"\"\"\n\n    def __init__(self,\n                 dataset,\n                 samples_per_gpu=1,\n                 num_replicas=None,\n                 rank=None,\n                 seed=0):\n        _rank, _num_replicas = get_dist_info()\n        if num_replicas is None:\n            num_replicas = _num_replicas\n        if rank is None:\n            rank = _rank\n        self.dataset = dataset\n        self.samples_per_gpu = samples_per_gpu\n        self.num_replicas = num_replicas\n        self.rank = rank\n        self.epoch = 0\n        self.seed = seed if seed is not None else 0\n\n        assert hasattr(self.dataset, 'flag')\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n\n        self.num_samples = 0\n        for size in self.group_sizes:\n            self.num_samples += int(\n                math.ceil(size * 1.0 / self.samples_per_gpu /\n                          self.num_replicas)) * self.samples_per_gpu\n        self.total_size = self.num_samples * self.num_replicas\n\n    def __iter__(self):\n        # deterministically shuffle based on epoch\n        g = torch.Generator()\n        g.manual_seed(self.epoch + self.seed)\n\n        indices = []\n        for i, size in enumerate(self.group_sizes):\n            if size > 0:\n                indice = np.where(self.flag == i)[0]\n                assert len(indice) == size\n                # add .numpy() to avoid a bug when selecting indice in parrots.\n                # TODO: check whether torch.randperm() can be replaced by\n                # numpy.random.permutation().\n                indice = indice[list(\n                    torch.randperm(int(size), generator=g).numpy())].tolist()\n                extra = int(\n                    math.ceil(\n                        size * 1.0 / self.samples_per_gpu / self.num_replicas)\n                ) * self.samples_per_gpu * self.num_replicas - len(indice)\n                # pad indice\n                tmp = indice.copy()\n                for _ in range(extra // size):\n                    indice.extend(tmp)\n                indice.extend(tmp[:extra % size])\n                indices.extend(indice)\n\n        assert len(indices) == self.total_size\n\n        indices = [\n            indices[j] for i in list(\n                torch.randperm(\n                    len(indices) // self.samples_per_gpu, generator=g))\n            for j in range(i * self.samples_per_gpu, (i + 1) *\n                           self.samples_per_gpu)\n        ]\n\n        # subsample\n        offset = self.num_samples * self.rank\n        indices = indices[offset:offset + self.num_samples]\n        assert len(indices) == self.num_samples\n        \n        return iter(indices)\n\n    def __len__(self):\n        return self.num_samples\n\n    def set_epoch(self, epoch):\n        self.epoch = epoch\n\n\ndef sync_random_seed(seed=None, device='cuda'):\n    \"\"\"Make sure different ranks share the same seed.\n    All workers must call this function, otherwise it will deadlock.\n    This method is generally used in `DistributedSampler`,\n    because the seed should be identical across all processes\n    in the distributed group.\n    In distributed sampling, different ranks should sample non-overlapped\n    data in the dataset. Therefore, this function is used to make sure that\n    each rank shuffles the data indices in the same order based\n    on the same seed. 
Then different ranks could use different indices\n    to select non-overlapped data from the same data list.\n    Args:\n        seed (int, Optional): The seed. Defaults to None.\n        device (str): The device the seed will be put on.\n            Defaults to 'cuda'.\n    Returns:\n        int: Seed to be used.\n    \"\"\"\n    if seed is None:\n        seed = np.random.randint(2**31)\n    assert isinstance(seed, int)\n\n    rank, num_replicas = get_dist_info()\n\n    if num_replicas == 1:\n        return seed\n\n    if rank == 0:\n        random_num = torch.tensor(seed, dtype=torch.int32, device=device)\n    else:\n        random_num = torch.tensor(0, dtype=torch.int32, device=device)\n    dist.broadcast(random_num, src=0)\n    return random_num.item()\n\n@SAMPLER.register_module()\nclass InfiniteGroupEachSampleInBatchSampler(Sampler):\n    \"\"\"\n    Pardon this horrendous name. Basically, we want every sample to be from its own group.\n    If batch size is 4 and # of GPUs is 8, each sample of these 32 should be operating on\n    its own group.\n    Shuffling is only done for group order, not done within groups.\n    Arguments:\n        dataset: Dataset used for sampling.\n        seq_split_num: Number of sub-sequences to randomly split each\n            sequence into (-1 disables splitting). Default: -1\n        num_iters_to_seq: After `num_iters_to_seq` iterations, \n            start sequential sampling. Default: 0\n        random_drop: Fraction of frames randomly dropped from each\n            sub-sequence. Default: 0\n        samples_per_gpu (optional): Per gpu batchsize. Default: 1\n        num_replicas (optional): Number of processes participating in\n            distributed training.\n        rank (optional): Rank of the current process within num_replicas.\n        seed (int, optional): random seed used to shuffle the sampler if\n            ``shuffle=True``. This number should be identical across all\n            processes in the distributed group. 
Default: 0.\n    \"\"\"\n\n    def __init__(self, \n                 dataset,\n                 seq_split_num=-1,\n                 num_iters_to_seq=0,\n                 random_drop=0,\n                 samples_per_gpu=1,\n                 num_replicas=None,\n                 rank=None,\n                 seed=0):\n\n        _rank, _num_replicas = get_dist_info()\n        if num_replicas is None:\n            num_replicas = _num_replicas\n        if rank is None:\n            rank = _rank\n\n        self.dataset = dataset\n        self.batch_size = samples_per_gpu\n        self.num_replicas = num_replicas\n        self.rank = rank\n        self.seq_split_num = seq_split_num\n        self.sub_seq_generator = torch.Generator()\n        self.sub_seq_generator.manual_seed(self.rank + seed)\n        self.seed = sync_random_seed(seed)\n        self.random_drop = random_drop\n\n        self.size = len(self.dataset)\n        self._iters = 0\n        self.num_iters_to_seq = num_iters_to_seq\n\n        assert hasattr(self.dataset, 'flag')\n        self.flag = self.dataset.flag\n        self.group_sizes = np.bincount(self.flag)\n        self.groups_num = len(self.group_sizes)\n        self.global_batch_size = samples_per_gpu * num_replicas\n        assert self.groups_num >= self.global_batch_size\n\n        # Now, for efficiency, make a dict {group_idx: List[dataset sample_idxs]}\n        self.group_idx_to_sample_idxs = {\n            group_idx: np.where(self.flag == group_idx)[0].tolist()\n            for group_idx in range(self.groups_num)} \n\n        self.group_idx_to_sample_idxs_generator = {\n            group_idx: self._sample_sub_sequence(group_idx)\n            for group_idx in range(self.groups_num)\n        }\n\n        # Get a generator per sample idx. 
Considering samples over all\n        # GPUs, each sample position has its own generator \n        self.group_indices_per_global_sample_idx = [\n            self._group_indices_per_global_sample_idx(self.rank * self.batch_size + local_sample_idx) \n            for local_sample_idx in range(self.batch_size)]\n        \n        # Keep track of a buffer of dataset sample idxs for each local sample idx\n        self.buffer_per_local_sample = [[] for _ in range(self.batch_size)]\n\n\n    def _infinite_group_indices(self):\n        g = torch.Generator()\n        g.manual_seed(self.seed)\n        while True:\n            yield from torch.randperm(self.groups_num, generator=g).tolist()\n\n    def _group_indices_per_global_sample_idx(self, global_sample_idx):\n        yield from itertools.islice(self._infinite_group_indices(), \n                                    global_sample_idx, \n                                    None,\n                                    self.global_batch_size)\n\n    def _sample_sub_sequence(self, group_idx):\n        '''randomly split a whole sequence into sub-sequences'''\n\n        sample_ids = self.group_idx_to_sample_idxs[group_idx]\n        while True:\n            if self._iters < self.num_iters_to_seq or self.seq_split_num == -1:\n                shuffled = torch.randperm(len(sample_ids), generator=self.sub_seq_generator).tolist()\n                yield from [[sample_ids[i]] for i in shuffled]\n            \n            else:\n                # split the sequence into parts\n                idx = torch.randperm(len(sample_ids), generator=self.sub_seq_generator).tolist()\n                idx.remove(0)\n                idx = sorted(idx[:self.seq_split_num - 1]) # choose n-1 split positions\n                split_idx = [0] + idx + [len(sample_ids)]\n                sub_seq_idx = [sample_ids[split_idx[i]: split_idx[i + 1]] \n                            for i in range(len(split_idx) - 1)] # [[1,2,3], [4,5], ...]\n                shuffled = torch.randperm(len(sub_seq_idx), generator=self.sub_seq_generator).tolist()\n                for i in shuffled:\n                    sub_seq = sub_seq_idx[i]\n                    length = len(sub_seq)\n                    drop_num = math.floor(length * self.random_drop)\n                    drop_idxs = torch.randperm(length, generator=self.sub_seq_generator).tolist()[:drop_num]\n                    new_sub_seq = [sub_seq[j] for j in range(length) if j not in drop_idxs]\n                    yield new_sub_seq\n        \n\n    def __iter__(self):\n        last_group_idx_batch = [-1 for i in range(self.batch_size)]\n        while True:\n            curr_batch = []\n            for local_sample_idx in range(self.batch_size):\n                if len(self.buffer_per_local_sample[local_sample_idx]) == 0:\n                    # Finished current group, refill with next group\n                    new_group_idx = next(self.group_indices_per_global_sample_idx[local_sample_idx])\n\n                    # Make sure we never run the same sequence twice in a row;\n                    # without this check, the same sequence may be sampled back-to-back\n                    # when the shuffled group stream wraps around at an epoch boundary\n                    if new_group_idx == last_group_idx_batch[local_sample_idx]:\n                        new_group_idx = next(self.group_indices_per_global_sample_idx[local_sample_idx])\n                    last_group_idx_batch[local_sample_idx] = new_group_idx\n\n                    self.buffer_per_local_sample[local_sample_idx] = \\\n                        
copy.deepcopy(next(self.group_idx_to_sample_idxs_generator[new_group_idx]))\n\n                curr_batch.append(self.buffer_per_local_sample[local_sample_idx].pop(0))\n            \n            self._iters += 1\n            yield curr_batch\n\n    def __len__(self):\n        \"\"\"Length of base dataset.\"\"\"\n        return self.size\n        \n    def set_epoch(self, epoch):\n        self.epoch = epoch"
  },
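  {
    "path": "examples/demo_infinite_group_stream.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): the core indexing trick of\nInfiniteGroupEachSampleInBatchSampler. An infinite shuffled stream of group\nindices is sliced with itertools.islice so that global batch slot k sees every\n(k + i * global_batch_size)-th group, giving each slot its own disjoint stream\nof sequences. torch is used only for the seeded shuffle.\"\"\"\nimport itertools\nimport torch\n\ngroups_num = 10\nglobal_batch_size = 4  # e.g. 2 GPUs x 2 samples per GPU\n\n\ndef infinite_group_indices(seed=0):\n    g = torch.Generator()\n    g.manual_seed(seed)\n    while True:  # reshuffle all groups on every pass of the stream\n        yield from torch.randperm(groups_num, generator=g).tolist()\n\n\nstreams = [\n    itertools.islice(infinite_group_indices(), k, None, global_batch_size)\n    for k in range(global_batch_size)\n]\n\n# The first draws across slots come from one permutation chunk, so no two\n# batch slots work on the same sequence in the same round.\nfirst_round = [next(s) for s in streams]\nprint('groups for the batch slots this round:', first_round)\nassert len(set(first_round)) == global_batch_size\n"
  },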
  {
    "path": "plugin/datasets/samplers/sampler.py",
    "content": "# ---------------------------------------------\r\n# Copyright (c) OpenMMLab. All rights reserved.\r\n# ---------------------------------------------\r\n#  Modified by Zhiqi Li\r\n# ---------------------------------------------\r\n#  Modified by Shihao Wang\r\n# ---------------------------------------------\r\nfrom mmcv.utils.registry import Registry, build_from_cfg\r\n\r\nSAMPLER = Registry('sampler')\r\n\r\n\r\ndef build_sampler(cfg, default_args):\r\n    return build_from_cfg(cfg, SAMPLER, default_args)\r\n"
  },
  {
    "path": "plugin/datasets/visualize/renderer.py",
    "content": "import os.path as osp\nimport os\nimport av2.geometry.interpolate as interp_utils\nimport numpy as np\nimport copy\nimport cv2\nimport matplotlib\nimport matplotlib.pyplot as plt\nfrom PIL import Image\n\nmatplotlib.use('agg') # prevent memory leak for drawing figures in a loop\n\ndef remove_nan_values(uv):\n    is_u_valid = np.logical_not(np.isnan(uv[:, 0]))\n    is_v_valid = np.logical_not(np.isnan(uv[:, 1]))\n    is_uv_valid = np.logical_and(is_u_valid, is_v_valid)\n\n    uv_valid = uv[is_uv_valid]\n    return uv_valid\n\ndef points_ego2img(pts_ego, extrinsics, intrinsics):\n    pts_ego_4d = np.concatenate([pts_ego, np.ones([len(pts_ego), 1])], axis=-1)\n    pts_cam_4d = extrinsics @ pts_ego_4d.T\n    \n    uv = (intrinsics @ pts_cam_4d[:3, :]).T\n    uv = remove_nan_values(uv)\n    depth = uv[:, 2]\n    uv = uv[:, :2] / uv[:, 2].reshape(-1, 1)\n\n    return uv, depth\n\ndef draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsics, color_bgr, thickness):\n    if polyline_ego.shape[1] == 2:\n        zeros = np.zeros((polyline_ego.shape[0], 1))\n        polyline_ego = np.concatenate([polyline_ego, zeros], axis=1)\n\n    polyline_ego = interp_utils.interp_arc(t=500, points=polyline_ego)\n    \n    uv, depth = points_ego2img(polyline_ego, extrinsics, intrinsics)\n\n    h, w, c = img_bgr.shape\n\n    is_valid_x = np.logical_and(0 <= uv[:, 0], uv[:, 0] < w - 1)\n    is_valid_y = np.logical_and(0 <= uv[:, 1], uv[:, 1] < h - 1)\n    is_valid_z = depth > 0\n    is_valid_points = np.logical_and.reduce([is_valid_x, is_valid_y, is_valid_z])\n\n    if is_valid_points.sum() == 0:\n        return\n    \n    uv = np.round(uv[is_valid_points]).astype(np.int32)\n\n    draw_visible_polyline_cv2(\n        copy.deepcopy(uv),\n        valid_pts_bool=np.ones((len(uv), 1), dtype=bool),\n        image=img_bgr,\n        color=color_bgr,\n        thickness_px=thickness,\n    )\n\ndef draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickness_px):\n    \"\"\"Draw a polyline onto an image using given line segments.\n\n    Args:\n        line: Array of shape (K, 2) representing the coordinates of line.\n        valid_pts_bool: Array of shape (K,) representing which polyline coordinates are valid for rendering.\n            For example, if the coordinate is occluded, a user might specify that it is invalid.\n            Line segments touching an invalid vertex will not be rendered.\n        image: Array of shape (H, W, 3), representing a 3-channel BGR image\n        color: Tuple of shape (3,) with a BGR format color\n        thickness_px: thickness (in pixels) to use when rendering the polyline.\n    \"\"\"\n    line = np.round(line).astype(int)  # type: ignore\n    for i in range(len(line) - 1):\n\n        if (not valid_pts_bool[i]) or (not valid_pts_bool[i + 1]):\n            continue\n\n        x1 = line[i][0]\n        y1 = line[i][1]\n        x2 = line[i + 1][0]\n        y2 = line[i + 1][1]\n\n        # Use anti-aliasing (AA) for curves\n        image = cv2.line(image, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=thickness_px, lineType=cv2.LINE_AA)\n\n\nCOLOR_MAPS_BGR = {\n    # bgr colors\n    'divider': (0, 0, 255),\n    'boundary': (0, 255, 0),\n    'ped_crossing': (255, 0, 0),\n    'centerline': (51, 183, 255),\n    'drivable_area': (171, 255, 255)\n}\n\nCOLOR_MAPS_PLT = {\n    'divider': 'r',\n    'boundary': 'g',\n    'ped_crossing': 'b',\n    'centerline': 'orange',\n    'drivable_area': 'y',\n}\n\nCAM_NAMES_AV2 = ['ring_front_center', 'ring_front_right', 
'ring_front_left',\n    'ring_rear_right','ring_rear_left', 'ring_side_right', 'ring_side_left',\n    ]\nCAM_NAMES_NUSC = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT',\n    'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT',]\n\nclass Renderer(object):\n    \"\"\"Render map elements on image views.\n\n    Args:\n        cat2id (dict): category to class id\n        roi_size (tuple): bev range\n        dataset (str): 'av2' or 'nusc'\n    \"\"\"\n\n    def __init__(self, cat2id, roi_size, dataset='av2'):\n        self.roi_size = roi_size\n        self.cat2id = cat2id\n        self.id2cat = {v: k for k, v in cat2id.items()}\n        if dataset == 'av2':\n            self.cam_names = CAM_NAMES_AV2\n        else:\n            self.cam_names = CAM_NAMES_NUSC\n\n    def render_bev_from_vectors(self, vectors, out_dir, draw_scores=False, specified_path=None,\n            id_info=None):\n        '''Render bev segmentation using vectorized map elements.\n        \n        Args:\n            vectors (dict): dict of vectorized map elements.\n            out_dir (str): output directory\n            draw_scores (bool): whether to draw per-instance scores.\n            id_info (dict): optional per-instance track ids to draw.\n        '''\n\n        car_img = Image.open('resources/car.png')\n        if specified_path:\n            map_path = specified_path\n        else:\n            map_path = os.path.join(out_dir, 'map.jpg')\n\n        fig = plt.figure(figsize=(self.roi_size[0], self.roi_size[1]))\n        ax = fig.add_subplot(1, 1, 1)\n        ax.set_xlim(-self.roi_size[0] / 2, self.roi_size[0] / 2)\n        ax.set_ylim(-self.roi_size[1] / 2, self.roi_size[1] / 2)\n        ax.axis('off')\n        ax.imshow(car_img, extent=[-2.5, 2.5, -2.0, 2.0])\n\n        for label, vector_list in vectors.items():\n            cat = self.id2cat[label]\n            color = COLOR_MAPS_PLT[cat]\n            for vec_i, vector in enumerate(vector_list):\n                if draw_scores:\n                    vector, score, prop = vector\n                if isinstance(vector, list):\n                    vector = np.array(vector)\n                    from shapely.geometry import LineString\n                    vector = np.array(LineString(vector).simplify(0.2).coords)\n                pts = vector[:, :2]\n                x = np.array([pt[0] for pt in pts])\n                y = np.array([pt[1] for pt in pts])\n                ax.plot(x, y, 'o-', color=color, linewidth=20, markersize=50)\n                if draw_scores:\n                    # mark propagated instances with a trailing 'p'\n                    if prop:\n                        p = 'p'\n                    else:\n                        p = ''\n                    score = round(score, 2)\n                    mid_idx = len(x) // 2\n                    ax.text(x[mid_idx], y[mid_idx], str(score)+p, fontsize=100, color=color)\n                if id_info:\n                    vec_id = id_info[label][vec_i]\n                    mid_idx = len(x) // 2\n                    ax.text(x[mid_idx], y[mid_idx], f'{cat[:1].upper()}{vec_id}', fontsize=100, color=color)\n                    \n        fig.savefig(map_path, bbox_inches='tight', dpi=20)\n        plt.close(fig)  # close the figure to avoid accumulating memory in long loops\n        \n    def 
render_camera_views_from_vectors(self, vectors, imgs, extrinsics, \n            intrinsics, ego2cams, thickness, out_dir):\n        '''Project vectorized map elements to camera views.\n        \n        Args:\n            vectors (dict): dict of vectorized map elements.\n            imgs (tensor): images in bgr color.\n            extrinsics (array): camera extrinsics, shape (4, 4)\n            intrinsics (array): intrinsics, shape (3, 3) \n            ego2cams (array): ego-to-camera transforms used for projection, shape (4, 4)\n            thickness (int): thickness of lines to draw on images.\n            out_dir (str): output directory\n        '''\n\n        for i in range(len(imgs)):\n            img = imgs[i]\n            extrinsic = extrinsics[i]\n            intrinsic = intrinsics[i]\n            ego2cam = ego2cams[i]\n            img_bgr = copy.deepcopy(img)\n\n            for label, vector_list in vectors.items():\n                cat = self.id2cat[label]\n                color = COLOR_MAPS_BGR[cat]\n                for vector in vector_list:\n                    img_bgr = np.ascontiguousarray(img_bgr)\n                    if isinstance(vector, list):\n                        vector = np.array(vector)\n                    draw_polyline_ego_on_img(vector, img_bgr, ego2cam, intrinsic, color, thickness)\n                    \n            out_path = osp.join(out_dir, self.cam_names[i]) + '.jpg'\n            cv2.imwrite(out_path, img_bgr)\n\n    def render_bev_from_mask(self, semantic_mask, out_dir, flip=False):\n        '''Render bev segmentation from semantic_mask.\n        \n        Args:\n            semantic_mask (array): semantic mask.\n            out_dir (str): output directory\n            flip (bool): whether to flip the map vertically before saving.\n        '''\n\n        if len(semantic_mask.shape) == 3:\n            c, h, w = semantic_mask.shape\n        else:\n            h, w = semantic_mask.shape\n        \n        bev_img = np.ones((3, h, w), dtype=np.uint8) * 255\n        if 'drivable_area' in self.cat2id:\n            drivable_area_mask = semantic_mask[self.cat2id['drivable_area']]\n            bev_img[:, drivable_area_mask == 1] = \\\n                    np.array(COLOR_MAPS_BGR['drivable_area']).reshape(3, 1)\n        \n        for label in self.id2cat:\n            cat = self.id2cat[label]\n            if cat == 'drivable_area':\n                continue\n            if len(semantic_mask.shape) == 3:\n                valid = (semantic_mask[label] == 1)\n            else:\n                valid = semantic_mask == (label + 1)\n            bev_img[:, valid] = np.array(COLOR_MAPS_BGR[cat]).reshape(3, 1)\n\n        out_path = osp.join(out_dir, 'semantic_map.jpg')\n        if flip:\n            bev_img_flipud = np.array([np.flipud(i) for i in bev_img], dtype=np.uint8)\n            cv2.imwrite(out_path, bev_img_flipud.transpose((1, 2, 0)))\n        else:\n            cv2.imwrite(out_path, bev_img.transpose((1, 2, 0)))\n"
  },
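  {
    "path": "examples/demo_ego_projection.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): the projection math behind\nrenderer.points_ego2img. Ego points are lifted to homogeneous coordinates,\nmapped into the camera frame by a 4x4 ego-to-camera transform, projected with\nthe 3x3 intrinsics, and points behind the camera (depth <= 0) are dropped\nbefore drawing. Pure numpy; the transforms below are made-up toy values.\"\"\"\nimport numpy as np\n\nintrinsics = np.array([[1000.0, 0.0, 640.0],\n                       [0.0, 1000.0, 360.0],\n                       [0.0, 0.0, 1.0]])\n# Toy ego2cam: camera looks along ego +x (x_ego->z_cam, -y_ego->x_cam, -z_ego->y_cam).\nego2cam = np.array([[0.0, -1.0, 0.0, 0.0],\n                    [0.0, 0.0, -1.0, 1.5],\n                    [1.0, 0.0, 0.0, 0.0],\n                    [0.0, 0.0, 0.0, 1.0]])\n\npts_ego = np.array([[5.0, 0.0, 0.0],    # 5 m ahead of the ego vehicle\n                    [10.0, 2.0, 0.0],   # ahead and to the left\n                    [-5.0, 0.0, 0.0]])  # behind the camera\npts_ego_4d = np.concatenate([pts_ego, np.ones([len(pts_ego), 1])], axis=-1)\npts_cam_4d = ego2cam @ pts_ego_4d.T\n\nuvw = (intrinsics @ pts_cam_4d[:3, :]).T\ndepth = uvw[:, 2]\nuv = uvw[:, :2] / depth.reshape(-1, 1)\nprint(uv[depth > 0])  # only the two points in front of the camera survive\n"
  },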
  {
    "path": "plugin/models/__init__.py",
    "content": "from .backbones import *\nfrom .heads import *\nfrom .necks import *\nfrom .losses import *\nfrom .mapers import *\nfrom .transformer_utils import *\nfrom .assigner import *\nfrom .utils import *"
  },
  {
    "path": "plugin/models/assigner/__init__.py",
    "content": "from .assigner import HungarianLinesAssigner\nfrom .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesL1Cost, LinesFixNumChamferCost, ClsSigmoidCost\n"
  },
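  {
    "path": "examples/demo_chamfer_cost.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): the symmetric Chamfer distance used\nby the line matching costs in this package. Averaging nearest-neighbour\ndistances in both directions makes the cost invariant to point ordering, which\nis why permuted GT orderings can share one cost matrix. torch only.\"\"\"\nimport torch\n\n\ndef chamfer_distance(line1, line2):\n    # line1: (N, 2), line2: (M, 2)\n    dist_matrix = torch.cdist(line1, line2, p=2)\n    dist12 = dist_matrix.min(-1)[0].mean()  # pred -> gt direction\n    dist21 = dist_matrix.min(-2)[0].mean()  # gt -> pred direction\n    return (dist12 + dist21) / 2\n\n\nline = torch.tensor([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]])\nshifted = line + torch.tensor([0.0, 0.5])\nprint(float(chamfer_distance(line, shifted)))          # 0.5\nprint(float(chamfer_distance(line, shifted.flip(0))))  # still 0.5: order-invariant\n"
  },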
  {
    "path": "plugin/models/assigner/assigner.py",
    "content": "import torch\n\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.assigners import AssignResult\nfrom mmdet.core.bbox.assigners import BaseAssigner\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom scipy.optimize import linear_sum_assignment\n\nimport numpy as np\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianLinesAssigner(BaseAssigner):\n    \"\"\"\n        Computes one-to-one matching between predictions and ground truth.\n        This class computes an assignment between the targets and the predictions\n        based on the costs. The costs are weighted sum of three components:\n        classification cost and regression L1 cost. The\n        targets don't include the no_object, so generally there are more\n        predictions than targets. After the one-to-one matching, the un-matched\n        are treated as backgrounds. Thus each query prediction will be assigned\n        with `0` or a positive integer indicating the ground truth index:\n        - 0: negative sample, no assigned gt\n        - positive integer: positive sample, index (1-based) of assigned gt\n        Args:\n            cls_weight (int | float, optional): The scale factor for classification\n                cost. Default 1.0.\n            bbox_weight (int | float, optional): The scale factor for regression\n                L1 cost. Default 1.0.\n    \"\"\"\n\n    def __init__(self,\n                 cost=dict(\n                     type='MapQueriesCost',\n                     cls_cost=dict(type='ClassificationCost', weight=1.),\n                     reg_cost=dict(type='LinesCost', weight=1.0),\n                    ),\n                 **kwargs):\n\n        self.cost = build_match_cost(cost)\n\n    def assign(self,\n               preds: dict,\n               gts: dict,\n               track_info=None,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"\n            Computes one-to-one matching based on the weighted costs.\n            This method assign each query prediction to a ground truth or\n            background. The `assigned_gt_inds` with -1 means don't care,\n            0 means negative sample, and positive number is the index (1-based)\n            of assigned gt.\n            The assignment is done in the following steps, the order matters.\n            1. assign every prediction to -1\n            2. compute the weighted costs\n            3. do Hungarian matching on CPU based on the costs\n            4. assign all to 0 (background) first, then for each matched pair\n            between predictions and gts, treat this prediction as foreground\n            and assign the corresponding gt index (plus 1) to it.\n            Args:\n                lines_pred (Tensor): predicted normalized lines:\n                    [num_query, num_points, 2]\n                cls_pred (Tensor): Predicted classification logits, shape\n                    [num_query, num_class].\n\n                lines_gt (Tensor): Ground truth lines\n                    [num_gt, num_points, 2].\n                labels_gt (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n                gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                    labelled as `ignored`. Default None.\n                eps (int | float, optional): A value added to the denominator for\n                    numerical stability. 
Default 1e-7.\n            Returns:\n                tuple: (:obj:`AssignResult`, gt_permute_idx, matched_reg_cost).\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        \n        num_gts, num_lines = gts['lines'].size(0), preds['lines'].size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = \\\n            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)\n        assigned_labels = \\\n            preds['lines'].new_full((num_lines,), -1, dtype=torch.long)\n\n        if num_gts == 0 or num_lines == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels), None, None\n\n        # 2. compute the weighted costs\n        gt_permute_idx = None # (num_preds, num_gts)\n        if self.cost.reg_cost.permute:\n            cost, gt_permute_idx, reg_cost = self.cost(preds, gts)\n        else:\n            cost, reg_cost = self.cost(preds, gts)\n        \n        # Manipulate the cost matrix here using the two-frame matching info\n        # for non-first-frame supervision\n        if track_info is not None:\n            prop_i = 0\n            # iterate through queries\n            for j in range(cost.shape[0]):\n                if j >= len(track_info['track_queries_fal_pos_mask']):\n                    # padding queries, loss will be filtered later\n                    cost[j] = np.inf\n                    continue\n                    \n                if track_info['track_queries_fal_pos_mask'][j]:\n                    # false positive and placeholder track queries should not\n                    # be matched to any target\n                    cost[j] = np.inf\n                \n                # Tweak the cost matrix here to force the G.T. assignment of the track queries\n                elif track_info['track_queries_mask'][j]:\n                    track_query_id = track_info['track_query_match_ids'][prop_i].long().item()\n                    prop_i += 1\n\n                    cost[j] = np.inf\n                    cost[:, track_query_id] = np.inf\n                    cost[j, track_query_id] = -1\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu().numpy()\n        try:\n            matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        except ValueError:\n            print('cost max {}, min {}'.format(cost.max(), cost.min()))\n            raise\n\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            preds['lines'].device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            preds['lines'].device)\n        \n        # Pass out the un-weighted reg cost for temporal propagation\n        matched_reg_cost = reg_cost[matched_row_inds, matched_col_inds]\n\n        # 4. 
assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gts['labels'][matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels), gt_permute_idx, matched_reg_cost"
  },
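  {
    "path": "examples/demo_forced_matching.py",
    "content": "\"\"\"Illustrative sketch (hypothetical file): how HungarianLinesAssigner forces\ntrack-query matches by editing the cost matrix before linear_sum_assignment.\nBlocking a query row and a GT column while setting the chosen cell to -1 makes\nthat pair the only affordable choice, so a tracked instance keeps its GT.\nUses numpy/scipy; a large finite value stands in for np.inf here.\"\"\"\nimport numpy as np\nfrom scipy.optimize import linear_sum_assignment\n\nINF = 1e9  # large finite stand-in for np.inf\ncost = np.random.RandomState(0).rand(4, 3)  # 4 queries x 3 GT instances\n\n# Force query 1 to match GT 2, as done for propagated track queries.\ncost[1, :] = INF\ncost[:, 2] = INF\ncost[1, 2] = -1\n\nrows, cols = linear_sum_assignment(cost)\nmatches = dict(zip(rows.tolist(), cols.tolist()))\nprint(matches)\nassert matches[1] == 2  # the forced pair survives the global matching\n"
  },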
  {
    "path": "plugin/models/assigner/match_cost.py",
    "content": "import torch\nfrom mmdet.core.bbox.match_costs.builder import MATCH_COST\nfrom mmdet.core.bbox.match_costs import build_match_cost\nfrom torch.nn.functional import smooth_l1_loss\n\nfrom mmdet.core.bbox.iou_calculators import bbox_overlaps\nfrom mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy\ndef chamfer_distance(line1, line2) -> float:\n    ''' Calculate chamfer distance between two lines. Make sure the \n    lines are interpolated.\n\n    Args:\n        line1 (tensor): shape (num_pts, 2)\n        line2 (tensor): shape (num_pts, 2)\n    \n    Returns:\n        distance (float): chamfer distance\n    '''\n    \n    dist_matrix = torch.cdist(line1, line2, p=2)\n    dist12 = dist_matrix.min(-1)[0].sum() / len(line1)\n    dist21 = dist_matrix.min(-2)[0].sum() / len(line2)\n\n    return (dist12 + dist21) / 2\n\n\n@MATCH_COST.register_module()\nclass ClsSigmoidCost:\n    \"\"\"ClsSoftmaxCost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n        Returns:\n            torch.Tensor: cls_cost value with weight\n        \"\"\"\n        # Following the official DETR repo, contrary to the loss that\n        # NLL is used, we approximate it in 1 - cls_score[gt_label].\n        # The 1 is a constant that doesn't change the matching,\n        # so it can be omitted.\n        cls_score = cls_pred.sigmoid()\n        cls_cost = -cls_score[:, gt_labels]\n        return cls_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass LinesFixNumChamferCost(object):\n    \"\"\"BBox3DL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.0, permute=False):\n        self.weight = weight\n        self.permute = permute\n\n    def __call__(self, lines_pred, gt_lines):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [num_query, 2*num_points]\n            gt_lines (Tensor): Ground truth lines\n                [num_gt, 2*num_points] or [num_gt, num_permute, 2*num_points]\n        Returns:\n            torch.Tensor: reg_cost value with weight\n                shape [num_pred, num_gt]\n        \"\"\"\n\n        if self.permute:\n            assert len(gt_lines.shape) == 3\n        else:\n            assert len(gt_lines.shape) == 2\n        \n        num_gt, num_pred = len(gt_lines), len(lines_pred)\n        if self.permute:\n            gt_lines = gt_lines.flatten(0, 1) # (num_gt*num_permute, 2*num_pts)\n\n        num_pts = lines_pred.shape[-1] // 2\n        lines_pred = lines_pred.view(-1, 2) # [num_query*num_points, 2]\n        gt_lines = gt_lines.view(-1, 2) # [num_gt*num_points, 2]\n        \n        dist_mat = torch.cdist(lines_pred, gt_lines, p=2) # (num_query*num_points, num_gt*num_points)\n        dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=-1)) # (num_gt, num_query*num_points, num_pts)\n        dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=1)) # (num_q, num_gt, num_pts, num_pts)\n\n        dist1 = dist_mat.min(-1)[0].sum(-1)\n        dist2 = dist_mat.min(-2)[0].sum(-1)\n\n        dist_mat = (dist1 + dist2) / (2 * num_pts) # (num_pred, num_gt)\n\n        if self.permute:\n        
    # dist_mat: (num_pred, num_gt*num_permute)\n            dist_mat = dist_mat.view(num_pred, num_gt, -1) # (num_pred, num_gt, num_permute)\n            dist_mat, gt_permute_index = dist_mat.min(-1)\n            return dist_mat * self.weight, gt_permute_index\n\n        return dist_mat * self.weight\n\n\n@MATCH_COST.register_module()\nclass LinesL1Cost(object):\n    \"\"\"LinesL1Cost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.0, beta=0.0, permute=False):\n        self.weight = weight\n        self.permute = permute\n        self.beta = beta\n\n    def __call__(self, lines_pred, gt_lines, **kwargs):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [num_query, 2*num_points]\n            gt_lines (Tensor): Ground truth lines\n                [num_gt, 2*num_points] or [num_gt, num_permute, 2*num_points]\n        Returns:\n            torch.Tensor: reg_cost value with weight\n                shape [num_pred, num_gt]\n        \"\"\"\n        \n        if self.permute:\n            assert len(gt_lines.shape) == 3\n        else:\n            assert len(gt_lines.shape) == 2\n\n        num_pred, num_gt = len(lines_pred), len(gt_lines)\n        if self.permute:\n            # permute-invariant labels\n            gt_lines = gt_lines.flatten(0, 1) # (num_gt*num_permute, 2*num_pts)\n\n        num_pts = lines_pred.shape[-1]//2\n\n        if self.beta > 0:\n            lines_pred = lines_pred.unsqueeze(1).repeat(1, len(gt_lines), 1)\n            gt_lines = gt_lines.unsqueeze(0).repeat(num_pred, 1, 1)\n            dist_mat = smooth_l1_loss(lines_pred, gt_lines, reduction='none', beta=self.beta).sum(-1)\n        \n        else:\n            dist_mat = torch.cdist(lines_pred, gt_lines, p=1)\n\n        dist_mat = dist_mat / num_pts\n\n        if self.permute:\n            # dist_mat: (num_pred, num_gt*num_permute)\n            dist_mat = dist_mat.view(num_pred, num_gt, -1) # (num_pred, num_gt, num_permute)\n            dist_mat, gt_permute_index = torch.min(dist_mat, 2)\n            return dist_mat * self.weight, gt_permute_index\n        \n        return dist_mat * self.weight\n\n\n@MATCH_COST.register_module()\nclass BBoxCostC:\n    \"\"\"BBoxCostC (L1 bbox cost).\n     Args:\n         weight (int | float, optional): loss_weight\n         box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost\n         >>> import torch\n         >>> self = BBoxL1Cost()\n         >>> bbox_pred = torch.rand(1, 4)\n         >>> gt_bboxes= torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> factor = torch.tensor([10, 8, 10, 8])\n         >>> self(bbox_pred, gt_bboxes, factor)\n         tensor([[1.6172, 1.6422]])\n    \"\"\"\n\n    def __init__(self, weight=1., box_format='xyxy'):\n        self.weight = weight\n        assert box_format in ['xyxy', 'xywh']\n        self.box_format = box_format\n\n    def __call__(self, bbox_pred, gt_bboxes):\n        \"\"\"\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                [num_query, 4].\n            gt_bboxes (Tensor): Ground truth boxes with normalized\n                coordinates (x1, y1, x2, y2). 
 Shape [num_gt, 4].\n        Returns:\n            torch.Tensor: bbox_cost value with weight\n        \"\"\"\n        # if self.box_format == 'xywh':\n        #     gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)\n        # elif self.box_format == 'xyxy':\n        #     bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)\n        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)\n        return bbox_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass IoUCostC:\n    \"\"\"IoUCostC (adapted from mmdet's IoUCost).\n     Args:\n         iou_mode (str, optional): iou mode such as 'iou' | 'giou'\n         weight (int | float, optional): loss weight\n     Examples:\n         >>> import torch\n         >>> self = IoUCostC(box_format='xyxy')\n         >>> bboxes = torch.FloatTensor([[1, 1, 2, 2], [2, 2, 3, 4]])\n         >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> self(bboxes, gt_bboxes)\n         tensor([[-0.1250,  0.1667],\n                [ 0.1667, -0.5000]])\n    \"\"\"\n\n    def __init__(self, iou_mode='giou', weight=1., box_format='xywh'):\n        self.weight = weight\n        self.iou_mode = iou_mode\n        assert box_format in ['xyxy', 'xywh']\n        self.box_format = box_format\n\n    def __call__(self, bboxes, gt_bboxes):\n        \"\"\"\n        Args:\n            bboxes (Tensor): Predicted boxes with unnormalized coordinates\n                (x1, y1, x2, y2). Shape [num_query, 4].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n        Returns:\n            torch.Tensor: iou_cost value with weight\n        \"\"\"\n        if self.box_format == 'xywh':\n            bboxes = bbox_cxcywh_to_xyxy(bboxes)\n            gt_bboxes = bbox_cxcywh_to_xyxy(gt_bboxes)\n\n        # overlaps: [num_bboxes, num_gt]\n        overlaps = bbox_overlaps(\n            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)\n        # The 1 is a constant that doesn't change the matching, so omitted.\n        iou_cost = -overlaps\n        return iou_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass DynamicLinesCost(object):\n    \"\"\"DynamicLinesCost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, lines_pred, lines_gt, masks_pred, masks_gt):\n        \"\"\"\n        Args:\n            lines_pred (Tensor): predicted normalized lines:\n                [nP, num_points, 2]\n            lines_gt (Tensor): Ground truth lines\n                [nG, num_points, 2]\n            masks_pred: [nP, num_points]\n            masks_gt: [nG, num_points]\n        Returns:\n            dist_mat: reg_cost value with weight\n                shape [nP, nG]\n        \"\"\"\n\n        dist_mat = self.cal_dist(lines_pred, lines_gt)\n\n        dist_mat = self.get_dynamic_line(dist_mat, masks_pred, masks_gt)\n\n        dist_mat = dist_mat * self.weight\n\n        return dist_mat\n\n    def cal_dist(self, x1, x2):\n        '''\n            Args:\n                x1: B1,N,2\n                x2: B2,N,2\n            Return:\n                dist_mat: B1,B2,N\n        '''\n        x1 = x1.permute(1, 0, 2)\n        x2 = x2.permute(1, 0, 2)\n\n        dist_mat = torch.cdist(x1, x2, p=2)\n\n        dist_mat = dist_mat.permute(1, 2, 0)\n\n        return dist_mat\n\n    def get_dynamic_line(self, mat, m1, m2):\n        '''\n            Average the distance matrix over dynamically
 valid points.\n            mat: N1xN2xnpts\n            m1: N1xnpts\n            m2: N2xnpts\n        '''\n\n        # nPxnGxnum_points\n        m1 = m1.unsqueeze(1).sigmoid() > 0.5\n        m2 = m2.unsqueeze(0)\n\n        valid_points_mask = (m1 + m2) / 2.\n\n        average_factor_mask = valid_points_mask.sum(-1) > 0\n        average_factor = average_factor_mask.masked_fill(\n            ~average_factor_mask, 1)\n\n        # accumulate the masked distances (average_factor is 1 wherever a\n        # pair has any valid point, so it only guards the division)\n        mat = mat * valid_points_mask\n        mat = mat.sum(-1) / average_factor\n\n        return mat\n\n\n@MATCH_COST.register_module()\nclass BBoxLogitsCost(object):\n    \"\"\"BBoxLogitsCost.\n     Args:\n         weight (int | float, optional): loss_weight\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def calNLL(self, logits, value):\n        '''\n            Args:\n                logits: B1, 8, cls_dim\n                value: B2, 8,\n            Return:\n                log_likelihood: B1,B2,8\n        '''\n\n        logits = logits[:, None]\n        value = value[None]\n\n        value = value.long().unsqueeze(-1)\n        value, log_pmf = torch.broadcast_tensors(value, logits)\n        value = value[..., :1]\n        return log_pmf.gather(-1, value).squeeze(-1)\n\n    def __call__(self, bbox_pred, bbox_gt, **kwargs):\n        \"\"\"\n        Args:\n            bbox_pred: nproposal, 4*2, pos_dim\n            bbox_gt: ngt, 4*2\n        Returns:\n            cost: nproposal, ngt\n        \"\"\"\n\n        cost = self.calNLL(bbox_pred, bbox_gt).mean(-1)\n\n        return cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass MapQueriesCost(object):\n\n    def __init__(self, cls_cost, reg_cost, iou_cost=None):\n\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n\n        self.iou_cost = None\n        if iou_cost is not None:\n            self.iou_cost = build_match_cost(iou_cost)\n\n    def __call__(self, preds: dict, gts: dict):\n\n        # classification cost\n        cls_cost = self.cls_cost(preds['scores'], gts['labels'])\n\n        # regression cost\n        regkwargs = {}\n        if 'masks' in preds and 'masks' in gts:\n            assert isinstance(self.reg_cost, DynamicLinesCost), 'masks require DynamicLinesCost'\n            regkwargs = {\n                'masks_pred': preds['masks'],\n                'masks_gt': gts['masks'],\n            }\n\n        reg_cost = self.reg_cost(preds['lines'], gts['lines'], **regkwargs)\n        if self.reg_cost.permute:\n            reg_cost, gt_permute_idx = reg_cost\n\n        # weighted sum of the classification and regression costs\n        # (the IoU cost is added below when configured)\n        cost = cls_cost + reg_cost\n\n        # Pass the raw reg cost out; it is used to filter deviated\n        # instances during temporal label assignment.\n        raw_reg_cost = reg_cost / self.reg_cost.weight\n\n        # IoU cost\n        if self.iou_cost is not None:\n            iou_cost = self.iou_cost(preds['lines'], gts['lines'])\n            cost += iou_cost\n\n        if self.reg_cost.permute:\n            return cost, gt_permute_idx, raw_reg_cost\n        return cost, raw_reg_cost\n"
  },
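  {
    "path": "examples/demo_chamfer_match_cost.py",
    "content": "\"\"\"\nEditor's illustrative sketch -- NOT part of the original MapTracker code.\nA minimal, self-contained demo of the Chamfer-style line matching cost\ndefined in the match-cost module above: it re-implements the bidirectional\npoint-set distance on random polylines and mimics how the permute=True\nvariant picks the best ground-truth point ordering. Only `torch` is\nassumed; the file path and all names here are hypothetical.\n\"\"\"\nimport torch\n\n\ndef chamfer(line1, line2):\n    # Bidirectional mean nearest-neighbour distance between two\n    # (num_pts, 2) polylines, mirroring chamfer_distance() above.\n    d = torch.cdist(line1, line2, p=2)\n    return (d.min(-1)[0].mean() + d.min(-2)[0].mean()) / 2\n\n\nif __name__ == '__main__':\n    torch.manual_seed(0)\n    pred = torch.rand(20, 2)\n    gt = pred.clone()\n    # A reversed polyline is the same point set, so the Chamfer distance\n    # is (near) zero for both orderings.\n    assert chamfer(pred, gt) < 1e-6\n    assert chamfer(pred, torch.flip(gt, dims=[0])) < 1e-6\n    # Permutation handling as in LinesFixNumChamferCost(permute=True):\n    # evaluate every stored GT permutation and keep the minimum cost.\n    permutes = torch.stack([gt, torch.flip(gt, dims=[0]), torch.rand(20, 2)])\n    costs = torch.stack([chamfer(pred, p) for p in permutes])\n    best_cost, best_idx = costs.min(0)\n    print('best permutation:', best_idx.item(), 'cost:', best_cost.item())\n"
  },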
  {
    "path": "plugin/models/backbones/__init__.py",
    "content": "from .bevformer_backbone import BEVFormerBackbone\n"
  },
  {
    "path": "plugin/models/backbones/bevformer/__init__.py",
    "content": "from .custom_base_transformer_layer import MyCustomBaseTransformerLayer\nfrom .encoder import BEVFormerEncoder\nfrom .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D, MSIPM3D\nfrom .temporal_self_attention import TemporalSelfAttention\nfrom .transformer import PerceptionTransformer\nfrom .temporal_net import TemporalNet"
  },
  {
    "path": "plugin/models/backbones/bevformer/custom_base_transformer_layer.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nimport copy\nimport warnings\n\nimport torch\nimport torch.nn as nn\n\nfrom mmcv import ConfigDict, deprecated_api_warning\nfrom mmcv.cnn import Linear, build_activation_layer, build_norm_layer\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\n\nfrom mmcv.cnn.bricks.registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,\n                                      TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)\n\n# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file\ntry:\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention  # noqa F401\n    warnings.warn(\n        ImportWarning(\n            '``MultiScaleDeformableAttention`` has been moved to '\n            '``mmcv.ops.multi_scale_deform_attn``, please change original path '  # noqa E501\n            '``from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` '  # noqa E501\n            'to ``from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` '  # noqa E501\n        ))\nexcept ImportError:\n    warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from '\n                  '``mmcv.ops.multi_scale_deform_attn``, '\n                  'You should install ``mmcv-full`` if you need this module. ')\nfrom mmcv.cnn.bricks.transformer import build_feedforward_network, build_attention\n\n\n@TRANSFORMER_LAYER.register_module()\nclass MyCustomBaseTransformerLayer(BaseModule):\n    \"\"\"Base `TransformerLayer` for vision transformer.\n    It can be built from `mmcv.ConfigDict` and support more flexible\n    customization, for example, using any number of `FFN or LN ` and\n    use different kinds of `attention` by specifying a list of `ConfigDict`\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\n    when you specifying `norm` as the first element of `operation_order`.\n    More details about the `prenorm`: `On Layer Normalization in the\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for `self_attention` or `cross_attention` modules,\n            The order of the configs in the list should be consistent with\n            corresponding attentions in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config. Default: None.\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for FFN, The order of the configs in the list should be\n            consistent with corresponding ffn in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. 
Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Support `prenorm` when you specifying first element as `norm`.\n            Default：None.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        batch_first (bool): Key, Query and Value are shape\n            of (batch, n, embed_dim)\n            or (n, batch, embed_dim). Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs=None,\n                 ffn_cfgs=dict(\n                     type='FFN',\n                     embed_dims=256,\n                     feedforward_channels=1024,\n                     num_fcs=2,\n                     ffn_drop=0.,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                 ),\n                 operation_order=None,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 batch_first=True,\n                 **kwargs):\n\n        deprecated_args = dict(\n            feedforward_channels='feedforward_channels',\n            ffn_dropout='ffn_drop',\n            ffn_num_fcs='num_fcs')\n        for ori_name, new_name in deprecated_args.items():\n            if ori_name in kwargs:\n                warnings.warn(\n                    f'The arguments `{ori_name}` in BaseTransformerLayer '\n                    f'has been deprecated, now you should set `{new_name}` '\n                    f'and other FFN related arguments '\n                    f'to a dict named `ffn_cfgs`. ')\n                ffn_cfgs[new_name] = kwargs[ori_name]\n\n        super(MyCustomBaseTransformerLayer, self).__init__(init_cfg)\n\n        self.batch_first = batch_first\n\n        assert set(operation_order) & set(\n            ['self_attn', 'norm', 'ffn', 'cross_attn']) == \\\n            set(operation_order), f'The operation_order of' \\\n            f' {self.__class__.__name__} should ' \\\n            f'contains all four operation type ' \\\n            f\"{['self_attn', 'norm', 'ffn', 'cross_attn']}\"\n\n        num_attn = operation_order.count('self_attn') + operation_order.count(\n            'cross_attn')\n        if isinstance(attn_cfgs, dict):\n            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]\n        else:\n            assert num_attn == len(attn_cfgs), f'The length ' \\\n                f'of attn_cfg {num_attn} is ' \\\n                f'not consistent with the number of attention' \\\n                f'in operation_order {operation_order}.'\n\n        self.num_attn = num_attn\n        self.operation_order = operation_order\n        self.norm_cfg = norm_cfg\n        self.pre_norm = operation_order[0] == 'norm'\n        self.attentions = ModuleList()\n\n        index = 0\n        for operation_name in operation_order:\n            if operation_name in ['self_attn', 'cross_attn']:\n                if 'batch_first' in attn_cfgs[index]:\n                    assert self.batch_first == attn_cfgs[index]['batch_first']\n                else:\n                    attn_cfgs[index]['batch_first'] = self.batch_first\n                attention = build_attention(attn_cfgs[index])\n                # Some custom attentions used as `self_attn`\n                # or `cross_attn` can have different behavior.\n                attention.operation_name = operation_name\n                self.attentions.append(attention)\n                index += 1\n\n        self.embed_dims = 
self.attentions[0].embed_dims\n\n        self.ffns = ModuleList()\n        num_ffns = operation_order.count('ffn')\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = ConfigDict(ffn_cfgs)\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]\n        assert len(ffn_cfgs) == num_ffns\n        for ffn_index in range(num_ffns):\n            if 'embed_dims' not in ffn_cfgs[ffn_index]:\n                ffn_cfgs['embed_dims'] = self.embed_dims\n            else:\n                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims\n\n            self.ffns.append(\n                build_feedforward_network(ffn_cfgs[ffn_index]))\n\n        self.norms = ModuleList()\n        num_norms = operation_order.count('norm')\n        for _ in range(num_norms):\n            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n        **kwargs contains some specific arguments of attentions.\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. 
Default: None.\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                f'attn_masks {len(attn_masks)} must be equal ' \\\n                f'to the number of attention in ' \\\n                f'operation_order {self.num_attn}'\n\n        for layer in self.operation_order:\n            if layer == 'self_attn':\n                temp_key = temp_value = query\n                query = self.attentions[attn_index](\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=query_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            elif layer == 'cross_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    key,\n                    value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=key_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'ffn':\n                query = self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n\n\n\n@TRANSFORMER_LAYER.register_module()\nclass MyCustomBaseTransformerLayerWithoutSelfAttn(BaseModule):\n    \"\"\"Base `TransformerLayer` for vision transformer.\n    It can be built from `mmcv.ConfigDict` and support more flexible\n    customization, for example, using any number of `FFN or LN ` and\n    use different kinds of `attention` by specifying a list of `ConfigDict`\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\n    when you specifying `norm` as the first element of `operation_order`.\n    More details about the `prenorm`: `On Layer Normalization in the\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for `self_attention` or `cross_attention` modules,\n            The order of the configs in the list should be consistent with\n            corresponding attentions in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config. 
Default: None.\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for FFN, The order of the configs in the list should be\n            consistent with corresponding ffn in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Support `prenorm` when you specifying first element as `norm`.\n            Default：None.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        batch_first (bool): Key, Query and Value are shape\n            of (batch, n, embed_dim)\n            or (n, batch, embed_dim). Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs=None,\n                 ffn_cfgs=dict(\n                     type='FFN',\n                     embed_dims=256,\n                     feedforward_channels=1024,\n                     num_fcs=2,\n                     ffn_drop=0.,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                 ),\n                 operation_order=None,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 batch_first=True,\n                 **kwargs):\n\n        deprecated_args = dict(\n            feedforward_channels='feedforward_channels',\n            ffn_dropout='ffn_drop',\n            ffn_num_fcs='num_fcs')\n        for ori_name, new_name in deprecated_args.items():\n            if ori_name in kwargs:\n                warnings.warn(\n                    f'The arguments `{ori_name}` in BaseTransformerLayer '\n                    f'has been deprecated, now you should set `{new_name}` '\n                    f'and other FFN related arguments '\n                    f'to a dict named `ffn_cfgs`. 
')\n                ffn_cfgs[new_name] = kwargs[ori_name]\n\n        super(MyCustomBaseTransformerLayerWithoutSelfAttn, self).__init__(init_cfg)\n\n        self.batch_first = batch_first\n\n        assert set(operation_order) & set(\n            ['norm', 'ffn', 'cross_attn']) == \\\n            set(operation_order), f'The operation_order of' \\\n            f' {self.__class__.__name__} should ' \\\n            f'contains all three operation type ' \\\n            f\"{['norm', 'ffn', 'cross_attn']}\"\n\n        num_attn = operation_order.count(\n            'cross_attn')\n        if isinstance(attn_cfgs, dict):\n            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]\n        else:\n            assert num_attn == len(attn_cfgs), f'The length ' \\\n                f'of attn_cfg {num_attn} is ' \\\n                f'not consistent with the number of attention' \\\n                f'in operation_order {operation_order}.'\n\n        self.num_attn = num_attn\n        self.operation_order = operation_order\n        self.norm_cfg = norm_cfg\n        self.pre_norm = operation_order[0] == 'norm'\n        self.attentions = ModuleList()\n\n        index = 0\n        for operation_name in operation_order:\n            if operation_name in ['self_attn', 'cross_attn']:\n                if 'batch_first' in attn_cfgs[index]:\n                    assert self.batch_first == attn_cfgs[index]['batch_first']\n                else:\n                    attn_cfgs[index]['batch_first'] = self.batch_first\n                attention = build_attention(attn_cfgs[index])\n                # Some custom attentions used as `self_attn`\n                # or `cross_attn` can have different behavior.\n                attention.operation_name = operation_name\n                self.attentions.append(attention)\n                index += 1\n\n        self.embed_dims = self.attentions[0].embed_dims\n\n        self.ffns = ModuleList()\n        num_ffns = operation_order.count('ffn')\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = ConfigDict(ffn_cfgs)\n        if isinstance(ffn_cfgs, dict):\n            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]\n        assert len(ffn_cfgs) == num_ffns\n        for ffn_index in range(num_ffns):\n            if 'embed_dims' not in ffn_cfgs[ffn_index]:\n                ffn_cfgs['embed_dims'] = self.embed_dims\n            else:\n                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims\n\n            self.ffns.append(\n                build_feedforward_network(ffn_cfgs[ffn_index]))\n\n        self.norms = ModuleList()\n        num_norms = operation_order.count('norm')\n        for _ in range(num_norms):\n            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n        **kwargs contains some specific arguments of attentions.\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] 
if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. Default: None.\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                f'attn_masks {len(attn_masks)} must be equal ' \\\n                f'to the number of attention in ' \\\n                f'operation_order {self.num_attn}'\n\n        for layer in self.operation_order:\n            if layer == 'self_attn':\n                temp_key = temp_value = query\n                query = self.attentions[attn_index](\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=query_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            elif layer == 'cross_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    key,\n                    value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=key_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'ffn':\n                query = self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n"
  },
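  {
    "path": "examples/demo_operation_order.py",
    "content": "\"\"\"\nEditor's illustrative sketch -- NOT part of the original MapTracker code.\nA toy walk-through of the `operation_order` dispatch used by\nMyCustomBaseTransformerLayer above, with plain torch.nn stand-ins instead\nof mmcv-built attention modules. It shows how the order tuple drives the\npost-norm residual execution; all names here are hypothetical.\n\"\"\"\nimport torch\nimport torch.nn as nn\n\nembed_dims = 32\norder = ('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')\nself_attn = nn.MultiheadAttention(embed_dims, num_heads=4, batch_first=True)\ncross_attn = nn.MultiheadAttention(embed_dims, num_heads=4, batch_first=True)\nffn = nn.Sequential(nn.Linear(embed_dims, 64), nn.ReLU(), nn.Linear(64, embed_dims))\nnorms = nn.ModuleList([nn.LayerNorm(embed_dims) for _ in range(order.count('norm'))])\n\nquery = torch.rand(2, 10, embed_dims)        # (bs, num_queries, embed_dims)\nkey = value = torch.rand(2, 50, embed_dims)  # (bs, num_keys, embed_dims)\n\nnorm_index = 0\nfor op in order:  # mirrors the dispatch loop in forward()\n    if op == 'self_attn':\n        query = query + self_attn(query, query, query)[0]\n    elif op == 'cross_attn':\n        query = query + cross_attn(query, key, value)[0]\n    elif op == 'ffn':\n        query = query + ffn(query)\n    elif op == 'norm':\n        query = norms[norm_index](query)\n        norm_index += 1\nprint(query.shape)  # torch.Size([2, 10, 32])\n"
  },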
  {
    "path": "plugin/models/backbones/bevformer/encoder.py",
    "content": "\"\"\"\nBorrowed from StreamMapNet, and add BEV memory fusion\n\"\"\"\n\nfrom .custom_base_transformer_layer import MyCustomBaseTransformerLayer\nfrom .temporal_net import TemporalNet\nimport copy\nimport warnings\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\n                                      TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import TransformerLayerSequence\nfrom mmcv.runner import force_fp32, auto_fp16\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.utils import TORCH_VERSION, digit_version\nfrom mmcv.utils import ext_loader\n\nfrom einops import rearrange\n\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\n\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass BEVFormerEncoder(TransformerLayerSequence):\n\n    \"\"\"\n    Attention with both self and cross\n    Implements the decoder in DETR transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. Default：\n            `LN`.\n    \"\"\"\n\n    def __init__(self, *args, pc_range=None, num_points_in_pillar=4, return_intermediate=False, dataset_type='nuscenes',\n                 **kwargs):\n\n        super(BEVFormerEncoder, self).__init__(*args, **kwargs)\n        self.return_intermediate = return_intermediate\n\n        temporal_mem_layers = []\n        for _ in range(self.num_layers):\n            mem_conv = TemporalNet(history_steps=4, hidden_dims=self.embed_dims, num_blocks=1)\n            temporal_mem_layers.append(mem_conv)\n        self.temporal_mem_layers = nn.ModuleList(temporal_mem_layers)\n\n        self.num_points_in_pillar = num_points_in_pillar\n        self.pc_range = pc_range\n        self.fp16_enabled = False\n\n    @staticmethod\n    def get_reference_points(H, W, Z=8, num_points_in_pillar=4, dim='3d', bs=1, device='cuda', dtype=torch.float):\n        \"\"\"Get the reference points used in SCA and TSA.\n        Args:\n            H, W: spatial shape of bev.\n            Z: hight of pillar.\n            D: sample D points uniformly from each pillar.\n            device (obj:`device`): The device where\n                reference_points should be.\n        Returns:\n            Tensor: reference points used in decoder, has \\\n                shape (bs, num_keys, num_levels, 2).\n        \"\"\"\n\n        # reference points in 3D space, used in spatial cross-attention (SCA)\n        if dim == '3d':\n            zs = torch.linspace(0.5, Z - 0.5, num_points_in_pillar, dtype=dtype,\n                                device=device).view(-1, 1, 1).expand(num_points_in_pillar, H, W) / Z\n            xs = torch.linspace(0.5, W - 0.5, W, dtype=dtype,\n                                device=device).view(1, 1, W).expand(num_points_in_pillar, H, W) / W\n            # ys = torch.linspace(0.5, H - 0.5, H, dtype=dtype,\n            #                     device=device).view(1, H, 1).expand(num_points_in_pillar, H, W) / H\n            # change y-axis direction\n            ys = torch.linspace(H - 0.5, 0.5, H, dtype=dtype,\n                                device=device).view(1, H, 1).expand(num_points_in_pillar, H, W) / H\n            ref_3d = torch.stack((xs, ys, zs), -1)\n            ref_3d = ref_3d.permute(0, 3, 1, 2).flatten(2).permute(0, 2, 1)\n            ref_3d = ref_3d[None].repeat(bs, 1, 1, 1)\n            return ref_3d\n\n        # reference 
points on 2D bev plane, used in temporal self-attention (TSA).\n        elif dim == '2d':\n            ref_y, ref_x = torch.meshgrid(\n                # torch.linspace(\n                #     0.5, H - 0.5, H, dtype=dtype, device=device),\n                torch.linspace(\n                    H - 0.5, 0.5, H, dtype=dtype, device=device),\n                torch.linspace(\n                    0.5, W - 0.5, W, dtype=dtype, device=device)\n            )\n            ref_y = ref_y.reshape(-1)[None] / H\n            ref_x = ref_x.reshape(-1)[None] / W\n            ref_2d = torch.stack((ref_x, ref_y), -1)\n            ref_2d = ref_2d.repeat(bs, 1, 1).unsqueeze(2)\n            return ref_2d\n\n    # This function must use fp32!!!\n    @force_fp32(apply_to=('reference_points', 'img_metas'))\n    def point_sampling(self, reference_points, pc_range, img_metas):\n\n        ego2img = []\n        for img_meta in img_metas:\n            ego2img.append(img_meta['ego2img'])\n        ego2img = np.asarray(ego2img)\n        ego2img = reference_points.new_tensor(ego2img)  # (B, N, 4, 4)\n        reference_points = reference_points.clone()\n\n        reference_points[..., 0:1] = reference_points[..., 0:1] * \\\n            (pc_range[3] - pc_range[0]) + pc_range[0]\n        reference_points[..., 1:2] = reference_points[..., 1:2] * \\\n            (pc_range[4] - pc_range[1]) + pc_range[1]\n        reference_points[..., 2:3] = reference_points[..., 2:3] * \\\n            (pc_range[5] - pc_range[2]) + pc_range[2]\n\n        reference_points = torch.cat(\n            (reference_points, torch.ones_like(reference_points[..., :1])), -1)\n\n        reference_points = reference_points.permute(1, 0, 2, 3)\n        D, B, num_query = reference_points.size()[:3]\n        num_cam = ego2img.size(1)\n\n        reference_points = reference_points.view(\n            D, B, 1, num_query, 4).repeat(1, 1, num_cam, 1, 1).unsqueeze(-1)\n\n        ego2img = ego2img.view(\n            1, B, num_cam, 1, 4, 4).repeat(D, 1, 1, num_query, 1, 1)\n\n        reference_points_cam = torch.matmul(ego2img.to(torch.float32),\n                                            reference_points.to(torch.float32)).squeeze(-1)\n        eps = 1e-5\n\n        bev_mask = (reference_points_cam[..., 2:3] > eps)\n        reference_points_cam = reference_points_cam[..., 0:2] / torch.maximum(\n            reference_points_cam[..., 2:3], torch.ones_like(reference_points_cam[..., 2:3]) * eps)\n\n        reference_points_cam[..., 0] /= img_metas[0]['img_shape'][0][1]\n        reference_points_cam[..., 1] /= img_metas[0]['img_shape'][0][0]\n\n        bev_mask = (bev_mask & (reference_points_cam[..., 1:2] > 0.0)\n                    & (reference_points_cam[..., 1:2] < 1.0)\n                    & (reference_points_cam[..., 0:1] < 1.0)\n                    & (reference_points_cam[..., 0:1] > 0.0))\n        if digit_version(TORCH_VERSION) >= digit_version('1.8'):\n            bev_mask = torch.nan_to_num(bev_mask)\n        else:\n            bev_mask = bev_mask.new_tensor(\n                np.nan_to_num(bev_mask.cpu().numpy()))\n\n        reference_points_cam = reference_points_cam.permute(2, 1, 3, 0, 4)\n        bev_mask = bev_mask.permute(2, 1, 3, 0, 4).squeeze(-1)\n\n        return reference_points_cam, bev_mask\n\n    @auto_fp16()\n    def forward(self,\n                bev_query,\n                key,\n                value,\n                *args,\n                bev_h=None,\n                bev_w=None,\n                bev_pos=None,\n                
spatial_shapes=None,\n                level_start_index=None,\n                prev_bev=None,\n                shift=0.,\n                warped_history_bev=None,\n                **kwargs):\n        \"\"\"Forward function for `BEVFormerEncoder`.\n        Args:\n            bev_query (Tensor): Input BEV query with shape\n                `(num_query, bs, embed_dims)`.\n            key & value (Tensor): Input multi-camera features with shape\n                (num_cam, num_value, bs, embed_dims)\n            reference_points (Tensor): The reference\n                points of offset, with shape\n                (bs, num_query, 4) when as_two_stage,\n                otherwise with shape (bs, num_query, 2).\n            valid_ratios (Tensor): The ratios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n\n        output = bev_query\n        intermediate = []\n\n        ref_3d = self.get_reference_points(\n            bev_h, bev_w, self.pc_range[5]-self.pc_range[2], self.num_points_in_pillar, dim='3d', bs=bev_query.size(1), device=bev_query.device, dtype=bev_query.dtype)\n        ref_2d = self.get_reference_points(\n            bev_h, bev_w, dim='2d', bs=bev_query.size(1), device=bev_query.device, dtype=bev_query.dtype)\n\n        reference_points_cam, bev_mask = self.point_sampling(\n            ref_3d, self.pc_range, kwargs['img_metas'])\n\n        # Note: upstream BEVFormer used 'shift_ref_2d = ref_2d' (missing the\n        # .clone()) and kept that bug to reproduce its paper results; the\n        # corrected version is used here.\n        shift_ref_2d = ref_2d.clone()\n        shift_ref_2d += shift[:, None, None, :]\n\n        # (num_query, bs, embed_dims) -> (bs, num_query, embed_dims)\n        bev_query = bev_query.permute(1, 0, 2)\n        bev_pos = bev_pos.permute(1, 0, 2)\n        bs, len_bev, num_bev_level, _ = ref_2d.shape\n\n        if prev_bev is not None:\n            prev_bev = prev_bev.permute(1, 0, 2)\n            prev_bev = torch.stack(\n                [prev_bev, bev_query], 1).reshape(bs*2, len_bev, -1)\n            hybrid_ref_2d = torch.stack([shift_ref_2d, ref_2d], 1).reshape(\n                bs*2, len_bev, num_bev_level, 2)\n        else:\n            hybrid_ref_2d = torch.stack([ref_2d, ref_2d], 1).reshape(\n                bs*2, len_bev, num_bev_level, 2)\n\n        for lid, layer in enumerate(self.layers):\n            output = layer(\n                bev_query,\n                key,\n                value,\n                *args,\n                bev_pos=bev_pos,\n                ref_2d=hybrid_ref_2d,\n                ref_3d=ref_3d,\n                bev_h=bev_h,\n                bev_w=bev_w,\n                spatial_shapes=spatial_shapes,\n                level_start_index=level_start_index,\n                reference_points_cam=reference_points_cam,\n                bev_mask=bev_mask,\n                prev_bev=prev_bev,\n                warped_history_bev=warped_history_bev,\n                **kwargs)\n\n            # BEV memory fusion layer\n            mem_layer = self.temporal_mem_layers[lid]\n            curr_feat = rearrange(output, 'b (h w) c -> b c h w', h=warped_history_bev.shape[3])\n            fused_output = mem_layer(warped_history_bev, curr_feat)\n            fused_output = rearrange(fused_output, 'b c h w -> b (h w) c')\n            output = output + fused_output\n\n            bev_query = output\n            if self.return_intermediate:\n                intermediate.append(output)\n\n        if self.return_intermediate:\n            return torch.stack(intermediate)\n\n        return output\n\n\n@TRANSFORMER_LAYER.register_module()\nclass BEVFormerLayer(MyCustomBaseTransformerLayer):\n    \"\"\"Implements one layer of the BEVFormer encoder.\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | list[dict] | dict):\n            Configs for self_attention or cross_attention, the order\n            should be consistent with it in `operation_order`. If it is\n            a dict, it would be expanded to the number of attention in\n            `operation_order`.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        ffn_dropout (float): Probability of an element to be zeroed\n            in ffn. Default 0.0.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Default: None\n        act_cfg (dict): The activation config for FFNs. Default: `ReLU`.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: `LN`.\n        ffn_num_fcs (int): The number of fully-connected layers in FFNs.\n            Default: 2.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs,\n                 feedforward_channels,\n                 ffn_dropout=0.0,\n                 operation_order=None,\n                 act_cfg=dict(type='ReLU', inplace=True),\n                 norm_cfg=dict(type='LN'),\n                 ffn_num_fcs=2,\n                 **kwargs):\n        super(BEVFormerLayer, self).__init__(\n            attn_cfgs=attn_cfgs,\n            feedforward_channels=feedforward_channels,\n            ffn_dropout=ffn_dropout,\n            operation_order=operation_order,\n            act_cfg=act_cfg,\n            norm_cfg=norm_cfg,\n            ffn_num_fcs=ffn_num_fcs,\n            **kwargs)\n        self.fp16_enabled = False\n        assert len(operation_order) == 6\n        assert set(operation_order) == set(\n            ['self_attn', 'norm', 'cross_attn', 'ffn'])\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                bev_pos=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                ref_2d=None,\n                ref_3d=None,\n                bev_h=None,\n                bev_w=None,\n                reference_points_cam=None,\n                mask=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                prev_bev=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n\n        **kwargs contains some specific arguments of attentions.\n\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries, embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos 
(Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. Default: None.\n\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                                                     f'attn_masks {len(attn_masks)} must be equal ' \\\n                                                     f'to the number of attention in ' \\\n                f'operation_order {self.num_attn}'\n\n        for layer in self.operation_order:\n            # temporal self attention\n            if layer == 'self_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    prev_bev,\n                    prev_bev,\n                    identity if self.pre_norm else None,\n                    query_pos=bev_pos,\n                    key_pos=bev_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    reference_points=ref_2d,\n                    spatial_shapes=torch.tensor(\n                        [[bev_h, bev_w]], device=query.device),\n                    level_start_index=torch.tensor([0], device=query.device),\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            # spaital cross attention\n            elif layer == 'cross_attn':\n                query = self.attentions[attn_index](\n                    query,\n                    key,\n                    value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=key_pos,\n                    reference_points=ref_3d,\n                    reference_points_cam=reference_points_cam,\n                    mask=mask,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=key_padding_mask,\n                    spatial_shapes=spatial_shapes,\n                    level_start_index=level_start_index,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n            elif layer == 'ffn':\n                query = 
self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n"
  },
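  {
    "path": "examples/demo_reference_points.py",
    "content": "\"\"\"\nEditor's illustrative sketch -- NOT part of the original MapTracker code.\nReproduces the 2D reference-point grid built by\nBEVFormerEncoder.get_reference_points above, including the flipped y-axis\nconvention noted in its comments, without requiring mmcv. Pure torch; the\nfile path and function name are hypothetical.\n\"\"\"\nimport torch\n\n\ndef ref_points_2d(H, W, bs=1, dtype=torch.float, device='cpu'):\n    # y runs from H-0.5 down to 0.5 (flipped axis), x from 0.5 up to\n    # W-0.5; both are normalized to (0, 1) exactly as in the encoder.\n    ref_y, ref_x = torch.meshgrid(\n        torch.linspace(H - 0.5, 0.5, H, dtype=dtype, device=device),\n        torch.linspace(0.5, W - 0.5, W, dtype=dtype, device=device))\n    ref_y = ref_y.reshape(-1)[None] / H\n    ref_x = ref_x.reshape(-1)[None] / W\n    ref_2d = torch.stack((ref_x, ref_y), -1)\n    return ref_2d.repeat(bs, 1, 1).unsqueeze(2)  # (bs, H*W, 1, 2)\n\n\npts = ref_points_2d(4, 8, bs=2)\nprint(pts.shape)     # torch.Size([2, 32, 1, 2])\nprint(pts[0, 0, 0])  # first point: small x, y near 1 (top row, flipped y)\n"
  },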
  {
    "path": "plugin/models/backbones/bevformer/grid_mask.py",
    "content": "import torch\nimport torch.nn as nn\nimport numpy as np\nfrom PIL import Image\nfrom mmcv.runner import force_fp32, auto_fp16\n\nclass Grid(object):\n    def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):\n        self.use_h = use_h\n        self.use_w = use_w\n        self.rotate = rotate\n        self.offset = offset\n        self.ratio = ratio\n        self.mode=mode\n        self.st_prob = prob\n        self.prob = prob\n\n    def set_prob(self, epoch, max_epoch):\n        self.prob = self.st_prob * epoch / max_epoch\n\n    def __call__(self, img, label):\n        if np.random.rand() > self.prob:\n            return img, label\n        h = img.size(1)\n        w = img.size(2)\n        self.d1 = 2\n        self.d2 = min(h, w)\n        hh = int(1.5*h)\n        ww = int(1.5*w)\n        d = np.random.randint(self.d1, self.d2)\n        if self.ratio == 1:\n            self.l = np.random.randint(1, d)\n        else:\n            self.l = min(max(int(d*self.ratio+0.5),1),d-1)\n        mask = np.ones((hh, ww), np.float32)\n        st_h = np.random.randint(d)\n        st_w = np.random.randint(d)\n        if self.use_h:\n            for i in range(hh//d):\n                s = d*i + st_h\n                t = min(s+self.l, hh)\n                mask[s:t,:] *= 0\n        if self.use_w:\n            for i in range(ww//d):\n                s = d*i + st_w\n                t = min(s+self.l, ww)\n                mask[:,s:t] *= 0\n       \n        r = np.random.randint(self.rotate)\n        mask = Image.fromarray(np.uint8(mask))\n        mask = mask.rotate(r)\n        mask = np.asarray(mask)\n        mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]\n\n        mask = torch.from_numpy(mask).float()\n        if self.mode == 1:\n            mask = 1-mask\n\n        mask = mask.expand_as(img)\n        if self.offset:\n            offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()\n            offset = (1 - mask) * offset\n            img = img * mask + offset\n        else:\n            img = img * mask \n\n        return img, label\n\n\nclass GridMask(nn.Module):\n    def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):\n        super(GridMask, self).__init__()\n        self.use_h = use_h\n        self.use_w = use_w\n        self.rotate = rotate\n        self.offset = offset\n        self.ratio = ratio\n        self.mode = mode\n        self.st_prob = prob\n        self.prob = prob\n        self.fp16_enable = False\n\n    def set_prob(self, epoch, max_epoch):\n        self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5\n    \n    def set_ratio_and_prob(self, ratio, prob):\n        self.prob = prob\n        self.ratio = ratio\n\n    @auto_fp16()\n    def forward(self, x):\n        if np.random.rand() > self.prob or not self.training:\n            return x\n        n,c,h,w = x.size()\n        x = x.view(-1,h,w)\n        hh = int(1.5*h)\n        ww = int(1.5*w)\n        d = np.random.randint(2, h)\n        self.l = min(max(int(d*self.ratio+0.5),1),d-1)\n        mask = np.ones((hh, ww), np.float32)\n        st_h = np.random.randint(d)\n        st_w = np.random.randint(d)\n        if self.use_h:\n            for i in range(hh//d):\n                s = d*i + st_h\n                t = min(s+self.l, hh)\n                mask[s:t,:] *= 0\n        if self.use_w:\n            for i in range(ww//d):\n                s = d*i + st_w\n                t = min(s+self.l, ww)\n                mask[:,s:t] 
*= 0\n       \n        r = np.random.randint(self.rotate)\n        mask = Image.fromarray(np.uint8(mask))\n        mask = mask.rotate(r)\n        mask = np.asarray(mask)\n        mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]\n\n        mask = torch.from_numpy(mask).to(x.dtype).cuda()\n\n        if self.mode == 1:\n            mask = 1-mask\n\n        mask = mask.expand_as(x)\n        if self.offset:\n            offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda()\n            x = x * mask + offset * (1 - mask)\n        else:\n            x = x * mask \n        \n        return x.view(n,c,h,w)"
  },
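  {
    "path": "examples/demo_grid_mask.py",
    "content": "\"\"\"\nEditor's illustrative sketch -- NOT part of the original MapTracker code.\nGenerates one mask with the same recipe as GridMask.forward above (random\nperiod d, stripe width l = d * ratio with rounding, random offsets, small\nrandom rotation, centre crop) and reports the fraction of pixels kept.\nOnly numpy and PIL are assumed; the path and names are hypothetical.\n\"\"\"\nimport numpy as np\nfrom PIL import Image\n\nnp.random.seed(0)\nh, w, ratio, rotate = 64, 64, 0.5, 1\nhh, ww = int(1.5 * h), int(1.5 * w)\nd = np.random.randint(2, h)                   # grid period\nl = min(max(int(d * ratio + 0.5), 1), d - 1)  # stripe width\nmask = np.ones((hh, ww), np.float32)\nst_h, st_w = np.random.randint(d), np.random.randint(d)\nfor i in range(hh // d):                      # horizontal stripes\n    s = d * i + st_h\n    mask[s:min(s + l, hh), :] = 0\nfor i in range(ww // d):                      # vertical stripes\n    s = d * i + st_w\n    mask[:, s:min(s + l, ww)] = 0\nr = np.random.randint(rotate)                 # rotation angle in [0, rotate)\nmask = np.asarray(Image.fromarray(np.uint8(mask)).rotate(r))\nmask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]\nprint('kept fraction: %.2f' % mask.mean())    # roughly (1 - ratio) ** 2\n"
  },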
  {
    "path": "plugin/models/backbones/bevformer/multi_scale_deformable_attn_function.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nimport torch\nfrom torch.cuda.amp import custom_bwd, custom_fwd\nfrom torch.autograd.function import Function, once_differentiable\nfrom mmcv.utils import ext_loader\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\n\n\nclass MultiScaleDeformableAttnFunction_fp16(Function):\n\n    @staticmethod\n    @custom_fwd(cast_inputs=torch.float16)\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index,\n                sampling_locations, attention_weights, im2col_step):\n        \"\"\"GPU version of multi-scale deformable attention.\n\n        Args:\n            value (Tensor): The value has shape\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\n            value_spatial_shapes (Tensor): Spatial shape of\n                each feature map, has shape (num_levels, 2),\n                last dimension 2 represent (h, w)\n            sampling_locations (Tensor): The location of sampling points,\n                has shape\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\n                the last dimension 2 represent (x, y).\n            attention_weights (Tensor): The weight of sampling points used\n                when calculate the attention, has shape\n                (bs ,num_queries, num_heads, num_levels, num_points),\n            im2col_step (Tensor): The step used in image to column.\n\n        Returns:\n            Tensor: has shape (bs, num_queries, embed_dims)\n        \"\"\"\n        ctx.im2col_step = im2col_step\n        output = ext_module.ms_deform_attn_forward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            im2col_step=ctx.im2col_step)\n        ctx.save_for_backward(value, value_spatial_shapes,\n                              value_level_start_index, sampling_locations,\n                              attention_weights)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    @custom_bwd\n    def backward(ctx, grad_output):\n        \"\"\"GPU version of backward function.\n\n        Args:\n            grad_output (Tensor): Gradient\n                of output tensor of forward.\n\n        Returns:\n             Tuple[Tensor]: Gradient\n                of input tensors in forward.\n        \"\"\"\n        value, value_spatial_shapes, value_level_start_index, \\\n            sampling_locations, attention_weights = ctx.saved_tensors\n        grad_value = torch.zeros_like(value)\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\n        grad_attn_weight = torch.zeros_like(attention_weights)\n\n        ext_module.ms_deform_attn_backward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            grad_output.contiguous(),\n            grad_value,\n            grad_sampling_loc,\n            grad_attn_weight,\n            im2col_step=ctx.im2col_step)\n\n        return grad_value, None, None, \\\n            grad_sampling_loc, grad_attn_weight, None\n\n\nclass MultiScaleDeformableAttnFunction_fp32(Function):\n\n    @staticmethod\n    @custom_fwd(cast_inputs=torch.float32)\n    def 
forward(ctx, value, value_spatial_shapes, value_level_start_index,\n                sampling_locations, attention_weights, im2col_step):\n        \"\"\"GPU version of multi-scale deformable attention.\n\n        Args:\n            value (Tensor): The value has shape\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\n            value_spatial_shapes (Tensor): Spatial shape of\n                each feature map, has shape (num_levels, 2),\n                last dimension 2 represent (h, w)\n            sampling_locations (Tensor): The location of sampling points,\n                has shape\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\n                the last dimension 2 represent (x, y).\n            attention_weights (Tensor): The weight of sampling points used\n                when calculate the attention, has shape\n                (bs ,num_queries, num_heads, num_levels, num_points),\n            im2col_step (Tensor): The step used in image to column.\n\n        Returns:\n            Tensor: has shape (bs, num_queries, embed_dims)\n        \"\"\"\n\n        ctx.im2col_step = im2col_step\n        output = ext_module.ms_deform_attn_forward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            im2col_step=ctx.im2col_step)\n        ctx.save_for_backward(value, value_spatial_shapes,\n                              value_level_start_index, sampling_locations,\n                              attention_weights)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    @custom_bwd\n    def backward(ctx, grad_output):\n        \"\"\"GPU version of backward function.\n\n        Args:\n            grad_output (Tensor): Gradient\n                of output tensor of forward.\n\n        Returns:\n             Tuple[Tensor]: Gradient\n                of input tensors in forward.\n        \"\"\"\n        value, value_spatial_shapes, value_level_start_index, \\\n            sampling_locations, attention_weights = ctx.saved_tensors\n        grad_value = torch.zeros_like(value)\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\n        grad_attn_weight = torch.zeros_like(attention_weights)\n\n        ext_module.ms_deform_attn_backward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            grad_output.contiguous(),\n            grad_value,\n            grad_sampling_loc,\n            grad_attn_weight,\n            im2col_step=ctx.im2col_step)\n\n        return grad_value, None, None, \\\n            grad_sampling_loc, grad_attn_weight, None\n"
  },
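  {
    "path": "docs/examples/msda_function_shapes.py",
    "content": "# Minimal shape sketch for the MultiScaleDeformableAttnFunction_* wrappers.\n# Not part of the training pipeline; the file name and all sizes below are\n# illustrative assumptions. It exercises the same tensor layout with mmcv's\n# pure-PyTorch reference, so no compiled CUDA extension is required.\nimport torch\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\n\nbs, num_heads, embed_dims = 1, 8, 256\nnum_queries, num_points = 100, 4\n# (num_levels, 2), last dimension 2 represents (h, w)\nspatial_shapes = torch.tensor([[32, 88], [16, 44]], dtype=torch.long)\nnum_levels = spatial_shapes.size(0)\nnum_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())\n\nvalue = torch.rand(bs, num_keys, num_heads, embed_dims // num_heads)\n# sampling locations are normalized to [0, 1] over each feature map\nsampling_locations = torch.rand(bs, num_queries, num_heads, num_levels, num_points, 2)\n# weights are normalized jointly over levels and points, as in the attention modules\nattention_weights = torch.rand(bs, num_queries, num_heads, num_levels * num_points)\nattention_weights = attention_weights.softmax(-1).view(\n    bs, num_queries, num_heads, num_levels, num_points)\n\noutput = multi_scale_deformable_attn_pytorch(\n    value, spatial_shapes, sampling_locations, attention_weights)\nassert output.shape == (bs, num_queries, embed_dims)\n"
  },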
  {
    "path": "plugin/models/backbones/bevformer/spatial_cross_attention.py",
    "content": "\n# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\nimport warnings\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init, constant_init\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\n                                      TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import build_attention\nimport math\nfrom mmcv.runner import force_fp32, auto_fp16\n\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\n\nfrom mmcv.utils import ext_loader\nfrom .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32, \\\n    MultiScaleDeformableAttnFunction_fp16\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\n\n\n@ATTENTION.register_module()\nclass SpatialCrossAttention(BaseModule):\n    \"\"\"An attention module used in BEVFormer.\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_cams (int): The number of cameras\n        dropout (float): A Dropout layer on `inp_residual`.\n            Default: 0..\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        deformable_attention: (dict): The config for the deformable attention used in SCA.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_cams=6,\n                 pc_range=None,\n                 dropout=0.1,\n                 init_cfg=None,\n                 batch_first=False,\n                 deformable_attention=dict(\n                     type='MSDeformableAttention3D',\n                     embed_dims=256,\n                     num_levels=4),\n                 **kwargs\n                 ):\n        super(SpatialCrossAttention, self).__init__(init_cfg)\n\n        self.init_cfg = init_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.pc_range = pc_range\n        self.fp16_enabled = False\n        self.deformable_attention = build_attention(deformable_attention)\n        self.embed_dims = embed_dims\n        self.num_cams = num_cams\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.batch_first = batch_first\n        self.init_weight()\n\n    def init_weight(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n    \n    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam'))\n    def forward(self,\n                query,\n                key,\n                value,\n                residual=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                reference_points_cam=None,\n                bev_mask=None,\n                level_start_index=None,\n                flag='encoder',\n                **kwargs):\n        \"\"\"Forward Function of Detr3DCrossAtten.\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with 
shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`. (B, N, C, H, W)\n            residual (Tensor): The tensor used for addition, with the\n                same shape as `x`. Default None. If None, `x` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for  `key`. Default\n                None.\n            reference_points (Tensor):  The normalized reference\n                points with shape (bs, num_query, 4),\n                all elements is range in [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n                or (N, Length_{query}, num_levels, 4), add\n                additional two dimensions is (w, h) to\n                form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different level. With shape  (num_levels, 2),\n                last dimension represent (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape (num_levels) and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if key is None:\n            key = query\n        if value is None:\n            value = key\n\n        if residual is None:\n            inp_residual = query\n            slots = torch.zeros_like(query)\n        if query_pos is not None:\n            query = query + query_pos\n\n        bs, num_query, _ = query.size()\n\n        D = reference_points_cam.size(3)\n        indexes = []\n        for i, mask_per_img in enumerate(bev_mask):\n            index_query_per_img = mask_per_img[0].sum(-1).nonzero().squeeze(-1)\n            indexes.append(index_query_per_img)\n        max_len = max([len(each) for each in indexes])\n\n        # each camera only interacts with its corresponding BEV queries. 
This step can  greatly save GPU memory.\n        queries_rebatch = query.new_zeros(\n            [bs, self.num_cams, max_len, self.embed_dims])\n        reference_points_rebatch = reference_points_cam.new_zeros(\n            [bs, self.num_cams, max_len, D, 2])\n        \n        for j in range(bs):\n            for i, reference_points_per_img in enumerate(reference_points_cam):   \n                index_query_per_img = indexes[i]\n                queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]\n                reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img]\n\n        num_cams, l, bs, embed_dims = key.shape\n\n        key = key.permute(2, 0, 1, 3).reshape(\n            bs * self.num_cams, l, self.embed_dims)\n        value = value.permute(2, 0, 1, 3).reshape(\n            bs * self.num_cams, l, self.embed_dims)\n\n        queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), key=key, value=value,\n                                            reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), spatial_shapes=spatial_shapes,\n                                            level_start_index=level_start_index).view(bs, self.num_cams, max_len, self.embed_dims)\n        for j in range(bs):\n            for i, index_query_per_img in enumerate(indexes):\n                slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)]\n\n        count = bev_mask.sum(-1) > 0\n        count = count.permute(1, 2, 0).sum(-1)\n        count = torch.clamp(count, min=1.0)\n        slots = slots / count[..., None]\n        slots = self.output_proj(slots)\n\n        return self.dropout(slots) + inp_residual\n\n\n@ATTENTION.register_module()\nclass MSDeformableAttention3D(BaseModule):\n    \"\"\"An attention module used in BEVFormer based on Deformable-Detr.\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 64.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to True.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=8,\n                 im2col_step=64,\n                 dropout=0.1,\n                 batch_first=True,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.batch_first = batch_first\n        self.output_proj = None\n        self.fp16_enabled = False\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.sampling_offsets = nn.Linear(\n            embed_dims, num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims,\n                                           num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        constant_init(self.sampling_offsets, 0.)\n        thetas = torch.arange(\n            self.num_heads,\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init /\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\n            self.num_heads, 1, 1,\n            2).repeat(1, self.num_levels, self.num_points, 1)\n        for i in range(self.num_points):\n            grid_init[:, :, i, :] *= i + 1\n\n        self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        # output_proj is None in this module, so the call below is a no-op\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (bs, num_query, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(bs, num_key, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(bs, num_key, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor): The normalized reference\n                points with shape (bs, num_query, num_levels, 2),\n                all elements are in range [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n                or (N, Length_{query}, num_levels, 4), with\n                two additional dimensions (w, h) to\n                form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n        Returns:\n             Tensor: forwarded results with shape [bs, num_query, embed_dims]\n                if `batch_first` is True, else [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n\n        if not self.batch_first:\n            # change to (bs, num_query, embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n        sampling_offsets = self.sampling_offsets(query).view(\n            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n        attention_weights = self.attention_weights(query).view(\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\n\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   self.num_levels,\n                                                   self.num_points)\n\n        if reference_points.shape[-1] == 2:\n            \"\"\"\n            For each BEV query, it owns `num_Z_anchors` reference points in 3D space at different heights.\n            After projecting, each BEV query has `num_Z_anchors` reference points in each 2D image.\n            For each reference point, we sample `num_points` sampling points.\n            For `num_Z_anchors` reference points, it has `num_points * num_Z_anchors` sampling points in total.\n            \"\"\"\n            offset_normalizer = torch.stack(\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\n\n            bs, num_query, num_Z_anchors, xy = reference_points.shape\n            reference_points = reference_points[:, :, None, None, None, :, :]\n            sampling_offsets = sampling_offsets / \\\n                offset_normalizer[None, None, None, :, None, :]\n            bs, num_query, num_heads, num_levels, num_all_points, xy = sampling_offsets.shape\n            sampling_offsets = sampling_offsets.view(\n                bs, num_query, num_heads, num_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy)\n            sampling_locations = reference_points + sampling_offsets\n            bs, num_query, num_heads, num_levels, num_points, num_Z_anchors, xy = sampling_locations.shape\n            assert num_all_points == num_points * num_Z_anchors\n\n            sampling_locations = sampling_locations.view(\n                bs, num_query, num_heads, num_levels, num_all_points, xy)\n\n        elif reference_points.shape[-1] == 4:\n            raise NotImplementedError(\n                'reference boxes (last dim 4) are not supported here.')\n        else:\n            raise ValueError(\n                f'Last dim of reference_points must be'\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\n\n        #  sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2\n        #  attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points\n\n        if torch.cuda.is_available() and value.is_cuda:\n            # fp16 deformable attention is unstable, so the fp32 kernel is used\n            # for both dtypes\n            MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\n            output = MultiScaleDeformableAttnFunction.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n        if not self.batch_first:\n            output = output.permute(1, 0, 2)\n\n        return output\n\n\n@ATTENTION.register_module()\nclass MSIPM3D(BaseModule):\n    \"\"\"An IPM-style variant of `MSDeformableAttention3D` that uses fixed\n    (non-learned) sampling offsets and uniform attention weights.\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 8.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 8.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to True.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=8,\n                 im2col_step=64,\n                 dropout=0.1,\n                 batch_first=True,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.batch_first = batch_first\n        self.output_proj = None\n        self.fp16_enabled = False\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        # Unlike MSDeformableAttention3D, there are no learned sampling_offsets\n        # or attention_weights layers: the offsets are fixed and the attention\n        # weights are uniform.\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        thetas = torch.arange(\n            self.num_heads,\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init /\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\n            self.num_heads, 1, 1,\n            2).repeat(1, self.num_levels, self.num_points, 1)\n        for i in range(self.num_points):\n            grid_init[:, :, i, :] *= i + 1\n\n        # the canonical deformable-attention offset pattern is frozen here\n        # instead of initializing a learnable bias\n        self.fixed_sampling_offsets = nn.Parameter(grid_init.view(-1), requires_grad=False)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        # output_proj is None in this module, so the call below is a no-op\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        \"\"\"Forward Function of MSIPM3D.\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (bs, num_query, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(bs, num_key, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(bs, num_key, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor): The normalized reference\n                points with shape (bs, num_query, num_levels, 2),\n                all elements are in range [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n                or (N, Length_{query}, num_levels, 4), with\n                two additional dimensions (w, h) to\n                form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n        Returns:\n             Tensor: forwarded results with shape [bs, num_query, embed_dims]\n                if `batch_first` is True, else [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n\n        if not self.batch_first:\n            # change to (bs, num_query, embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n        sampling_offsets = self.fixed_sampling_offsets.view(\n            1, 1, self.num_heads, self.num_levels, self.num_points, 2).repeat(\n            bs, num_query, 1, 1, 1, 1)\n        # uniform attention weights: softmax over a constant tensor yields\n        # 1 / (num_levels * num_points) everywhere\n        attention_weights = query.new_ones((bs, num_query, self.num_heads, self.num_levels * self.num_points))\n        attention_weights = attention_weights.softmax(-1)\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   self.num_levels,\n                                                   self.num_points)\n\n        if reference_points.shape[-1] == 2:\n            \"\"\"\n            For each BEV query, it owns `num_Z_anchors` reference points in 3D space at different heights.\n            After projecting, each BEV query has `num_Z_anchors` reference points in each 2D image.\n            For each reference point, we sample `num_points` sampling points.\n            For `num_Z_anchors` reference points, it has `num_points * num_Z_anchors` sampling points in total.\n            \"\"\"\n            offset_normalizer = torch.stack(\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\n\n            bs, num_query, num_Z_anchors, xy = reference_points.shape\n            reference_points = reference_points[:, :, None, None, None, :, :]\n            sampling_offsets = sampling_offsets / \\\n                offset_normalizer[None, None, None, :, None, :]\n            bs, num_query, num_heads, num_levels, num_all_points, xy = sampling_offsets.shape\n            sampling_offsets = sampling_offsets.view(\n                bs, num_query, num_heads, num_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy)\n            sampling_locations = reference_points + sampling_offsets\n            bs, num_query, num_heads, num_levels, num_points, num_Z_anchors, xy = sampling_locations.shape\n            assert num_all_points == num_points * num_Z_anchors\n\n            sampling_locations = sampling_locations.view(\n                bs, num_query, num_heads, num_levels, num_all_points, xy)\n\n        elif reference_points.shape[-1] == 4:\n            raise NotImplementedError(\n                'reference boxes (last dim 4) are not supported here.')\n        else:\n            raise ValueError(\n                f'Last dim of reference_points must be'\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\n\n        #  sampling_locations.shape: bs, num_query, num_heads, num_levels, num_all_points, 2\n        #  attention_weights.shape: bs, num_query, num_heads, num_levels, num_all_points\n\n        if torch.cuda.is_available() and value.is_cuda:\n            # fp16 deformable attention is unstable, so the fp32 kernel is used\n            # for both dtypes\n            MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\n            output = MultiScaleDeformableAttnFunction.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n        if not self.batch_first:\n            output = output.permute(1, 0, 2)\n\n        return output\n"
  },
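  {
    "path": "docs/examples/sca_rebatch_sketch.py",
    "content": "# Minimal sketch of the rebatching trick in SpatialCrossAttention (the file\n# name and all sizes are illustrative assumptions). Each camera only attends\n# with the BEV queries whose 3D reference points project into that camera, so\n# the queries are gathered into a padded (bs, num_cams, max_len, C) buffer\n# before deformable attention and scattered back (and averaged) afterwards.\nimport torch\n\nbs, num_cams, num_query, embed_dims, D = 1, 6, 50, 8, 4\nquery = torch.rand(bs, num_query, embed_dims)\n# True where one of the D height anchors of a BEV query hits camera i\nbev_mask = torch.rand(num_cams, bs, num_query, D) > 0.7\n\nindexes = [m[0].sum(-1).nonzero().squeeze(-1) for m in bev_mask]\nmax_len = max(len(each) for each in indexes)\n\nqueries_rebatch = query.new_zeros(bs, num_cams, max_len, embed_dims)\nfor j in range(bs):\n    for i, index_query_per_img in enumerate(indexes):\n        queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img]\n\n# ... deformable attention runs on the (bs * num_cams, max_len, C) view; its\n# outputs are scatter-added back into a (bs, num_query, C) slot tensor and\n# divided by the per-query hit count, exactly as in the forward pass above.\nprint(queries_rebatch.shape)\n"
  },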
  {
    "path": "plugin/models/backbones/bevformer/temporal_net.py",
    "content": "import torch\nimport torch.nn as nn\nfrom typing import Optional, Sequence, Tuple, Union\nfrom mmdet.models import NECKS\nfrom mmcv.cnn.utils import kaiming_init, constant_init\nfrom mmcv.cnn.resnet import conv3x3\nfrom torch import Tensor\n\nfrom einops import rearrange\n\n\nclass MyResBlock(nn.Module):\n    def __init__(self,\n                 inplanes: int,\n                 planes: int,\n                 stride: int = 1,\n                 dilation: int = 1,\n                 style: str = 'pytorch',\n                 with_cp: bool = False):\n        super().__init__()\n        assert style in ['pytorch', 'caffe']\n        self.conv1 = conv3x3(inplanes, planes, stride, dilation)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.stride = stride\n        self.dilation = dilation\n        assert not with_cp\n\n    def forward(self, x: Tensor) -> Tensor:\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\n@NECKS.register_module()\nclass TemporalNet(nn.Module):\n    def __init__(self, history_steps, hidden_dims, num_blocks):\n        super(TemporalNet, self).__init__()\n        self.history_steps = history_steps\n        self.hidden_dims = hidden_dims\n        self.num_blocks = num_blocks\n        \n        layers = []\n        \n        in_dims = (history_steps+1) * hidden_dims\n        self.conv_in = conv3x3(in_dims, hidden_dims, 1, 1)\n        self.bn = nn.BatchNorm2d(hidden_dims)\n        self.relu = nn.ReLU(inplace=True)        \n\n        for _ in range(self.num_blocks):\n            layers.append(MyResBlock(hidden_dims, hidden_dims))\n        self.res_layer = nn.Sequential(*layers) \n    \n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                kaiming_init(m)\n    \n\n    def forward(self, history_feats, curr_feat):\n        input_feats = torch.cat([history_feats, curr_feat.unsqueeze(1)], dim=1)\n        input_feats = rearrange(input_feats, 'b t c h w -> b (t c) h w') \n\n        out = self.conv_in(input_feats)\n        out = self.bn(out)\n        out = self.relu(out)\n        out = self.res_layer(out)\n        if curr_feat.dim() == 3:\n            out = out.squeeze(0)\n\n        return out\n\n"
  },
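  {
    "path": "docs/examples/temporal_net_usage.py",
    "content": "# Minimal usage sketch for TemporalNet (file name, import path, and sizes are\n# illustrative assumptions): `history_steps` warped history BEV maps plus the\n# current BEV map are concatenated along channels and fused back to\n# `hidden_dims`, preserving the BEV resolution.\nimport torch\nfrom plugin.models.backbones.bevformer.temporal_net import TemporalNet\n\nnet = TemporalNet(history_steps=4, hidden_dims=64, num_blocks=2)\nnet.init_weights()\n\nhistory = torch.rand(1, 4, 64, 50, 100)  # (b, t, c, h, w)\ncurrent = torch.rand(1, 64, 50, 100)     # (b, c, h, w)\nfused = net(history, current)\nassert fused.shape == (1, 64, 50, 100)   # spatial BEV resolution is preserved\n"
  },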
  {
    "path": "plugin/models/backbones/bevformer/temporal_self_attention.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nfrom .multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\nimport warnings\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import xavier_init, constant_init\nfrom mmcv.cnn.bricks.registry import ATTENTION\nimport math\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\nfrom mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,\n                        to_2tuple)\n\nfrom mmcv.utils import ext_loader\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\n\n\n@ATTENTION.register_module()\nclass TemporalSelfAttention(BaseModule):\n    \"\"\"An attention module used in BEVFormer based on Deformable-Detr.\n\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 64.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to True.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        num_bev_queue (int): In this version, we only use one history BEV and one currenct BEV.\n         the length of BEV queue is 2.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=4,\n                 num_bev_queue=2,\n                 im2col_step=64,\n                 dropout=0.1,\n                 batch_first=True,\n                 norm_cfg=None,\n                 init_cfg=None):\n\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.batch_first = batch_first\n        self.fp16_enabled = False\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.num_bev_queue = num_bev_queue\n        self.sampling_offsets = nn.Linear(\n            embed_dims*self.num_bev_queue, num_bev_queue*num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims*self.num_bev_queue,\n                                           num_bev_queue*num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        constant_init(self.sampling_offsets, 0.)\n        thetas = torch.arange(\n            self.num_heads,\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init /\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\n            self.num_heads, 1, 1,\n            2).repeat(1, self.num_levels*self.num_bev_queue, self.num_points, 1)\n\n        for i in range(self.num_points):\n            grid_init[:, :, i, :] *= i + 1\n\n        self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    def 
forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                flag='decoder',\n\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor):  The normalized reference\n                points with shape (bs, num_query, num_levels, 2),\n                all elements is range in [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n                or (N, Length_{query}, num_levels, 4), add\n                additional two dimensions is (w, h) to\n                form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            assert self.batch_first\n            bs, len_bev, c = query.shape\n            value = torch.stack([query, query], 1).reshape(bs*2, len_bev, c)\n\n            # value = torch.cat([query, query], 0)\n\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n        if not self.batch_first:\n            # change to (bs, num_query ,embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n        bs,  num_query, embed_dims = query.shape\n        _, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n        assert self.num_bev_queue == 2\n\n        query = torch.cat([value[:bs], query], -1)\n        value = self.value_proj(value)\n\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n\n        value = value.reshape(bs*self.num_bev_queue,\n                              num_value, self.num_heads, -1)\n\n        sampling_offsets = self.sampling_offsets(query)\n        sampling_offsets = sampling_offsets.view(\n            bs, num_query, self.num_heads,  self.num_bev_queue, self.num_levels, self.num_points, 2)\n        attention_weights = self.attention_weights(query).view(\n            bs, 
num_query,  self.num_heads, self.num_bev_queue, self.num_levels * self.num_points)\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   self.num_bev_queue,\n                                                   self.num_levels,\n                                                   self.num_points)\n\n        attention_weights = attention_weights.permute(0, 3, 1, 2, 4, 5)\\\n            .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points).contiguous()\n        sampling_offsets = sampling_offsets.permute(0, 3, 1, 2, 4, 5, 6)\\\n            .reshape(bs*self.num_bev_queue, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n\n        if reference_points.shape[-1] == 2:\n            offset_normalizer = torch.stack(\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\n            sampling_locations = reference_points[:, :, None, :, None, :] \\\n                + sampling_offsets \\\n                / offset_normalizer[None, None, None, :, None, :]\n\n        elif reference_points.shape[-1] == 4:\n            sampling_locations = reference_points[:, :, None, :, None, :2] \\\n                + sampling_offsets / self.num_points \\\n                * reference_points[:, :, None, :, None, 2:] \\\n                * 0.5\n        else:\n            raise ValueError(\n                f'Last dim of reference_points must be'\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\n        if torch.cuda.is_available() and value.is_cuda:\n\n            # using fp16 deformable attention is unstable because it performs many sum operations\n            if value.dtype == torch.float16:\n                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\n            else:\n                MultiScaleDeformableAttnFunction = MultiScaleDeformableAttnFunction_fp32\n            output = MultiScaleDeformableAttnFunction.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n\n        # output shape (bs*num_bev_queue, num_query, embed_dims)\n        # (bs*num_bev_queue, num_query, embed_dims)-> (num_query, embed_dims, bs*num_bev_queue)\n        output = output.permute(1, 2, 0)\n\n        # fuse history value and current value\n        # (num_query, embed_dims, bs*num_bev_queue)-> (num_query, embed_dims, bs, num_bev_queue)\n        output = output.view(num_query, embed_dims, bs, self.num_bev_queue)\n        output = output.mean(-1)\n\n        # (num_query, embed_dims, bs)-> (bs, num_query, embed_dims)\n        output = output.permute(2, 0, 1)\n\n        output = self.output_proj(output)\n\n        if not self.batch_first:\n            output = output.permute(1, 0, 2)\n\n        return self.dropout(output) + identity\n"
  },
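  {
    "path": "docs/examples/temporal_self_attention_usage.py",
    "content": "# Minimal usage sketch for TemporalSelfAttention (file name, import path, and\n# sizes are illustrative assumptions). The BEV queue of length 2 is built by\n# stacking the aligned previous BEV with the current queries along the batch\n# axis; the module predicts offsets/weights for both entries and averages them.\nimport torch\nfrom plugin.models.backbones.bevformer.temporal_self_attention import TemporalSelfAttention\n\nbs, bev_h, bev_w, c = 1, 20, 40, 256\nattn = TemporalSelfAttention(embed_dims=c, num_levels=1)\n\nquery = torch.rand(bs, bev_h * bev_w, c)     # current BEV queries\nprev_bev = torch.rand(bs, bev_h * bev_w, c)  # previous BEV, already ego-aligned\nvalue = torch.stack([prev_bev, query], 1).reshape(bs * 2, bev_h * bev_w, c)\n\n# one normalized (x, y) reference point per BEV cell, repeated for both entries\nref_y, ref_x = torch.meshgrid(\n    torch.linspace(0.5, bev_h - 0.5, bev_h) / bev_h,\n    torch.linspace(0.5, bev_w - 0.5, bev_w) / bev_w)\nref_2d = torch.stack((ref_x.reshape(-1), ref_y.reshape(-1)), -1)\nref_2d = ref_2d[None, :, None].repeat(bs * 2, 1, 1, 1)  # (bs*2, h*w, 1, 2)\n\nspatial_shapes = torch.tensor([[bev_h, bev_w]], dtype=torch.long)\nlevel_start_index = torch.tensor([0])\nout = attn(query, value=value, reference_points=ref_2d,\n           spatial_shapes=spatial_shapes, level_start_index=level_start_index)\nassert out.shape == (bs, bev_h * bev_w, c)\n"
  },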
  {
    "path": "plugin/models/backbones/bevformer/transformer.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import xavier_init\nfrom mmcv.cnn.bricks.transformer import build_transformer_layer_sequence\nfrom mmcv.runner.base_module import BaseModule\n\nfrom mmdet.models.utils.builder import TRANSFORMER\nfrom torch.nn.init import normal_\nfrom mmcv.runner.base_module import BaseModule\nfrom torchvision.transforms.functional import rotate\nfrom .temporal_self_attention import TemporalSelfAttention\nfrom .spatial_cross_attention import MSDeformableAttention3D\nfrom mmcv.runner import force_fp32, auto_fp16\n\nfrom einops import rearrange\n\n\n@TRANSFORMER.register_module()\nclass PerceptionTransformer(BaseModule):\n    \"\"\"Implements the Detr3D transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 num_feature_levels=4,\n                 num_cams=6,\n                 encoder=None,\n                 embed_dims=256,\n                 use_cams_embeds=True,\n                 **kwargs):\n        super().__init__(**kwargs)\n        self.encoder = build_transformer_layer_sequence(encoder)\n        # self.decoder = build_transformer_layer_sequence(decoder)\n        self.embed_dims = embed_dims\n        self.num_feature_levels = num_feature_levels\n        self.num_cams = num_cams\n        self.fp16_enabled = False\n\n        self.use_cams_embeds = use_cams_embeds\n\n        self.init_layers()\n        \n    def init_layers(self):\n        \"\"\"Initialize layers of the Detr3DTransformer.\"\"\"\n        self.level_embeds = nn.Parameter(torch.Tensor(\n            self.num_feature_levels, self.embed_dims))\n        self.cams_embeds = nn.Parameter(\n            torch.Tensor(self.num_cams, self.embed_dims))\n        # self.reference_points = nn.Linear(self.embed_dims, 3)\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, MSDeformableAttention3D) or isinstance(m, TemporalSelfAttention):\n                try:\n                    m.init_weight()\n                except AttributeError:\n                    m.init_weights()\n        normal_(self.level_embeds)\n        normal_(self.cams_embeds)\n        # xavier_init(self.reference_points, distribution='uniform', bias=0.)\n\n    # @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'prev_bev', 'bev_pos'))\n    def get_bev_features(\n            self,\n            mlvl_feats,\n            bev_queries,\n            bev_h,\n            bev_w,\n            bev_pos=None,\n            prop_bev=None,\n            prev_bev=None,\n            warped_history_bev=None,\n            **kwargs):\n        \"\"\"\n        obtain bev features.\n        \"\"\"\n\n        bs = mlvl_feats[0].size(0)\n        bev_queries = bev_queries.unsqueeze(1).repeat(1, bs, 1)\n        bev_pos = bev_pos.flatten(2).permute(2, 0, 1)\n\n        shift = 
bev_queries.new_tensor((0,0))[None].repeat(bs,1)\n\n        feat_flatten = []\n        spatial_shapes = []\n\n        for lvl, feat in enumerate(mlvl_feats):\n            bs, num_cam, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            feat = feat.flatten(3).permute(1, 0, 3, 2)\n            if self.use_cams_embeds:\n                feat = feat + self.cams_embeds[:, None, None, :].to(feat.dtype)\n            feat = feat + self.level_embeds[None,\n                                            None, lvl:lvl + 1, :].to(feat.dtype)\n            spatial_shapes.append(spatial_shape)\n            feat_flatten.append(feat)\n\n        feat_flatten = torch.cat(feat_flatten, 2)\n        \n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=bev_pos.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1,)), spatial_shapes.prod(1).cumsum(0)[:-1]))\n\n        feat_flatten = feat_flatten.permute(\n            0, 2, 1, 3)  # (num_cam, H*W, bs, embed_dims)\n        \n        # Fuse the propagated bev features from the prev step\n        if prop_bev is not None:\n            prop_bev = rearrange(prop_bev, 'b c h w -> (h w) b c')\n            valid_mask = (prop_bev.sum(-1) > 0).to(bev_queries.dtype)[..., None]\n            bev_queries = bev_queries * (1 - valid_mask) + prop_bev * valid_mask \n        \n        bev_embed = self.encoder(\n            bev_queries,\n            feat_flatten,\n            feat_flatten,\n            bev_h=bev_h,\n            bev_w=bev_w,\n            bev_pos=bev_pos,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            prev_bev=prev_bev,\n            shift=shift,\n            warped_history_bev=warped_history_bev,\n            **kwargs\n        )\n\n        return bev_embed\n\n    @auto_fp16(apply_to=('mlvl_feats', 'bev_queries', 'object_query_embed', 'prev_bev', 'bev_pos'))\n    def forward(self,\n                mlvl_feats,\n                bev_queries,\n                object_query_embed,\n                bev_h,\n                bev_w,\n                grid_length=[0.512, 0.512],\n                bev_pos=None,\n                reg_branches=None,\n                cls_branches=None,\n                prev_bev=None,\n                **kwargs):\n        \"\"\"Forward function for `Detr3DTransformer`.\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. Each element has shape\n                [bs, num_cams, embed_dims, h, w].\n            bev_queries (Tensor): (bev_h*bev_w, c)\n            bev_pos (Tensor): (bs, embed_dims, bev_h, bev_w)\n            object_query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when `with_box_refine` is True. Default to None.\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n                - bev_embed: BEV features\n                - inter_states: Outputs from decoder. 
If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). \\\n                    Only would be returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps., has shape \\\n                    (batch, h*w, 4). Only would \\\n                    be returned when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n\n        raise NotImplementedError"
  },
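  {
    "path": "docs/examples/level_start_index_sketch.py",
    "content": "# Minimal sketch (illustrative sizes) of how PerceptionTransformer.get_bev_features\n# derives `level_start_index` from the per-level spatial shapes: level l occupies\n# rows level_start_index[l] .. level_start_index[l] + h_l * w_l of the flattened\n# multi-level feature sequence consumed by deformable attention.\nimport torch\n\nspatial_shapes = torch.as_tensor([[32, 88], [16, 44], [8, 22]], dtype=torch.long)\nlevel_start_index = torch.cat((spatial_shapes.new_zeros((1,)),\n                               spatial_shapes.prod(1).cumsum(0)[:-1]))\nprint(level_start_index.tolist())  # [0, 2816, 3520]\n"
  },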
  {
    "path": "plugin/models/backbones/bevformer_backbone.py",
    "content": "import copy\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.models import BACKBONES\nfrom mmcv.runner import force_fp32, auto_fp16\nfrom mmdet.models.utils import build_transformer\nfrom mmcv.cnn.bricks.transformer import FFN, build_positional_encoding\nfrom .bevformer.grid_mask import GridMask\nfrom mmdet3d.models import builder\nfrom contextlib import nullcontext\n\n\nclass UpsampleBlock(nn.Module):\n    def __init__(self, ins, outs):\n        super(UpsampleBlock, self).__init__()\n        self.gn = nn.GroupNorm(32, outs)\n        self.conv = nn.Conv2d(ins, outs, kernel_size=3,\n                              stride=1, padding=1)  # same\n        self.relu = nn.ReLU(inplace=True)\n    \n    def init_weights(self):\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n\n    def forward(self, x):\n\n        x = self.conv(x)\n        x = self.relu(self.gn(x))\n        x = self.upsample2x(x)\n\n        return x\n\n    def upsample2x(self, x):\n        _, _, h, w = x.shape\n        x = F.interpolate(x, size=(h*2, w*2),\n                          mode='bilinear', align_corners=True)\n        return x\n\n@BACKBONES.register_module()\nclass BEVFormerBackbone(nn.Module):\n    \"\"\"Head of Detr3D.\n    Args:\n        with_box_refine (bool): Whether to refine the reference points\n            in the decoder. Defaults to False.\n        as_two_stage (bool) : Whether to generate the proposal from\n            the outputs of encoder.\n        transformer (obj:`ConfigDict`): ConfigDict is used for building\n            the Encoder and Decoder.\n        bev_h, bev_w (int): spatial shape of BEV queries.\n    \"\"\"\n\n    def __init__(self,\n                 roi_size,\n                 bev_h,\n                 bev_w,\n                 img_backbone=None, \n                 img_neck=None,               \n                 transformer=None,\n                 positional_encoding=None,\n                 use_grid_mask=True,\n                 upsample=False,\n                 up_outdim=128,\n                 history_steps=None,\n                 **kwargs):\n        super(BEVFormerBackbone, self).__init__()\n\n        # image feature\n        self.default_ratio = 0.5\n        self.default_prob = 0.7\n        self.grid_mask = GridMask(\n            True, True, rotate=1, offset=False, ratio=self.default_ratio, mode=1, \n                prob=self.default_prob)\n        self.use_grid_mask = use_grid_mask\n\n        if img_backbone:\n            self.img_backbone = builder.build_backbone(img_backbone)\n        if img_neck is not None:\n            self.img_neck = builder.build_neck(img_neck)\n            self.with_img_neck = True\n        else:\n            self.with_img_neck = False\n\n        self.bev_h = bev_h\n        self.bev_w = bev_w\n\n        self.real_w = roi_size[0]\n        self.real_h = roi_size[1]\n    \n        self.positional_encoding = build_positional_encoding(\n            positional_encoding)\n        self.transformer = build_transformer(transformer)\n        self.embed_dims = self.transformer.embed_dims\n        \n        self.upsample = upsample\n        if self.upsample:\n            self.up = UpsampleBlock(self.transformer.embed_dims, up_outdim)\n        \n        self.history_steps = history_steps\n\n        self._init_layers()\n        self.init_weights()\n\n\n    def _init_layers(self):\n        \"\"\"Initialize classification branch and regression branch of head.\"\"\"\n        
self.bev_embedding = nn.Embedding(\n            self.bev_h * self.bev_w, self.embed_dims)\n\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the DeformDETR head.\"\"\"\n        self.transformer.init_weights()\n        self.img_backbone.init_weights()\n        self.img_neck.init_weights()\n       \n        if self.upsample:\n            self.up.init_weights()\n    \n    # @auto_fp16(apply_to=('img'))\n    def extract_img_feat(self, img, img_metas, len_queue=None):\n        \"\"\"Extract features of images.\"\"\"\n        B = img.size(0)\n        if img is not None:\n            \n            # input_shape = img.shape[-2:]\n            # # update real input shape of each single img\n            # for img_meta in img_metas:\n            #     img_meta.update(input_shape=input_shape)\n\n            if img.dim() == 5 and img.size(0) == 1:\n                img = img.squeeze(0)\n            elif img.dim() == 5 and img.size(0) > 1:\n                B, N, C, H, W = img.size()\n                img = img.reshape(B * N, C, H, W)\n            if self.use_grid_mask:\n                img = self.grid_mask(img)\n\n            img_feats = self.img_backbone(img)\n            if isinstance(img_feats, dict):\n                img_feats = list(img_feats.values())\n        else:\n            return None\n        if self.with_img_neck:\n            img_feats = self.img_neck(img_feats)\n\n        img_feats_reshaped = []\n        for img_feat in img_feats:\n            BN, C, H, W = img_feat.size()\n            if len_queue is not None:\n                img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W))\n            else:\n                img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W))\n        \n        return img_feats_reshaped\n\n    def forward(self, img, img_metas, timestep, history_bev_feats, history_img_metas, all_history_coord, *args, prev_bev=None, \n                img_backbone_gradient=True, **kwargs):\n        \"\"\"Forward function.\n        Args:\n            mlvl_feats (tuple[Tensor]): Features from the upstream\n                network, each is a 5D-tensor with shape\n                (B, N, C, H, W).\n            prev_bev: previous bev featues\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, l, cz, h, theta, vx, vy). 
img_feats_reshaped = []\n        for img_feat in img_feats:\n            BN, C, H, W = img_feat.size()\n            if len_queue is not None:\n                img_feats_reshaped.append(img_feat.view(int(B/len_queue), len_queue, int(BN / B), C, H, W))\n            else:\n                img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W))\n        \n        return img_feats_reshaped\n\n    def forward(self, img, img_metas, timestep, history_bev_feats, history_img_metas, all_history_coord, *args, prev_bev=None, \n                img_backbone_gradient=True, **kwargs):\n        \"\"\"Forward function.\n        Args:\n            img (Tensor): multi-camera images of shape (B, N, C, H, W).\n            img_metas (list[dict]): meta information of each sample.\n            timestep (int): index of the current frame in the sequence.\n            history_bev_feats (list[Tensor]): buffered BEV features of the\n                selected past frames.\n            history_img_metas (list): image metas of the buffered past frames.\n            all_history_coord (list[Tensor]): per-sample sampling grids that\n                warp each history BEV feature into the current ego frame.\n            prev_bev (Tensor, optional): previous BEV features.\n            img_backbone_gradient (bool): if False, run the image backbone and\n                neck under torch.no_grad() so only the BEV transformer gets\n                gradients.\n        Returns:\n            tuple: the BEV feature map of shape (B, C, bev_h, bev_w)\n                (2x upsampled when self.upsample is set) and the multi-level\n                image features.\n        \"\"\"\n        # Optionally turn off the gradient backprop for the 2D image backbones\n        # but always keep the gradients on for the BEV transformer part\n        backprop_context = torch.no_grad if img_backbone_gradient is False else nullcontext\n        with backprop_context():\n            mlvl_feats = self.extract_img_feat(img=img, img_metas=img_metas)\n\n        bs, num_cam, _, _, _ = mlvl_feats[0].shape\n        dtype = mlvl_feats[0].dtype\n        bev_queries = self.bev_embedding.weight.to(dtype)\n\n        # Prepare the transformed history bev features, add the bev prop fusion here\n        if len(history_bev_feats) > 0:\n            all_warped_history_feat = []\n            for b_i in range(bs):\n                history_coord = all_history_coord[b_i]\n                history_bev_feats_i = torch.stack([feats[b_i] for feats in history_bev_feats], 0)\n                warped_history_feat_i = F.grid_sample(history_bev_feats_i, \n                            history_coord, padding_mode='zeros', align_corners=False)\n                all_warped_history_feat.append(warped_history_feat_i)\n            all_warped_history_feat = torch.stack(all_warped_history_feat, dim=0) # BTCHW\n            prop_bev_feat = all_warped_history_feat[:, -1]\n        else:\n            all_warped_history_feat = None\n            prop_bev_feat = None\n\n        # pad the bev history buffer to fixed length\n        if len(history_bev_feats) < self.history_steps:\n            num_repeat = self.history_steps - len(history_bev_feats)\n            zero_bev_feats = torch.zeros([bs, bev_queries.shape[1], self.bev_h, self.bev_w]).to(bev_queries.device)\n            padding_history_bev_feats = torch.stack([zero_bev_feats,] * num_repeat, dim=1)\n            if all_warped_history_feat is not None:\n                all_warped_history_feat = torch.cat([padding_history_bev_feats, all_warped_history_feat], dim=1)\n            else:\n                all_warped_history_feat = padding_history_bev_feats\n        \n        bev_mask = torch.zeros((bs, self.bev_h, self.bev_w), device=bev_queries.device).to(dtype)\n        bev_pos = self.positional_encoding(bev_mask).to(dtype)\n\n        outs = self.transformer.get_bev_features(\n                mlvl_feats,\n                bev_queries,\n                self.bev_h,\n                self.bev_w,\n                grid_length=(self.real_h / self.bev_h,\n                            self.real_w / self.bev_w),\n                bev_pos=bev_pos,\n                prop_bev=prop_bev_feat,\n                img_metas=img_metas,\n                prev_bev=prev_bev,\n                warped_history_bev=all_warped_history_feat,\n            )\n        \n        outs = outs.unflatten(1, (self.bev_h, self.bev_w)).permute(0, 3, 1, 2).contiguous()\n        \n        if self.upsample:\n            outs = self.up(outs)\n        \n        return outs, mlvl_feats\n"
  },
  {
    "path": "plugin/models/heads/MapDetectorHead.py",
    "content": "import copy\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, Linear, build_activation_layer, bias_init_with_prob, xavier_init\nfrom mmcv.runner import force_fp32\nfrom mmcv.cnn.bricks.transformer import build_positional_encoding\nfrom mmdet.models.utils import build_transformer\nfrom mmdet.models import build_loss\n\nfrom mmdet.core import multi_apply, reduce_mean, build_assigner, build_sampler\nfrom mmdet.models import HEADS\nfrom mmdet.models.utils.transformer import inverse_sigmoid\n\nfrom einops import rearrange\n\n@HEADS.register_module(force=True)\nclass MapDetectorHead(nn.Module):\n\n    def __init__(self, \n                 num_queries,\n                 num_classes=3,\n                 in_channels=128,\n                 embed_dims=256,\n                 score_thr=0.1,\n                 num_points=20,\n                 coord_dim=2,\n                 roi_size=(60, 30),\n                 different_heads=True,\n                 predict_refine=False,\n                 bev_pos=None,\n                 sync_cls_avg_factor=True,\n                 bg_cls_weight=0.,\n                 trans_loss_weight=0.0,\n                 transformer=dict(),\n                 loss_cls=dict(),\n                 loss_reg=dict(),\n                 assigner=dict()\n                ):\n        super().__init__()\n        self.num_queries = num_queries\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.embed_dims = embed_dims\n        self.different_heads = different_heads\n        self.predict_refine = predict_refine\n        self.bev_pos = bev_pos\n        self.num_points = num_points\n        self.coord_dim = coord_dim\n        \n        self.sync_cls_avg_factor = sync_cls_avg_factor\n        self.bg_cls_weight = bg_cls_weight\n        \n        self.trans_loss_weight = trans_loss_weight\n        # NOTE: below is a simple MLP to transform the query from prev-frame to cur-frame,\n        # we moved the propagation part outside,\n            \n        self.register_buffer('roi_size', torch.tensor(roi_size, dtype=torch.float32))\n        origin = (-roi_size[0]/2, -roi_size[1]/2)\n        self.register_buffer('origin', torch.tensor(origin, dtype=torch.float32))\n\n        sampler_cfg = dict(type='PseudoSampler')\n        self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.transformer = build_transformer(transformer)\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_reg = build_loss(loss_reg)\n        self.assigner = build_assigner(assigner)\n\n        if self.loss_cls.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n        \n        self._init_embedding()\n        self._init_branch()\n        self.init_weights()\n\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the DeformDETR head.\"\"\"\n\n        for p in self.input_proj.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        \n        xavier_init(self.reference_points_embed, distribution='uniform', bias=0.)\n\n        self.transformer.init_weights()\n\n        # init prediction branch\n        for m in self.reg_branches:\n            for param in m.parameters():\n                if param.dim() > 1:\n                    nn.init.xavier_uniform_(param)\n\n        # focal loss init\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n         
if isinstance(self.cls_branches, nn.ModuleList):\n                for m in self.cls_branches:\n                    if hasattr(m, 'bias'):\n                        nn.init.constant_(m.bias, bias_init)\n            else:\n                m = self.cls_branches\n                nn.init.constant_(m.bias, bias_init)\n        \n        if hasattr(self, 'query_alpha'):\n            for m in self.query_alpha:\n                for param in m.parameters():\n                    if param.dim() > 1:\n                        nn.init.zeros_(param)\n\n    def _init_embedding(self):\n        positional_encoding = dict(\n            type='SinePositionalEncoding',\n            num_feats=self.embed_dims//2,\n            normalize=True\n        )\n        self.bev_pos_embed = build_positional_encoding(positional_encoding)\n\n        # query_pos_embed & query_embed\n        self.query_embedding = nn.Embedding(self.num_queries,\n                                            self.embed_dims)\n\n        self.reference_points_embed = nn.Linear(self.embed_dims, self.num_points * 2)\n        \n    def _init_branch(self):\n        \"\"\"Initialize classification branch and regression branch of head.\"\"\"\n        self.input_proj = Conv2d(\n            self.in_channels, self.embed_dims, kernel_size=1)\n\n        cls_branch = Linear(self.embed_dims, self.cls_out_channels)\n\n        reg_branch = [\n            Linear(self.embed_dims, 2*self.embed_dims),\n            nn.LayerNorm(2*self.embed_dims),\n            nn.ReLU(),\n            Linear(2*self.embed_dims, 2*self.embed_dims),\n            nn.LayerNorm(2*self.embed_dims),\n            nn.ReLU(),\n            Linear(2*self.embed_dims, self.num_points * self.coord_dim),\n        ]\n        reg_branch = nn.Sequential(*reg_branch)\n\n        num_layers = self.transformer.decoder.num_layers\n        if self.different_heads:\n            cls_branches = nn.ModuleList(\n                [copy.deepcopy(cls_branch) for _ in range(num_layers)])\n            reg_branches = nn.ModuleList(\n                [copy.deepcopy(reg_branch) for _ in range(num_layers)])\n        else:\n            cls_branches = nn.ModuleList(\n                [cls_branch for _ in range(num_layers)])\n            reg_branches = nn.ModuleList(\n                [reg_branch for _ in range(num_layers)])\n\n        self.reg_branches = reg_branches\n        self.cls_branches = cls_branches\n\n    def _prepare_context(self, bev_features):\n        \"\"\"Project the BEV features and add the 2D positional embedding.\"\"\"\n        device = bev_features.device\n\n        # Add 2D coordinate grid embedding\n        B, C, H, W = bev_features.shape\n        bev_mask = bev_features.new_zeros(B, H, W)\n        bev_pos_embeddings = self.bev_pos_embed(bev_mask) # (bs, embed_dims, H, W)\n        bev_features = self.input_proj(bev_features) + bev_pos_embeddings # (bs, embed_dims, H, W)\n    \n        assert list(bev_features.shape) == [B, self.embed_dims, H, W]\n        return bev_features\n\n    def forward_train(self, bev_features, img_metas, gts, track_query_info=None, memory_bank=None, return_matching=False):\n        '''\n        Args:\n            bev_features (Tensor): shape [B, C, H, W]\n                feature in bev view\n        Outs:\n            preds_dict (list[dict]), one entry per decoder layer:\n                lines (Tensor): regressed polyline coordinates,\n                    shape [bs, num_query, 2*num_points]\n                scores (Tensor): classification scores,\n                    shape [bs, num_query, num_classes]\n        '''\n\n        
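# overall flow: (1) add positional context to the BEV features, (2) build\n        # the detection queries (plus any propagated track queries), (3) run the\n        # decoder, (4) compute per-layer losses via Hungarian matching\n        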
bev_features = self._prepare_context(bev_features)\n\n        bs, C, H, W = bev_features.shape\n        img_masks = bev_features.new_zeros((bs, H, W))\n        # pos_embed = self.positional_encoding(img_masks)\n        pos_embed = None\n\n        query_embedding = self.query_embedding.weight[None, ...].repeat(bs, 1, 1) # [B, num_q, embed_dims]\n        input_query_num = self.num_queries\n\n        init_reference_points = self.reference_points_embed(query_embedding).sigmoid() # (bs, num_q, 2*num_pts)\n        init_reference_points = init_reference_points.view(-1, self.num_queries, self.num_points, 2) # (bs, num_q, num_pts, 2)\n        \n        assert list(init_reference_points.shape) == [bs, self.num_queries, self.num_points, 2]\n        assert list(query_embedding.shape) == [bs, self.num_queries, self.embed_dims]\n\n        # Prepare the propagated track queries, concat with the original dummy queries\n        if track_query_info is not None and 'track_query_hs_embeds' in track_query_info[0]:\n            new_query_embeds = []\n            new_init_ref_pts = []\n            for b_i in range(bs):\n                new_queries = torch.cat([track_query_info[b_i]['track_query_hs_embeds'], query_embedding[b_i], \n                           track_query_info[b_i]['pad_hs_embeds']], dim=0)\n                new_query_embeds.append(new_queries)\n                init_ref = rearrange(init_reference_points[b_i], 'n k c -> n (k c)', c=2)\n                new_ref = torch.cat([track_query_info[b_i]['trans_track_query_boxes'], init_ref, \n                           track_query_info[b_i]['pad_query_boxes']], dim=0)\n                new_ref = rearrange(new_ref, 'n (k c) -> n k c', c=2)\n                new_init_ref_pts.append(new_ref)\n\n            # concat to get the track+dummy queries\n            query_embedding = torch.stack(new_query_embeds, dim=0)\n            init_reference_points = torch.stack(new_init_ref_pts, dim=0)\n            query_kp_mask = torch.stack([t['query_padding_mask'] for t in track_query_info], dim=0)\n        else:\n            query_kp_mask = query_embedding.new_zeros((bs, self.num_queries), dtype=torch.bool)\n        \n        # inter_queries: per-decoder-layer queries, each (bs, num_q, embed_dims)\n        inter_queries, init_reference, inter_references = self.transformer(\n            mlvl_feats=[bev_features,],\n            mlvl_masks=[img_masks.type(torch.bool)],\n            query_embed=query_embedding,\n            mlvl_pos_embeds=[pos_embed], # not used\n            memory_query=None,\n            init_reference_points=init_reference_points,\n            reg_branches=self.reg_branches,\n            cls_branches=self.cls_branches,\n            predict_refine=self.predict_refine,\n            query_key_padding_mask=query_kp_mask, # mask used in self-attn,\n            memory_bank=memory_bank,\n        )\n\n        outputs = []\n        for i, queries in enumerate(inter_queries):\n            reg_points = inter_references[i] # (bs, num_q, num_points, 2)\n            bs = reg_points.shape[0]\n            reg_points = reg_points.view(bs, -1, 2*self.num_points) # (bs, num_q, 2*num_points)\n\n            scores = self.cls_branches[i](queries) # (bs, num_q, num_classes)\n\n            reg_points_list = []\n            scores_list = []\n            for b_i in range(len(scores)):\n                # keep all queries; padding queries are masked out in the loss\n                reg_points_list.append(reg_points[b_i])\n                
scores_list.append(scores[b_i])\n\n            pred_dict = {\n                'lines': reg_points_list,\n                'scores': scores_list\n            }\n            if return_matching:\n                pred_dict['hs_embeds'] = queries\n            outputs.append(pred_dict)\n\n        # Pass in the track query information to massage the cost matrix\n        loss_dict, det_match_idxs, det_match_gt_idxs, gt_info_list, matched_reg_cost = \\\n                self.loss(gts=gts, preds=outputs, track_info=track_query_info)\n\n        if return_matching:\n            return loss_dict, outputs[-1], det_match_idxs[-1], det_match_gt_idxs[-1], matched_reg_cost[-1], gt_info_list[-1]\n        else:\n            return outputs, loss_dict, det_match_idxs, det_match_gt_idxs, gt_info_list\n    \n    def forward_test(self, bev_features, img_metas, track_query_info=None, memory_bank=None):\n        '''\n        Args:\n            bev_features (Tensor): shape [B, C, H, W]\n                feature in bev view\n        Outs:\n            preds_dict (list[dict]), one entry per decoder layer:\n                lines (Tensor): regressed polyline coordinates,\n                    shape [bs, num_query, 2*num_points]\n                scores (Tensor): classification scores,\n                    shape [bs, num_query, num_classes]\n        '''\n\n        bev_features = self._prepare_context(bev_features)\n\n        bs, C, H, W = bev_features.shape\n        assert bs == 1, 'Only support bs=1 per-gpu for inference'\n        \n        img_masks = bev_features.new_zeros((bs, H, W))\n        # pos_embed = self.positional_encoding(img_masks)\n        pos_embed = None\n\n        query_embedding = self.query_embedding.weight[None, ...].repeat(bs, 1, 1) # [B, num_q, embed_dims]\n        input_query_num = self.num_queries\n        \n        init_reference_points = self.reference_points_embed(query_embedding).sigmoid() # (bs, num_q, 2*num_pts)\n        init_reference_points = init_reference_points.view(-1, self.num_queries, self.num_points, 2) # (bs, num_q, num_pts, 2)\n        \n        assert list(init_reference_points.shape) == [bs, input_query_num, self.num_points, 2]\n        assert list(query_embedding.shape) == [bs, input_query_num, self.embed_dims]\n\n        # Prepare the propagated track queries, concat with the original dummy queries\n        if track_query_info is not None and 'track_query_hs_embeds' in track_query_info[0]:\n            prev_hs_embed = torch.stack([t['track_query_hs_embeds'] for t in track_query_info])\n            prev_boxes = torch.stack([t['trans_track_query_boxes'] for t in track_query_info])\n            prev_boxes = rearrange(prev_boxes, 'b n (k c) -> b n k c', c=2)\n\n            # concat to get the track+dummy queries\n            query_embedding = torch.cat([prev_hs_embed, query_embedding], dim=1)\n            init_reference_points = torch.cat([prev_boxes, init_reference_points], dim=1)\n            \n        query_kp_mask = query_embedding.new_zeros((bs, query_embedding.shape[1]), dtype=torch.bool)\n\n        inter_queries, init_reference, inter_references = self.transformer(\n            mlvl_feats=[bev_features,],\n            mlvl_masks=[img_masks.type(torch.bool)],\n            query_embed=query_embedding,\n            mlvl_pos_embeds=[pos_embed], # not used\n            memory_query=None,\n            init_reference_points=init_reference_points,\n            reg_branches=self.reg_branches,\n            cls_branches=self.cls_branches,\n            predict_refine=self.predict_refine,\n            query_key_padding_mask=query_kp_mask, # mask used in self-attn,\n            memory_bank=memory_bank,\n        )\n\n        
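# inter_queries / inter_references hold the decoded queries and refined\n        # polyline points for every decoder layer; gather per-layer outputs below\n        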
outputs = []\n        for i_query, queries in enumerate(inter_queries):\n            reg_points = inter_references[i_query] # (bs, num_q, num_points, 2)\n            bs = reg_points.shape[0]\n            reg_points = reg_points.view(bs, -1, 2*self.num_points) # (bs, num_q, 2*num_points)\n            scores = self.cls_branches[i_query](queries) # (bs, num_q, num_classes)\n\n            reg_points_list = []\n            scores_list = []\n            for i in range(len(scores)):\n                reg_points_list.append(reg_points[i])\n                scores_list.append(scores[i])\n\n            pred_dict = {\n                'lines': reg_points_list,\n                'scores': scores_list,\n                'hs_embeds': queries,\n            }\n            outputs.append(pred_dict)\n\n        return outputs\n\n    @force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))\n    def _get_target_single(self,\n                           score_pred,\n                           lines_pred,\n                           gt_labels,\n                           gt_lines,\n                           track_info=None,\n                           gt_bboxes_ignore=None):\n        \"\"\"\n            Compute regression and classification targets for one image.\n            Outputs from a single decoder layer of a single feature level are used.\n            Args:\n                score_pred (Tensor): Box score logits from a single decoder layer\n                    for one image. Shape [num_query, cls_out_channels].\n                lines_pred (Tensor):\n                    shape [num_query, 2*num_points]\n                gt_labels (torch.LongTensor)\n                    shape [num_gt, ]\n                gt_lines (Tensor):\n                    shape [num_gt, 2*num_points].\n                \n            Returns:\n                tuple[Tensor]: a tuple containing the following for one sample.\n                    - labels (LongTensor): Labels of each image.\n                        shape [num_query, 1]\n                    - label_weights (Tensor): Label weights of each image.\n                        shape [num_query, 1]\n                    - lines_target (Tensor): Lines targets of each image.\n                        shape [num_query, num_points, 2]\n                    - lines_weights (Tensor): Lines weights of each image.\n                        shape [num_query, num_points, 2]\n                    - pos_inds (Tensor): Sampled positive indices for each image.\n                    - neg_inds (Tensor): Sampled negative indices for each image.\n        \"\"\"\n        num_pred_lines = len(lines_pred)\n        # assigner and sampler\n        \n        # We massage the matching cost here using the track info, following\n        # the 3-type supervision of TrackFormer/MOTR:\n        
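# (1) track queries keep their previously matched GT identity, (2) augmented\n        # false-positive track queries are forced to background, (3) the remaining\n        # queries are Hungarian-matched to the remaining GTs\n        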
assign_result, gt_permute_idx, matched_reg_cost = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),\n                                             gts=dict(lines=gt_lines,\n                                                      labels=gt_labels, ),\n                                             track_info=track_info,\n                                             gt_bboxes_ignore=gt_bboxes_ignore)\n        sampling_result = self.sampler.sample(\n            assign_result, lines_pred, gt_lines)\n        num_gt = len(gt_lines)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        pos_gt_inds = sampling_result.pos_assigned_gt_inds\n\n        labels = gt_lines.new_full(\n                (num_pred_lines, ), self.num_classes, dtype=torch.long) # (num_q, )\n        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        label_weights = gt_lines.new_ones(num_pred_lines) # (num_q, )\n\n        lines_target = torch.zeros_like(lines_pred) # (num_q, 2*num_pts)\n        lines_weights = torch.zeros_like(lines_pred) # (num_q, 2*num_pts)\n        \n        if num_gt > 0:\n            if gt_permute_idx is not None: # using permute invariant label\n                # gt_permute_idx: (num_q, num_gt)\n                # pos_inds: which query is positive\n                # pos_gt_inds: which gt each pos pred is assigned\n                # single_matched_gt_permute_idx: which permute order is matched\n                single_matched_gt_permute_idx = gt_permute_idx[\n                    pos_inds, pos_gt_inds\n                ]\n                lines_target[pos_inds] = gt_lines[pos_gt_inds, single_matched_gt_permute_idx].type(\n                    lines_target.dtype) # (num_q, 2*num_pts)\n            else:\n                lines_target[pos_inds] = sampling_result.pos_gt_bboxes.type(\n                    lines_target.dtype) # (num_q, 2*num_pts)\n        \n        lines_weights[pos_inds] = 1.0 # (num_q, 2*num_pts)\n\n        # normalization\n        # n = lines_weights.sum(-1, keepdim=True) # (num_q, 1)\n        # lines_weights = lines_weights / n.masked_fill(n == 0, 1) # (num_q, 2*num_pts)\n        # [0, ..., 0] for neg ind and [1/npts, ..., 1/npts] for pos ind\n\n        return (labels, label_weights, lines_target, lines_weights,\n                pos_inds, neg_inds, pos_gt_inds, matched_reg_cost)\n\n    # @force_fp32(apply_to=('preds', 'gts'))\n    def get_targets(self, preds, gts, track_info=None, gt_bboxes_ignore_list=None):\n        \"\"\"\n            Compute regression and classification targets for a batch of images.\n            Outputs from a single decoder layer of a single feature level are used.\n            Args:\n                preds (dict): \n                    - lines (Tensor): shape (bs, num_queries, 2*num_points)\n                    - scores (Tensor): shape (bs, num_queries, num_class_channels)\n                gts (dict):\n                    - class_label (list[Tensor]): tensor shape (num_gts, )\n                    - lines (list[Tensor]): tensor shape (num_gts, 2*num_points)\n                gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                    boxes which can be ignored for each image. 
Default None.\n            Returns:\n                tuple: a tuple containing the following targets.\n                    - labels_list (list[Tensor]): Labels for all images.\n                    - label_weights_list (list[Tensor]): Label weights for all \\\n                        images.\n                    - lines_targets_list (list[Tensor]): Lines targets for all \\\n                        images.\n                    - lines_weight_list (list[Tensor]): Lines weights for all \\\n                        images.\n                    - num_total_pos (int): Number of positive samples in all \\\n                        images.\n                    - num_total_neg (int): Number of negative samples in all \\\n                        images.\n        \"\"\"\n        assert gt_bboxes_ignore_list is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n\n        # format the inputs\n        gt_labels = gts['labels']\n        gt_lines = gts['lines']\n\n        lines_pred = preds['lines']\n\n        if track_info is None:\n            track_info = [track_info for _ in range(len(gt_labels))]\n\n        (labels_list, label_weights_list,\n        lines_targets_list, lines_weights_list,\n        pos_inds_list, neg_inds_list,pos_gt_inds_list, matched_reg_cost) = multi_apply(\n            self._get_target_single, preds['scores'], lines_pred,\n            gt_labels, gt_lines, track_info, gt_bboxes_ignore=gt_bboxes_ignore_list)\n        \n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n\n        if track_info[0] is not None:\n            # remove the padding elements from the neg counting\n            padding_mask = torch.cat([t['query_padding_mask'] for t in track_info], dim=0)\n            num_padding = padding_mask.sum()\n            num_total_neg -= num_padding\n        \n        new_gts = dict(\n            labels=labels_list, # list[Tensor(num_q, )], length=bs\n            label_weights=label_weights_list, # list[Tensor(num_q, )], length=bs, all ones\n            lines=lines_targets_list, # list[Tensor(num_q, 2*num_pts)], length=bs\n            lines_weights=lines_weights_list, # list[Tensor(num_q, 2*num_pts)], length=bs\n        )\n\n        return new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list, matched_reg_cost\n\n    # @force_fp32(apply_to=('preds', 'gts'))\n    def loss_single(self,\n                    preds,\n                    gts,\n                    track_info=None,\n                    gt_bboxes_ignore_list=None,\n                    reduction='none'):\n        \"\"\"\n            Loss function for outputs from a single decoder layer of a single\n            feature level.\n            Args:\n                preds (dict): \n                    - lines (Tensor): shape (bs, num_queries, 2*num_points)\n                    - scores (Tensor): shape (bs, num_queries, num_class_channels)\n                gts (dict):\n                    - class_label (list[Tensor]): tensor shape (num_gts, )\n                    - lines (list[Tensor]): tensor shape (num_gts, 2*num_points)\n                gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                    boxes which can be ignored for each image. 
Default None.\n            Returns:\n                dict[str, Tensor]: A dictionary of loss components for outputs from\n                    a single decoder layer.\n        \"\"\"\n\n        # Get target for each sample\n        new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list, matched_reg_cost =\\\n            self.get_targets(preds, gts, track_info, gt_bboxes_ignore_list)\n\n        # Batch all data\n        # for k, v in new_gts.items():\n        #     new_gts[k] = torch.stack(v, dim=0) # tensor (bs, num_q, ...)\n\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 1.0 + \\\n            num_total_neg * self.bg_cls_weight\n        \n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                preds['scores'][0].new_tensor([cls_avg_factor]))\n        cls_avg_factor = max(cls_avg_factor, 1)\n\n        if track_info is not None:\n            cat_padding_mask = torch.cat([t['query_padding_mask'] for t in track_info], dim=0)\n            padding_loss_mask = ~cat_padding_mask\n\n        # Classification loss\n        # the loss expects the class dim as the second dim, so flatten the predictions\n        pred_scores = torch.cat(preds['scores'], dim=0) # (bs*num_q, cls_out_channels)\n        cls_scores = pred_scores.reshape(-1, self.cls_out_channels) # (bs*num_q, cls_out_channels)\n        cls_labels = torch.cat(new_gts['labels'], dim=0).reshape(-1) # (bs*num_q, )\n        cls_weights = torch.cat(new_gts['label_weights'], dim=0).reshape(-1) # (bs*num_q, )\n        if track_info is not None:\n            cls_weights = cls_weights * padding_loss_mask.float()\n        \n        loss_cls = self.loss_cls(\n            cls_scores, cls_labels, cls_weights, avg_factor=cls_avg_factor)\n        \n        # Compute the average number of gt boxes across all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        pred_lines = torch.cat(preds['lines'], dim=0)\n        gt_lines = torch.cat(new_gts['lines'], dim=0)\n        line_weights = torch.cat(new_gts['lines_weights'], dim=0)\n        if track_info is not None:\n            line_weights = line_weights * padding_loss_mask[:, None].float()\n\n        assert len(pred_lines) == len(gt_lines)\n        assert len(gt_lines) == len(line_weights)\n\n        loss_reg = self.loss_reg(\n            pred_lines, gt_lines, line_weights, avg_factor=num_total_pos)\n\n        loss_dict = dict(\n            cls=loss_cls,\n            reg=loss_reg,\n        )\n\n        new_gts_info = {\n            'labels': new_gts['labels'],\n            'lines': new_gts['lines'],\n        }\n\n        return loss_dict, pos_inds_list, pos_gt_inds_list, matched_reg_cost, new_gts_info\n    \n    @force_fp32(apply_to=('gt_lines_list', 'preds_dicts'))\n    def loss(self,\n             gts,\n             preds,\n             gt_bboxes_ignore=None,\n             track_info=None,\n             reduction='mean',\n            ):\n        \"\"\"\n            Loss Function.\n            Args:\n                gts (list[dict]): list length: num_layers\n                    dict {\n                        'label': list[tensor(num_gts, )], list length: batchsize,\n                        'line': list[tensor(num_gts, 2*num_points)], list length: batchsize,\n                        ...\n                    }\n                preds 
(list[dict]): list length: num_layers\n                    dict {\n                        'lines': tensor(bs, num_queries, 2*num_points),\n                        'scores': tensor(bs, num_queries, class_out_channels),\n                    }\n                    \n                gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                    which can be ignored for each image. Default None.\n            Returns:\n                dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            f'{self.__class__.__name__} only supports ' \\\n            f'gt_bboxes_ignore set to None.'\n\n        # apply the loss to the outputs of every decoder layer\n        track_info = [track_info for _ in range(len(gts))]\n        losses, pos_inds_lists, pos_gt_inds_lists, matched_reg_costs, gt_info_list = multi_apply(\n            self.loss_single, preds, gts, track_info, reduction=reduction)\n\n        # Format the losses\n        loss_dict = dict()\n        # loss from the last decoder layer\n        for k, v in losses[-1].items():\n            loss_dict[k] = v\n        \n        # Loss from other decoder layers\n        num_dec_layer = 0\n        for loss in losses[:-1]:\n            for k, v in loss.items():\n                loss_dict[f'd{num_dec_layer}.{k}'] = v\n            num_dec_layer += 1\n\n        return loss_dict, pos_inds_lists, pos_gt_inds_lists, gt_info_list, matched_reg_costs\n    \n    def post_process(self, preds_dict, tokens, track_dict=None, thr=0.0):\n        lines = preds_dict['lines'] # List[Tensor(num_queries, 2*num_points)]\n        bs = len(lines)\n        scores = preds_dict['scores'] # (bs, num_queries, 3)\n\n        results = []\n        for i in range(bs):\n            tmp_vectors = lines[i]\n            # set up the prop_flags: the last self.num_queries entries are the newly\n            # initialized queries, everything before them was propagated from tracks\n            tmp_prop_flags = torch.zeros(tmp_vectors.shape[0]).bool()\n            tmp_prop_flags[-self.num_queries:] = 0\n            tmp_prop_flags[:-self.num_queries] = 1\n            num_preds, num_points2 = tmp_vectors.shape\n            tmp_vectors = tmp_vectors.view(num_preds, num_points2//2, 2)\n\n            if self.loss_cls.use_sigmoid:\n                tmp_scores, tmp_labels = scores[i].max(-1)\n                tmp_scores = tmp_scores.sigmoid()\n                pos = tmp_scores > thr\n            else:\n                assert self.num_classes + 1 == self.cls_out_channels\n                tmp_scores, tmp_labels = scores[i].max(-1)\n                bg_cls = self.cls_out_channels - 1 # background is the last class\n                pos = tmp_labels != bg_cls\n\n            tmp_vectors = tmp_vectors[pos]\n            tmp_scores = tmp_scores[pos]\n            tmp_labels = tmp_labels[pos]\n            tmp_prop_flags = tmp_prop_flags[pos]\n\n            if len(tmp_scores) == 0:\n                single_result = {\n                    'vectors': [],\n                    'scores': [],\n                    'labels': [],\n                    'props': [],\n                    'token': tokens[i]\n                }\n            else:\n                single_result = {\n                    'vectors': tmp_vectors.detach().cpu().numpy(),\n                    'scores': tmp_scores.detach().cpu().numpy(),\n                    'labels': tmp_labels.detach().cpu().numpy(),\n                    'props': tmp_prop_flags.detach().cpu().numpy(),\n                    'token': tokens[i]\n                }\n\n            # also save the tracking information for analyzing\n            if track_dict is not None and len(track_dict['lines']) > 0:\n                tmp_track_scores = 
track_dict['scores'][i]\n                tmp_track_vectors = track_dict['lines'][i]\n                tmp_track_scores, tmp_track_labels = tmp_track_scores.max(-1)\n                tmp_track_scores = tmp_track_scores.sigmoid()\n                single_result['track_scores'] = tmp_track_scores.detach().cpu().numpy()\n                single_result['track_vectors'] = tmp_track_vectors.detach().cpu().numpy()\n                single_result['track_labels'] = tmp_track_labels.detach().cpu().numpy()\n            else:\n                single_result['track_scores'] = []\n                single_result['track_vectors'] = []\n                single_result['track_labels'] = []\n\n            results.append(single_result)\n    \n        return results\n    \n    def prepare_temporal_propagation(self, preds_dict, scene_name, local_idx, memory_bank=None, \n                        thr_track=0.1, thr_det=0.5):\n        lines = preds_dict['lines'] # List[Tensor(num_queries, 2*num_points)]\n        queries = preds_dict['hs_embeds']\n        bs = len(lines)\n        assert bs == 1, 'now only support bs=1 for temporal-evolving inference'\n        scores = preds_dict['scores'] # (bs, num_queries, 3)\n\n        first_frame = local_idx == 0\n\n        tmp_vectors = lines[0]\n        tmp_queries = queries[0]\n\n        # focal loss\n        if self.loss_cls.use_sigmoid:\n            tmp_scores, tmp_labels = scores[0].max(-1)\n            tmp_scores = tmp_scores.sigmoid()\n            pos_track = tmp_scores[:-100] > thr_track\n            pos_det = tmp_scores[-100:] > thr_det\n            pos = torch.cat([pos_track, pos_det], dim=0)\n        else:\n            raise RuntimeError('The experiment uses sigmoid for cls outputs')\n\n        pos_vectors = tmp_vectors[pos]\n        pos_labels = tmp_labels[pos]\n        pos_queries = tmp_queries[pos]\n        pos_scores = tmp_scores[pos]\n\n        if first_frame:\n            global_ids = torch.arange(len(pos_vectors))\n            num_instance = len(pos_vectors)\n        else:\n            prop_ids = self.prop_info['global_ids']\n            prop_num_instance = self.prop_info['num_instance']\n            global_ids_track = prop_ids[pos_track]\n            num_newborn = int(pos_det.sum())\n            global_ids_newborn = torch.arange(num_newborn) + prop_num_instance\n            global_ids = torch.cat([global_ids_track, global_ids_newborn])\n            num_instance = prop_num_instance + num_newborn\n            \n        self.prop_info = {\n            'vectors': pos_vectors,\n            'queries': pos_queries,\n            'scores': pos_scores,\n            'labels': pos_labels,\n            'scene_name': scene_name,\n            'local_idx': local_idx,\n            'global_ids': global_ids,\n            'num_instance': num_instance,\n        }\n\n        if memory_bank is not None:\n            if first_frame:\n                num_tracks = 0\n            else:\n                num_tracks = self.prop_active_tracks\n            pos_out_inds = torch.where(pos)[0]\n            prev_out = {\n                'hs_embeds': queries,\n                'scores': scores,\n            }\n            memory_bank.update_memory(0, first_frame, pos_out_inds, prev_out, num_tracks, local_idx, memory_bank.curr_t)\n            self.prop_active_tracks = len(pos_out_inds)\n        \n        save_pos_results = {\n            'vectors': pos_vectors.cpu().numpy(),\n            'scores': pos_scores.cpu().numpy(),\n            'labels': pos_labels.cpu().numpy(),\n            'global_ids': 
global_ids.cpu().numpy(),\n            'scene_name': scene_name,\n            'local_idx': local_idx,\n            'num_instance': num_instance,\n        }\n\n        return save_pos_results\n    \n    def get_track_info(self, scene_name, local_idx):\n        prop_info = self.prop_info\n        assert prop_info['scene_name'] == scene_name and (prop_info['local_idx']+1 == local_idx or \\\n            prop_info['local_idx'] == local_idx)\n            \n        vectors = prop_info['vectors']\n        queries = prop_info['queries']\n        device = queries.device\n\n        target = {}\n        target['track_query_hs_embeds'] = queries\n        target['track_query_boxes'] = vectors\n        track_info = [target, ]\n\n        return track_info\n    \n    def get_self_iter_track_query(self, preds_dict):\n        num_tracks = self.prop_active_tracks\n\n        lines = preds_dict['lines'] # List[Tensor(num_queries, 2*num_points)]\n        queries = preds_dict['hs_embeds']\n        bs = len(lines)\n        assert bs == 1, 'now only support bs=1 for temporal-evolving inference'\n        scores = preds_dict['scores'] # (bs, num_queries, 3)\n\n        queries = queries[0][:num_tracks]\n        vectors = lines[0][:num_tracks]\n\n        target = {}\n        target['track_query_hs_embeds'] = queries\n        target['track_query_boxes'] = vectors\n        track_info = [target, ]\n        return track_info\n\n\n    \n    def clear_temporal_cache(self):\n        self.prop_info = None\n\n    def train(self, *args, **kwargs):\n        super().train(*args, **kwargs)\n    \n    def eval(self):\n        super().eval()\n\n    def forward(self, *args, return_loss=True, **kwargs):\n        if return_loss:\n            return self.forward_train(*args, **kwargs)\n        else:\n            return self.forward_test(*args, **kwargs)"
  },
  {
    "path": "plugin/models/heads/MapSegHead.py",
    "content": "import copy\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, Linear, build_activation_layer, bias_init_with_prob, xavier_init\nfrom mmcv.runner import force_fp32\nfrom mmcv.cnn.bricks.transformer import build_positional_encoding\nfrom mmdet.models import build_loss\n\nfrom mmdet.models import HEADS\n\nfrom einops import repeat\n\n\n@HEADS.register_module(force=True)\nclass MapSegHead(nn.Module):\n\n    def __init__(self, \n                 num_classes=3,\n                 in_channels=256,\n                 embed_dims=256,\n                 bev_size=(100,50),\n                 canvas_size=(200,100),\n                 loss_seg=dict(),\n                 loss_dice=dict(),\n        ):\n        super().__init__()\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.embed_dims = embed_dims\n        self.bev_size = bev_size\n        self.canvas_size = canvas_size\n\n        self.loss_seg = build_loss(loss_seg)\n        self.loss_dice = build_loss(loss_dice)\n\n        if self.loss_seg.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n\n        assert canvas_size[0] % bev_size[0] == 0, 'canvas size must be a multiple of the bev size'\n        self.num_up_blocks = int(np.log2(canvas_size[0] // bev_size[0]))\n\n        self.conv_in = nn.Conv2d(in_channels, embed_dims, kernel_size=3, padding=1, bias=False)\n        self.relu = nn.ReLU(inplace=True)\n\n        self.conv_mid_layers = nn.ModuleList([])\n        self.downsample_layers = nn.ModuleList([])\n        for _ in range(self.num_up_blocks):\n            conv_mid = nn.Sequential(\n                nn.Upsample(scale_factor=2, mode='nearest'),\n                nn.Conv2d(embed_dims, embed_dims, kernel_size=3, padding=1),\n                nn.ReLU(inplace=True),\n            )\n            self.conv_mid_layers.append(conv_mid)\n            self.downsample_layers.append(nn.Upsample(scale_factor=0.5, mode='bilinear'))\n\n        self.conv_out = nn.Conv2d(embed_dims, self.cls_out_channels, kernel_size=1, padding=0)\n        \n\n        self.init_weights()\n    \n    def init_weights(self):\n        if self.loss_seg.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            m = self.conv_out\n            nn.init.constant_(m.bias, bias_init)\n    \n    def forward_train(self, bev_features, gts, history_coords):\n        x = self.relu(self.conv_in(bev_features))\n        for conv_mid in self.conv_mid_layers:\n            x = conv_mid(x)\n        preds = self.conv_out(x)\n\n        seg_loss = self.loss_seg(preds, gts)\n        dice_loss = self.loss_dice(preds, gts)\n        \n        # downsample the features to the original bev size\n        seg_feats = x\n        for downsample in self.downsample_layers:\n            seg_feats = downsample(seg_feats)\n\n        return preds, seg_feats, seg_loss, dice_loss\n        \n    def forward_test(self, bev_features):\n        x = self.relu(self.conv_in(bev_features))\n        for conv_mid in self.conv_mid_layers:\n            x = conv_mid(x)\n        preds = self.conv_out(x)\n        seg_feats = x\n        for downsample in self.downsample_layers:\n            seg_feats = downsample(seg_feats)\n        return preds, seg_feats\n    \n    def train(self, *args, **kwargs):\n        super().train(*args, **kwargs)\n    \n    def eval(self):\n        super().eval()\n\n    def forward(self, *args, 
return_loss=True, **kwargs):\n        if return_loss:\n            return self.forward_train(*args, **kwargs)\n        else:\n            return self.forward_test(*args, **kwargs)"
  },
  {
    "path": "plugin/models/heads/__init__.py",
    "content": "from .MapDetectorHead import MapDetectorHead\nfrom .MapSegHead import MapSegHead\n"
  },
  {
    "path": "plugin/models/heads/base_map_head.py",
    "content": "from abc import ABCMeta, abstractmethod\n\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16\nfrom mmcv.utils import print_log\n\nfrom mmdet.utils import get_root_logger\n\n\nclass BaseMapHead(nn.Module, metaclass=ABCMeta):\n    \"\"\"Base class for mappers.\"\"\"\n\n    def __init__(self):\n        super(BaseMapHead, self).__init__()\n        self.fp16_enabled = False\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize the weights in detector.\n        Args:\n            pretrained (str, optional): Path to pre-trained weights.\n                Defaults to None.\n        \"\"\"\n        if pretrained is not None:\n            logger = get_root_logger()\n            print_log(f'load model from: {pretrained}', logger=logger)\n\n    @auto_fp16(apply_to=('img', ))\n    def forward(self, *args, **kwargs):\n        pass\n        \n    @abstractmethod\n    def loss(self, pred, gt):\n        '''\n        Compute loss\n        Output:\n            dict(\n                loss: torch.Tensor\n                log_vars: dict(\n                    str: float,\n                )\n                num_samples: int\n            )\n        '''\n        return\n        \n    @abstractmethod\n    def post_process(self, pred):\n        '''\n        convert model predictions to vectorized outputs\n        the output format should be consistent with the evaluation function\n        '''\n        return\n"
  },
  {
    "path": "plugin/models/losses/__init__.py",
    "content": "from .detr_loss import LinesL1Loss, MasksLoss, LenLoss\nfrom .seg_loss import MaskFocalLoss, MaskDiceLoss"
  },
  {
    "path": "plugin/models/losses/detr_loss.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nfrom mmdet.models.losses import l1_loss, smooth_l1_loss\nfrom mmdet.models.losses.utils import weighted_loss\nimport mmcv\n\nfrom mmdet.models.builder import LOSSES\n\n\n@LOSSES.register_module()\nclass LinesL1Loss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0, beta=0.5):\n        \"\"\"\n            L1 loss. The same as the smooth L1 loss\n            Args:\n                reduction (str, optional): The method to reduce the loss.\n                    Options are \"none\", \"mean\" and \"sum\".\n                loss_weight (float, optional): The weight of loss.\n        \"\"\"\n\n        super().__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.beta = beta\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): The prediction.\n                shape: [bs, ...]\n            target (torch.Tensor): The learning target of the prediction.\n                shape: [bs, ...]\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None. \n                it's useful when the predictions are not all valid.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        if self.beta > 0:\n            loss = smooth_l1_loss(\n                pred, target, weight, reduction=reduction, avg_factor=avg_factor, beta=self.beta)\n        \n        else:\n            loss = l1_loss(\n                pred, target, weight, reduction=reduction, avg_factor=avg_factor)\n        \n        num_points = pred.shape[-1] // 2\n        loss = loss / num_points\n\n        return loss*self.loss_weight\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef bce(pred, label, class_weight=None):\n    \"\"\"\n        pred: B,nquery,npts\n        label: B,nquery,npts\n    \"\"\"\n\n    if label.numel() == 0:\n        return pred.sum() * 0\n    assert pred.size() == label.size()\n\n    loss = F.binary_cross_entropy_with_logits(\n        pred, label.float(), pos_weight=class_weight, reduction='none')\n\n    return loss\n\n\n@LOSSES.register_module()\nclass MasksLoss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(MasksLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            xxx\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss = bce(pred, target, weight, reduction=reduction,\n                   
avg_factor=avg_factor)\n\n        return loss*self.loss_weight\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef ce(pred, label, class_weight=None):\n    \"\"\"\n        pred: B*nquery, num_classes\n        label: B*nquery,\n    \"\"\"\n\n    if label.numel() == 0:\n        return pred.sum() * 0\n\n    loss = F.cross_entropy(\n        pred, label, weight=class_weight, reduction='none')\n\n    return loss\n\n\n@LOSSES.register_module()\nclass LenLoss(nn.Module):\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(LenLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n        Args:\n            pred (torch.Tensor): classification logits, shape (B*nquery, num_classes).\n            target (torch.Tensor): target class indices, shape (B*nquery, ).\n            weight (torch.Tensor, optional): element-wise loss weight.\n            avg_factor (int, optional): factor used to average the loss.\n            reduction_override (str, optional): overrides self.reduction.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss = ce(pred, target, weight, reduction=reduction,\n                   avg_factor=avg_factor)\n\n        return loss*self.loss_weight"
  },
  {
    "path": "plugin/models/losses/seg_loss.py",
    "content": "import torch\nfrom torch import nn as nn\nfrom torch.nn import functional as F\nimport mmcv\n\nfrom mmdet.models.builder import LOSSES\nfrom mmdet.models.losses import FocalLoss, weight_reduce_loss\n\nfrom einops import rearrange\n\n\ndef py_sigmoid_focal_loss(pred,\n                          target,\n                          weight=None,\n                          gamma=2.0,\n                          alpha=0.25,\n                          reduction='mean',\n                          avg_factor=None):\n    \"\"\"PyTorch version of `Focal Loss <https://arxiv.org/abs/1708.02002>`_.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the\n            number of classes\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    pred_sigmoid = pred.sigmoid()\n    target = target.type_as(pred)\n    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)\n    focal_weight = (alpha * target + (1 - alpha) *\n                    (1 - target)) * pt.pow(gamma)\n    loss = F.binary_cross_entropy_with_logits(\n        pred, target, reduction='none') * focal_weight\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. 
But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass MaskFocalLoss(FocalLoss):\n    def __init__(self,**kwargs):\n        super(MaskFocalLoss, self).__init__(**kwargs)\n    \n    def forward(self, \n                pred, \n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if not self.use_sigmoid:\n            raise NotImplementedError\n        \n        num_classes = pred.size(1)\n        loss = 0\n        for index in range(num_classes):\n            # self.loss_weight is applied once at the end, not per class\n            loss += py_sigmoid_focal_loss(\n                pred[:,index],\n                target[:,index],\n                weight,\n                gamma=self.gamma,\n                alpha=self.alpha,\n                reduction=reduction,\n                avg_factor=avg_factor)\n\n        loss /= num_classes\n        return loss * self.loss_weight\n\n\n@LOSSES.register_module()\nclass MaskDiceLoss(nn.Module):\n    \"\"\"Dice Loss PyTorch\n        Created by: Zhang Shuai\n        Email: shuaizzz666@gmail.com\n        dice_loss = 1 - 2*p*t / (p^2 + t^2). p and t represent predict and target.\n    Args:\n        weight: An array of shape [C,]\n        predict: A float32 tensor of shape [N, C, *], for Semantic segmentation task is [N, C, H, W]\n        target: A int64 tensor of shape [N, *], for Semantic segmentation task is [N, H, W]\n    Return:\n        diceloss\n    \"\"\"\n    def __init__(self, loss_weight):\n        super(MaskDiceLoss, self).__init__()\n        self.smooth = 1e-5\n        self.loss_weight = loss_weight\n\n    def forward(self, pred, target):\n        bs, num_classes = pred.shape[:2]\n        pred = rearrange(pred, 'b n h w -> b n (h w)')\n        target = rearrange(target, 'b n h w -> b n (h w)')\n        pred = pred.sigmoid()\n        intersection = torch.sum(pred * target, dim=2)  # (N, C)\n        union = torch.sum(pred.pow(2), dim=2) + torch.sum(target, dim=2)  # (N, C)\n        ## p^2 + t^2 >= 2*p*t, target_onehot^2 == target_onehot\n        dice_coef = (2 * intersection + self.smooth) / (union + self.smooth)  # (N, C)\n        dice_loss = 1 - torch.mean(dice_coef)  # scalar\n        \n        loss = self.loss_weight * dice_loss\n        return loss"
  },
  {
    "path": "plugin/models/mapers/MapTracker.py",
    "content": "\"\"\"\n    MapTracker main module, adapted from StreamMapNet\n\"\"\"\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nfrom mmdet3d.models.builder import (build_backbone, build_head)\n\nfrom .base_mapper import BaseMapper, MAPPERS\nfrom ..utils.query_update import MotionMLP\nfrom copy import deepcopy\nfrom mmdet.core import multi_apply\n\nfrom einops import rearrange, repeat\nfrom scipy.spatial.transform import Rotation as R\n\nfrom .vector_memory import VectorInstanceMemory\n\n\n@MAPPERS.register_module()\nclass MapTracker(BaseMapper):\n\n    def __init__(self,\n                 bev_h,\n                 bev_w,\n                 roi_size,\n                 backbone_cfg=dict(),\n                 head_cfg=dict(),\n                 neck_cfg=None,\n                 seg_cfg=None,\n                 model_name=None, \n                 pretrained=None,\n                 history_steps=None,\n                 test_time_history_steps=None,\n                 mem_select_dist_ranges=[0,0,0,0],\n                 skip_vector_head=False,\n                 freeze_bev=False,\n                 freeze_bev_iters=None,\n                 track_fp_aug=True,\n                 use_memory=False,\n                 mem_len=None,\n                 mem_warmup_iters=-1,\n                 **kwargs):\n        super().__init__()\n\n        #Attribute\n        self.model_name = model_name\n        self.last_epoch = None\n  \n        self.backbone = build_backbone(backbone_cfg)\n\n        if neck_cfg is not None:\n            self.neck = build_head(neck_cfg)\n        else:\n            self.neck = nn.Identity()\n        \n        self.head = build_head(head_cfg)\n        self.num_decoder_layers = self.head.transformer.decoder.num_layers\n        self.skip_vector_head = skip_vector_head\n        self.freeze_bev = freeze_bev # whether freeze bev related parameters\n        self.freeze_bev_iters = freeze_bev_iters # whether freeze bev related parameters\n        self.track_fp_aug = track_fp_aug\n        self.use_memory = use_memory\n        self.mem_warmup_iters = mem_warmup_iters\n\n        # the track query propagation module, using relative pose\n        c_dim = 7 # quaternion for rotation (4) + translation (3)\n        self.query_propagate = MotionMLP(c_dim=c_dim, f_dim=self.head.embed_dims, identity=True)\n\n        # BEV semantic seg head\n        self.seg_decoder = build_head(seg_cfg)\n        \n        # BEV \n        self.bev_h = bev_h\n        self.bev_w = bev_w\n        self.roi_size = roi_size\n        self.history_steps = history_steps\n\n        self.mem_len = mem_len\n\n        # Set up test time memory selection hyper-parameters\n        if test_time_history_steps is None:\n            self.test_time_history_steps = history_steps\n        else:\n            self.test_time_history_steps = test_time_history_steps\n        self.mem_select_dist_ranges = mem_select_dist_ranges\n\n        # vector instance memory module\n        if self.use_memory:\n            self.memory_bank = VectorInstanceMemory(\n                dim_in=head_cfg.embed_dims,\n                number_ins=head_cfg.num_queries,\n                bank_size=mem_len,\n                mem_len=mem_len,\n                mem_select_dist_ranges=self.mem_select_dist_ranges,\n            )\n\n        xmin, xmax = -roi_size[0]/2, roi_size[0]/2\n        ymin, ymax = -roi_size[1]/2, roi_size[1]/2\n        x = torch.linspace(xmin, xmax, bev_w)\n        y = torch.linspace(ymax, ymin, bev_h)\n        y, x = torch.meshgrid(y, x)\n        z = 
torch.zeros_like(x)\n        ones = torch.ones_like(x)\n        plane = torch.stack([x, y, z, ones], dim=-1)\n        self.register_buffer('plane', plane.double())\n        \n        self.init_weights(pretrained)\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize model weights.\"\"\"\n        if pretrained:\n            import logging\n            logger = logging.getLogger()\n            from mmcv.runner import load_checkpoint\n            load_checkpoint(self, pretrained, strict=False, logger=logger)\n        else:\n            try:\n                self.neck.init_weights()\n            except AttributeError:\n                pass\n\n    def temporal_propagate(self, curr_bev_feats, img_metas, all_history_curr2prev, all_history_prev2curr, use_memory,\n                           track_query_info=None, timestep=None, get_trans_loss=False):\n        '''\n        Args:\n            curr_bev_feat: torch.Tensor of shape [B, neck_input_channels, H, W]\n            img_metas: current image metas (List of #bs samples)\n            bev_memory: where to load and store (training and testing use different buffer)\n            pose_memory: where to load and store (training and testing use different buffer)\n\n        Out:\n            fused_bev_feat: torch.Tensor of shape [B, neck_input_channels, H, W]\n        '''\n\n        bs = curr_bev_feats.size(0)\n\n        if get_trans_loss: # init the trans_loss related variables here\n            trans_reg_loss = curr_bev_feats.new_zeros((1,))\n            trans_cls_loss = curr_bev_feats.new_zeros((1,))\n            back_trans_reg_loss = curr_bev_feats.new_zeros((1,))\n            back_trans_cls_loss = curr_bev_feats.new_zeros((1,))\n            num_pos = 0\n            num_tracks = 0\n\n        if use_memory:\n            self.memory_bank.clear_dict()\n            \n        for b_i in range(bs):\n            curr_e2g_trans = self.plane.new_tensor(img_metas[b_i]['ego2global_translation'], dtype=torch.float64)\n            curr_e2g_rot = self.plane.new_tensor(img_metas[b_i]['ego2global_rotation'], dtype=torch.float64)\n\n            if use_memory:\n                self.memory_bank.curr_rot[b_i] = curr_e2g_rot\n                self.memory_bank.curr_trans[b_i] = curr_e2g_trans\n                if self.memory_bank.curr_t > 0:\n                    self.memory_bank.trans_memory_bank(self.query_propagate, b_i, img_metas[b_i])\n\n            # transform the track queries\n            if track_query_info is not None:\n                history_curr2prev_matrix = all_history_curr2prev[b_i]\n                history_prev2curr_matrix = all_history_prev2curr[b_i]\n\n                track_pts = track_query_info[b_i]['track_query_boxes'].clone()\n                track_pts = rearrange(track_pts, 'n (k c) -> n k c', c=2)\n                # from (0, 1) to (-30, 30) or (-15, 15), prep for transform\n                track_pts = self._denorm_lines(track_pts)\n\n                # Transform the track ref-points using relative pose between prev and curr\n                N, num_points = track_pts.shape[0], track_pts.shape[1]\n                track_pts = torch.cat([\n                    track_pts,\n                    track_pts.new_zeros((N, num_points, 1)), # z-axis\n                    track_pts.new_ones((N, num_points, 1)) # 4-th dim\n                ], dim=-1) # (num_prop, num_pts, 4)\n\n                pose_matrix = history_prev2curr_matrix[-1].float()[:3]\n                rot_mat = pose_matrix[:, :3].cpu().numpy()\n                rot = 
R.from_matrix(rot_mat)\n                translation = pose_matrix[:, 3] \n                trans_matrix = history_prev2curr_matrix[-1].clone()\n\n                # Add training-time perturbation here for the transformation matrix\n                if self.training:\n                    rot, translation = self.add_noise_to_pose(rot, translation)            \n                    trans_matrix[:3, :3] = torch.tensor(rot.as_matrix()).to(trans_matrix.device)\n                    trans_matrix[:3, 3] = torch.tensor(translation).to(trans_matrix.device)\n\n                trans_track_pts = torch.einsum('lk,ijk->ijl', trans_matrix, track_pts.double()).float()\n                trans_track_pts = trans_track_pts[..., :2]\n                trans_track_pts = self._norm_lines(trans_track_pts)\n                trans_track_pts = torch.clip(trans_track_pts, min=0., max=1.)\n                trans_track_pts = rearrange(trans_track_pts, 'n k c -> n (k c)', c=2)\n                track_query_info[b_i]['trans_track_query_boxes'] = trans_track_pts\n                \n                prop_q = track_query_info[b_i]['track_query_hs_embeds']\n\n                rot_quat = torch.tensor(rot.as_quat()).float().to(pose_matrix.device)\n                pose_info = torch.cat([rot_quat.view(-1), translation], dim=0)                \n\n                track_query_updated = self.query_propagate(\n                    prop_q, # (topk, embed_dims)\n                    pose_info.repeat(len(prop_q), 1)\n                )\n                # Do not let future-frame loss backprop through the track queries\n                track_query_info[b_i]['track_query_hs_embeds'] = track_query_updated.clone().detach()\n\n                if get_trans_loss:\n                    pred = self.head.reg_branches[-1](track_query_updated).sigmoid() # (num_prop, 2*num_pts)\n                    pred_scores = self.head.cls_branches[-1](track_query_updated)\n                    assert list(pred.shape) == [N, 2*num_points]\n\n                    gt_pts = track_query_info[b_i]['track_query_gt_lines'].clone()\n                    gt_labels = track_query_info[b_i]['track_query_gt_labels'].clone()\n                    weights = gt_pts.new_ones((N, 2*num_points))\n                    weights_labels = gt_labels.new_ones((N,))\n                    bg_idx = gt_labels == 3\n                    num_pos = num_pos + (N - bg_idx.sum())\n                    num_tracks += len(gt_labels)\n                    weights[bg_idx, :] = 0.0\n                \n                    gt_pts = rearrange(gt_pts, 'n (k c) -> n k c', c=2)\n                    denormed_targets = self._denorm_lines(gt_pts)\n                    denormed_targets = torch.cat([\n                        denormed_targets,\n                        denormed_targets.new_zeros((N, num_points, 1)), # z-axis\n                        denormed_targets.new_ones((N, num_points, 1)) # 4-th dim\n                    ], dim=-1) # (num_prop, num_pts, 4)\n                    assert list(denormed_targets.shape) == [N, num_points, 4]\n\n                    curr_targets = torch.einsum('lk,ijk->ijl', trans_matrix.float(), denormed_targets)\n                    curr_targets = curr_targets[..., :2]\n                    normed_targets = self._norm_lines(curr_targets)\n                    normed_targets = rearrange(normed_targets, 'n k c -> n (k c)', c=2)\n                    # set the weight of invalid normed targets to 0 (outside current bev frame)\n                    invalid_bev_mask = (normed_targets <= 0) | (normed_targets>=1)\n               
     weights[invalid_bev_mask] = 0\n                    # (num_prop, 2*num_pts)\n                    trans_reg_loss += self.head.loss_reg(pred, normed_targets, weights, avg_factor=1.0)\n                    if len(gt_labels) > 0:\n                        trans_score = self.head.loss_cls(pred_scores, gt_labels, weights_labels, avg_factor=1.0)\n                    else:\n                        trans_score = 0.0\n                    trans_cls_loss += trans_score\n\n                    # backward trans loss\n                    pose_matrix_inv = torch.inverse(trans_matrix).float()[:3]\n                    rot_mat_inv = pose_matrix_inv[:, :3].cpu().numpy()\n\n                    rot_inv = R.from_matrix(rot_mat_inv)\n                    rot_quat_inv = torch.tensor(rot_inv.as_quat()).float().to(pose_matrix_inv.device)\n                    translation_inv = pose_matrix_inv[:, 3]\n                    pose_info_inv = torch.cat([rot_quat_inv.view(-1), translation_inv], dim=0)                \n                    track_query_backtrans = self.query_propagate(\n                        track_query_updated, # (topk, embed_dims)\n                        pose_info_inv.repeat(len(prop_q), 1)\n                    )\n                    pred_backtrans = self.head.reg_branches[-1](track_query_backtrans).sigmoid() # (num_prop, 2*num_pts)\n                    pred_scores_backtrans = self.head.cls_branches[-1](track_query_backtrans)\n                    prev_gt_pts = track_query_info[b_i]['track_query_gt_lines']\n                    back_trans_reg_loss += self.head.loss_reg(pred_backtrans, prev_gt_pts, weights, avg_factor=1.0)\n                    if len(gt_labels) > 0:\n                        trans_score_bak = self.head.loss_cls(pred_scores_backtrans, gt_labels, weights_labels, avg_factor=1.0)\n                    else:\n                        trans_score_bak = 0.0\n                    back_trans_cls_loss += trans_score_bak\n\n        if get_trans_loss:\n            trans_loss = self.head.trans_loss_weight * (trans_reg_loss / (num_pos + 1e-10) + \n                            trans_cls_loss / (num_tracks + 1e-10))\n            back_trans_loss = self.head.trans_loss_weight * (back_trans_reg_loss / (num_pos + 1e-10) +\n                                    back_trans_cls_loss / (num_tracks + 1e-10))\n            trans_loss_dict = {\n                'f_trans': trans_loss,\n                'b_trans': back_trans_loss,\n            }\n            return trans_loss_dict\n    \n    def add_noise_to_pose(self, rot, trans):\n        rot_euler = rot.as_euler('zxy')\n        # 0.08 mean is around 5-degree, 3-sigma is 15-degree\n        noise_euler = np.random.randn(*list(rot_euler.shape)) * 0.08\n        rot_euler += noise_euler\n        noisy_rot = R.from_euler('zxy', rot_euler)\n\n        # error within 0.25 meter\n        noise_trans = torch.randn_like(trans) * 0.25\n        noise_trans[2] = 0\n        noisy_trans = trans + noise_trans\n\n        return noisy_rot, noisy_trans\n\n    def process_history_info(self, img_metas, history_img_metas):\n        bs = len(img_metas)\n        all_history_curr2prev = []\n        all_history_prev2curr = []\n        all_history_coord = []\n\n        if len(history_img_metas) == 0:\n            return all_history_curr2prev, all_history_prev2curr, all_history_coord\n\n        for b_i in range(bs):\n            history_e2g_trans = torch.stack([self.plane.new_tensor(prev[b_i]['ego2global_translation'], dtype=torch.float64) for prev in history_img_metas], dim=0)\n            history_e2g_rot = 
torch.stack([self.plane.new_tensor(prev[b_i]['ego2global_rotation'], dtype=torch.float64) for prev in history_img_metas], dim=0)\n            \n            curr_e2g_trans = self.plane.new_tensor(img_metas[b_i]['ego2global_translation'], dtype=torch.float64)\n            curr_e2g_rot = self.plane.new_tensor(img_metas[b_i]['ego2global_rotation'], dtype=torch.float64)\n\n            # Do the coords transformation for all features in the history buffer\n            ## Prepare the transformation matrix\n            history_g2e_matrix = torch.stack([torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device),]*len(history_e2g_trans), dim=0)\n            history_g2e_matrix[:, :3, :3] = torch.transpose(history_e2g_rot, 1, 2)\n            history_g2e_matrix[:, :3, 3] = -torch.bmm(torch.transpose(history_e2g_rot, 1, 2), history_e2g_trans[..., None]).squeeze(-1)\n\n            curr_g2e_matrix = torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device)\n            curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n            curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n            curr_e2g_matrix = torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device)\n            curr_e2g_matrix[:3, :3] = curr_e2g_rot\n            curr_e2g_matrix[:3, 3] = curr_e2g_trans\n\n            history_e2g_matrix = torch.stack([torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device),]*len(history_e2g_trans), dim=0)\n            history_e2g_matrix[:, :3, :3] = history_e2g_rot\n            history_e2g_matrix[:, :3, 3] = history_e2g_trans\n\n            history_curr2prev_matrix = torch.bmm(history_g2e_matrix, repeat(curr_e2g_matrix,'n1 n2 -> r n1 n2', r=len(history_g2e_matrix)))\n            history_prev2curr_matrix = torch.bmm(repeat(curr_g2e_matrix, 'n1 n2 -> r n1 n2', r=len(history_e2g_matrix)), history_e2g_matrix)\n\n            history_coord = torch.einsum('nlk,ijk->nijl', history_curr2prev_matrix, self.plane).float()[..., :2]\n\n            # from (-30, 30) or (-15, 15) to (-1, 1)\n            history_coord[..., 0] = history_coord[..., 0] / (self.roi_size[0]/2)\n            history_coord[..., 1] = -history_coord[..., 1] / (self.roi_size[1]/2)\n\n            all_history_curr2prev.append(history_curr2prev_matrix)\n            all_history_prev2curr.append(history_prev2curr_matrix)\n            all_history_coord.append(history_coord)\n        \n        return all_history_curr2prev, all_history_prev2curr, all_history_coord\n        \n\n    def forward_train(self, img, vectors, semantic_mask, points=None, img_metas=None, all_prev_data=None,\n                      all_local2global_info=None, **kwargs):\n        '''\n        Args:\n            img: torch.Tensor of shape [B, N, 3, H, W]\n                N: number of cams\n            vectors: list[list[Tuple(lines, length, label)]]\n                - lines: np.array of shape [num_points, 2]. 
\n                - length: int\n                - label: int\n                len(vectors) = batch_size\n                len(vectors[_b]) = num of lines in sample _b\n            img_metas: \n                img_metas['lidar2img']: [B, N, 4, 4]\n        Out:\n            loss, log_vars, num_sample\n        '''\n        #  prepare labels and images\n        gts, img, img_metas, valid_idx, points = self.batch_data(\n            vectors, img, img_metas, img.device, points)\n        bs = img.shape[0]\n\n        _use_memory = self.use_memory and self.num_iter > self.mem_warmup_iters\n        \n        if all_prev_data is not None:\n            num_prev_frames = len(all_prev_data)        \n            all_gts_prev, all_img_prev, all_img_metas_prev, all_semantic_mask_prev  = [], [], [], []\n            for prev_data in all_prev_data:\n                gts_prev, img_prev, img_metas_prev, valid_idx_prev, _ = self.batch_data(\n                    prev_data['vectors'], prev_data['img'], prev_data['img_metas'], img.device      \n                )\n                all_gts_prev.append(gts_prev)\n                all_img_prev.append(img_prev)\n                all_img_metas_prev.append(img_metas_prev)\n                all_semantic_mask_prev.append(prev_data['semantic_mask'])\n        else:\n            num_prev_frames = 0\n\n        assert points is None\n\n        if self.skip_vector_head:\n            backprop_backbone_ids = [0, num_prev_frames] # first and last frame train the backbone (bev pretrain)\n        else:\n            backprop_backbone_ids = [num_prev_frames, ] # only the last frame trains the backbone (all other settings)\n\n        track_query_info = None\n        all_loss_dict_prev = []\n        all_trans_loss = []\n        all_outputs_prev = []\n\n        self.tracked_query_length = {}\n\n        if _use_memory:\n            self.memory_bank.set_bank_size(self.mem_len)\n            self.memory_bank.init_memory(bs=bs)\n\n        # History records for bev features\n        history_bev_feats = []\n        history_img_metas = []\n        \n        gt_semantic = torch.flip(semantic_mask, [2,])\n\n        # Iterate through all prev frames\n        for t in range(num_prev_frames):\n            # Backbone for prev\n            img_backbone_gradient = (t in backprop_backbone_ids)\n\n            all_history_curr2prev, all_history_prev2curr, all_history_coord =  \\\n                    self.process_history_info(all_img_metas_prev[t], history_img_metas)\n\n            _bev_feats, mlvl_feats = self.backbone(all_img_prev[t], all_img_metas_prev[t], t, history_bev_feats, \n                        history_img_metas, all_history_coord, points=None, \n                        img_backbone_gradient=img_backbone_gradient)\n\n            # Neck for prev\n            bev_feats = self.neck(_bev_feats)\n\n            if _use_memory:\n                self.memory_bank.curr_t = t\n            \n            # Transform prev-frame feature & pts to curr frame\n            if self.skip_vector_head or t == 0:\n                self.temporal_propagate(bev_feats, all_img_metas_prev[t], all_history_curr2prev, \n                        all_history_prev2curr, _use_memory, track_query_info, timestep=t, get_trans_loss=False)\n            else:\n                trans_loss_dict = self.temporal_propagate(bev_feats, all_img_metas_prev[t], all_history_curr2prev, \n                        all_history_prev2curr, _use_memory, track_query_info, timestep=t, get_trans_loss=True)\n\n                
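# trans_loss_dict holds the forward ('f_trans') and backward ('b_trans')\n                # pose-transformation consistency losses for the propagated track\n                # queries; it is appended to all_trans_loss below, after the head\n                # has produced this frame's matching.\n\n                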
########################################################\n                # Debugging use: visualize the first-frame track queries and the corresponding\n                # ground-truth information\n                # Do this for every timestep > 0\n                #self._viz_temporal_supervision(outputs_prev, track_query_info, gts_next[-1], gts_prev[-1], \n                #                gts_semantic_curr, gts_semantic_prev, img_metas_next, img_metas_prev, t)\n                #import pdb; pdb.set_trace()\n                ########################################################\n            \n            img_metas_prev = all_img_metas_prev[t]\n            img_metas_next = all_img_metas_prev[t+1] if t < num_prev_frames-1 else img_metas\n            gts_prev = all_gts_prev[t]\n            gts_next = all_gts_prev[t+1] if t!=num_prev_frames-1 else gts\n            gts_semantic_prev = torch.flip(all_semantic_mask_prev[t], [2,])\n            gts_semantic_curr = torch.flip(all_semantic_mask_prev[t+1], [2,]) if t!=num_prev_frames-1 else gt_semantic\n\n            local2global_prev = all_local2global_info[t]\n            local2global_next = all_local2global_info[t+1]\n\n            # Compute the semantic segmentation loss\n            seg_preds, seg_feats, seg_loss, seg_dice_loss = self.seg_decoder(bev_feats, gts_semantic_prev,\n                    all_history_coord, return_loss=True)\n\n            # Save the history \n            history_bev_feats.append(bev_feats)\n            history_img_metas.append(all_img_metas_prev[t])\n            if len(history_bev_feats) > self.history_steps:\n                history_bev_feats.pop(0)\n                history_img_metas.pop(0)\n            \n            if not self.skip_vector_head:\n                # Prepare the two-frame instance matching info\n                gt_cur2prev, gt_prev2cur = self.get_two_frame_matching(local2global_prev, local2global_next, \n                                                                       gts_prev, gts_next)\n                if t == 0:\n                    memory_bank = None\n                else:\n                    memory_bank = self.memory_bank if _use_memory else None\n                # 1). Compute the loss for prev frame\n                # 2). Get the matching results for computing the track query to next frame\n                loss_dict_prev, outputs_prev, prev_inds_list, prev_gt_inds_list, prev_matched_reg_cost, \\\n                    prev_gt_list = self.head(\n                                        bev_features=bev_feats, \n                                        img_metas=img_metas_prev, \n                                        gts=gts_prev,\n                                        track_query_info=track_query_info,\n                                        memory_bank=memory_bank,\n                                        return_loss=True,\n                                        return_matching=True)\n                all_outputs_prev.append(outputs_prev)\n\n                if t > 0:\n                    all_trans_loss.append(trans_loss_dict)\n\n                # Do the query prop and negative sampling, prepare the corresponding\n                # updated G.T. labels. The prepared queries will be passed to the model,\n                # and combined with the original queries inside the head model\n                pos_th = 0.4\n                track_query_info = self.prepare_track_queries_and_targets(gts_next, prev_inds_list, \n                    prev_gt_inds_list, prev_matched_reg_cost, prev_gt_list, outputs_prev, gt_cur2prev, gt_prev2cur, \n                    img_metas_prev, _use_memory, pos_th=pos_th, timestep=t)\n            else:\n                loss_dict_prev = {}\n\n            loss_dict_prev['seg'] = seg_loss\n            loss_dict_prev['seg_dice'] = seg_dice_loss\n\n            all_loss_dict_prev.append(loss_dict_prev)\n\n        if _use_memory:\n            self.memory_bank.curr_t = num_prev_frames\n\n        # NOTE: we separate the last frame to be consistent with the single-frame-only setting\n        # Backbone for curr\n        img_backbone_gradient = num_prev_frames in backprop_backbone_ids\n\n        all_history_curr2prev, all_history_prev2curr, all_history_coord = self.process_history_info(img_metas, history_img_metas)\n\n        _bev_feats, mlvl_feats = self.backbone(img, img_metas, num_prev_frames, history_bev_feats, history_img_metas, all_history_coord,\n                    points=None, img_backbone_gradient=img_backbone_gradient)\n        # Neck for curr\n        bev_feats = self.neck(_bev_feats)\n\n        if self.skip_vector_head or num_prev_frames == 0:\n            # Transform prev-frame feature & pts to curr frame using the relative pose\n            assert track_query_info is None\n            self.temporal_propagate(bev_feats, img_metas, all_history_curr2prev, \n                        all_history_prev2curr, _use_memory, track_query_info, timestep=num_prev_frames, get_trans_loss=False)\n        else:\n            trans_loss_dict = self.temporal_propagate(bev_feats, img_metas, all_history_curr2prev, \n                        all_history_prev2curr, _use_memory, track_query_info, timestep=num_prev_frames, get_trans_loss=True)\n            all_trans_loss.append(trans_loss_dict)\n\n            ########################################################\n            # Debugging use: visualize the first-frame track queries and the corresponding\n            # ground-truth information\n            # Do this for every timestep > 0\n            #assert num_prev_frames > 0\n            #self._viz_temporal_supervision(outputs_prev, track_query_info, gts_next[-1], gts_prev[-1], gt_semantic,\n            #        gts_semantic_prev, img_metas_next, img_metas_prev, timestep=num_prev_frames)\n            #import pdb; pdb.set_trace()\n            ########################################################\n\n        seg_preds, seg_feats, seg_loss, seg_dice_loss = self.seg_decoder(bev_feats, gt_semantic, \n                all_history_coord, return_loss=True)\n        \n        if not self.skip_vector_head:\n            memory_bank = self.memory_bank if _use_memory else None\n            # 3. run the head again and compute the loss for the current (last) frame\n            preds_list, loss_dict, det_match_idxs, det_match_gt_idxs, gt_list = self.head(\n                bev_features=bev_feats, \n                img_metas=img_metas, \n                gts=gts,\n                track_query_info=track_query_info,\n                memory_bank=memory_bank,\n                return_loss=True)\n        else:\n            loss_dict = {}\n        \n        loss_dict['seg'] = seg_loss\n        loss_dict['seg_dice'] = seg_dice_loss\n\n        # format loss, sum over all frames\n        loss = 0\n        losses_t = []\n        for loss_dict_t in (all_loss_dict_prev + [loss_dict,]):\n            loss_t = 0\n            for name, var in loss_dict_t.items():\n                loss_t = loss_t + var\n            losses_t.append(loss_t)\n            loss += loss_t\n        \n        for trans_loss_dict_t in all_trans_loss:\n            trans_loss_t = trans_loss_dict_t['f_trans'] + trans_loss_dict_t['b_trans']\n            loss += trans_loss_t\n        \n        # update the log\n        log_vars = {k: v.item() for k, v in loss_dict.items()}\n\n        for t, loss_dict_t in enumerate(all_loss_dict_prev):\n            log_vars_t = {k+'_t{}'.format(t): v.item() for k, v in loss_dict_t.items()}\n            log_vars.update(log_vars_t)\n        \n        for t, loss_t in enumerate(losses_t):\n            log_vars.update({'total_t{}'.format(t): loss_t.item()})\n        \n        for t, trans_loss_dict_t in enumerate(all_trans_loss):\n            log_vars_t = {k+'_t{}'.format(t): v.item() for k, v in trans_loss_dict_t.items()}\n            log_vars.update(log_vars_t)\n        \n        log_vars.update({'total': loss.item()})\n        num_sample = img.size(0)\n        return loss, log_vars, num_sample\n\n    @torch.no_grad()\n    def forward_test(self, img, points=None, img_metas=None, seq_info=None, **kwargs):\n        '''\n            inference pipeline\n        '''\n\n        assert img.shape[0] == 1, 'Only support bs=1 per-gpu for inference'\n\n        tokens = []\n        for img_meta in img_metas:\n            tokens.append(img_meta['token'])\n        \n        scene_name, local_idx, seq_length = seq_info[0]\n        first_frame = (local_idx == 0)\n        img_metas[0]['local_idx'] = local_idx\n    \n        if first_frame:\n            if self.use_memory:\n                self.memory_bank.set_bank_size(self.test_time_history_steps)\n                #self.memory_bank.set_bank_size(self.mem_len)\n                self.memory_bank.init_memory(bs=1)\n            self.history_bev_feats_all = []\n            self.history_img_metas_all = []\n        \n        if self.use_memory:\n            self.memory_bank.curr_t = local_idx\n        \n        selected_mem_ids = self.select_memory_entries(self.history_img_metas_all, img_metas)\n        history_img_metas = [self.history_img_metas_all[idx] for idx in selected_mem_ids]\n        history_bev_feats = [self.history_bev_feats_all[idx] for idx in selected_mem_ids]\n\n        all_history_curr2prev, all_history_prev2curr, all_history_coord =  \\\n                    self.process_history_info(img_metas, history_img_metas)\n\n        _bev_feats, mlvl_feats = self.backbone(img, img_metas, local_idx, history_bev_feats, history_img_metas,\n                        all_history_coord, points=points)\n        \n        img_shape = [_bev_feats.shape[2:] for i in range(_bev_feats.shape[0])]\n        # Neck\n        bev_feats = self.neck(_bev_feats)\n\n        if 
self.skip_vector_head or first_frame:\n            self.temporal_propagate(bev_feats, img_metas, all_history_curr2prev, \\\n                    all_history_prev2curr, self.use_memory, track_query_info=None)\n            seg_preds, seg_feats = self.seg_decoder(bev_features=bev_feats, return_loss=False)\n            if not self.skip_vector_head:\n                preds_list = self.head(bev_feats, img_metas=img_metas, return_loss=False)\n            track_dict = None\n        else:\n            # Using the saved prev-frame output to prepare the track query inputs\n            track_query_info = self.head.get_track_info(scene_name, local_idx)\n            # Transform prev-frame feature & pts to curr frame using the relative pose\n            self.temporal_propagate(bev_feats, img_metas, all_history_curr2prev, \n                all_history_prev2curr, self.use_memory, track_query_info)\n            seg_preds, seg_feats = self.seg_decoder(bev_features=bev_feats, return_loss=False)\n\n            # Run the vector map decoder with instance-level memory\n            memory_bank = self.memory_bank if self.use_memory else None\n            preds_list = self.head(bev_feats, img_metas=img_metas, \n                        track_query_info=track_query_info, memory_bank=memory_bank,\n                        return_loss=False)\n            track_dict = self._process_track_query_info(track_query_info)\n            \n        if not self.skip_vector_head:\n            # take predictions from the last layer\n            preds_dict = preds_list[-1]\n        else:\n            preds_dict = None\n\n        # Save the BEV and meta-info history \n        self.history_bev_feats_all.append(bev_feats)\n        self.history_img_metas_all.append(img_metas)\n\n        if len(self.history_bev_feats_all) > self.test_time_history_steps:\n            self.history_bev_feats_all.pop(0)\n            self.history_img_metas_all.pop(0)\n        \n        if not self.skip_vector_head:\n            memory_bank = self.memory_bank if self.use_memory else None\n            thr_det = 0.4 if first_frame else 0.6\n            pos_results = self.head.prepare_temporal_propagation(preds_dict, scene_name, local_idx, \n                                        memory_bank, thr_track=0.5, thr_det=thr_det)\n    \n        if not self.skip_vector_head:\n            results_list = self.head.post_process(preds_dict, tokens, track_dict)\n            results_list[0]['pos_results'] = pos_results\n            results_list[0]['meta'] = img_metas[0]\n        else:\n            results_list = [{'vectors': [],\n                'scores': [],\n                'labels': [],\n                'props': [],\n                'token': token} for token in tokens]\n\n        # Add the segmentation preds to the results to be saved\n        for b_i in range(len(results_list)):\n            tmp_scores, tmp_labels = seg_preds[b_i].max(0)\n            tmp_scores = tmp_scores.sigmoid()\n            preds_i = torch.zeros(tmp_labels.shape, dtype=torch.uint8).to(tmp_scores.device)\n            pos_ids = tmp_scores >= 0.4\n            preds_i[pos_ids] = tmp_labels[pos_ids].type(torch.uint8) + 1\n            preds_i = preds_i.cpu().numpy()\n            results_list[b_i]['semantic_mask'] = preds_i\n            if 'token' not in results_list[b_i]:\n                results_list[b_i]['token'] = tokens[b_i]\n\n        return results_list\n\n    def batch_data(self, vectors, imgs, img_metas, device, points=None):\n        bs = len(vectors)\n        # filter none vector's case\n        
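# 'vectors[idx]' maps a category label to a list of G.T. polylines, e.g.\n        # (illustrative): {0: [line_a, line_b], 1: [...], 2: [...]}, where each\n        # line is an array of shape (num_points, 2), or (num_permute, num_points, 2)\n        # when point-order permutations are pre-computed.\n        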
num_gts = []\n        for idx in range(bs):\n            num_gts.append(sum([len(v) for k, v in vectors[idx].items()]))\n        valid_idx = [i for i in range(bs) if num_gts[i] > 0]\n        assert len(valid_idx) == bs # make sure every sample has gts\n\n        all_labels_list = []\n        all_lines_list = []\n        all_gt2local = []\n        all_local2gt = []\n        for idx in range(bs):\n            labels = []\n            lines = []\n            gt2local = []\n            local2gt = {}\n            for label, _lines in vectors[idx].items():\n                for _ins_id, _line in enumerate(_lines):\n                    labels.append(label)\n                    gt2local.append([label, _ins_id])\n                    local2gt[(label, _ins_id)] = len(lines)\n                    if len(_line.shape) == 3: # permutation\n                        num_permute, num_points, coords_dim = _line.shape\n                        lines.append(torch.tensor(_line).reshape(num_permute, -1)) # (38, 40)\n                    elif len(_line.shape) == 2:\n                        lines.append(torch.tensor(_line).reshape(-1)) # (40, )\n                    else:\n                        assert False\n\n            all_labels_list.append(torch.tensor(labels, dtype=torch.long).to(device))\n            all_lines_list.append(torch.stack(lines).float().to(device))\n            all_gt2local.append(gt2local)\n            all_local2gt.append(local2gt)\n\n        gts = {\n            'labels': all_labels_list,\n            'lines': all_lines_list,\n            'gt2local': all_gt2local,\n            'local2gt': all_local2gt,\n        }\n\n        gts = [deepcopy(gts) for _ in range(self.num_decoder_layers)]\n\n        return gts, imgs, img_metas, valid_idx, points\n    \n    def get_two_frame_matching(self, local2global_prev, local2global_curr, gts_prev, gts):\n        \"\"\"\n        Get the G.T. matching between the two frames\n        Terminology: (1). local --> local idx inside each category;\n                    (2). global --> global instance id inside category\n                    (3). gt --> index in the flattened G.T. sequence\n        Args:\n            local2global_prev (list[dict]): per-sample {label: {local id: global instance id}} mapping (pre-prepared) for prev frame\n            local2global_curr (list[dict]): per-sample {label: {local id: global instance id}} mapping (pre-prepared) for curr frame\n            gts_prev: processed G.T. for prev frame\n            gts: processed G.T. for curr frame\n        \"\"\"\n        bs = len(local2global_prev)\n        gt2local_curr = gts[-1]['gt2local'] # don't need the per-block supervision, just take one\n        gt2local_prev = gts_prev[-1]['gt2local']\n        local2gt_prev = gts_prev[-1]['local2gt']\n\n        # the comma is to take the single-element output from multi_apply\n        global2local_prev, = multi_apply(self._reverse_id_mapping, local2global_prev)\n\n        all_gt_cur2prev, all_gt_prev2cur = multi_apply(self._compute_cur2prev, gt2local_curr, gt2local_prev, local2gt_prev, \n                                        local2global_curr, global2local_prev)\n        \n        return all_gt_cur2prev, all_gt_prev2cur\n    \n    def _compute_cur2prev(self, gt2local_curr, gt2local_prev, local2gt_prev, \n                          local2global_curr, global2local_prev):\n        cur2prev = torch.zeros(len(gt2local_curr))\n        prev2cur = torch.zeros(len(gt2local_prev))\n        prev2cur[:] = -1\n        for gt_idx_curr in range(len(gt2local_curr)):\n            label = gt2local_curr[gt_idx_curr][0]\n            local_idx = gt2local_curr[gt_idx_curr][1]\n            seq_id = local2global_curr[label][local_idx]\n            if seq_id in global2local_prev[label]:\n                local_id_prev = global2local_prev[label][seq_id]\n                gt_idx_prev = local2gt_prev[(label, local_id_prev)]\n            else:\n                gt_idx_prev = -1\n            cur2prev[gt_idx_curr] = gt_idx_prev\n            if gt_idx_prev != -1: # there is a positive match in prev frame\n                prev2cur[gt_idx_prev] = gt_idx_curr # update the information\n            \n        return cur2prev, prev2cur\n                \n    def _reverse_id_mapping(self, id_mapping):\n        reversed_mapping = {}\n        for label, mapping in id_mapping.items():\n            r_map = {v:k for k,v in mapping.items()}\n            reversed_mapping[label] = r_map\n        return reversed_mapping,\n\n    def prepare_track_queries_and_targets(self, gts, prev_inds_list, prev_gt_inds_list, prev_matched_reg_cost,\n                     prev_gt_list, prev_out, gt_cur2prev, gt_prev2cur, metas_prev, use_memory, pos_th=0.4, timestep=None):\n        bs = len(prev_inds_list)\n        device = prev_out['lines'][0].device\n\n        targets = []\n        for b_i in range(bs):\n            results = {}\n            for key, val in gts[-1].items():\n                results[key] = val[b_i]\n            targets.append(results)\n                \n        # for each sample in the batch\n        for b_i, (target, prev_out_ind, prev_target_ind) in enumerate(zip(targets, prev_inds_list, prev_gt_inds_list)):\n            scene_seq_id = metas_prev[b_i]['local_idx']\n\n            scores = prev_out['scores'][b_i].detach()\n            scores, labels = scores.max(-1)\n            scores = scores.sigmoid()\n\n            match_cost = prev_matched_reg_cost[b_i]\n            target_prev2cur = gt_prev2cur[b_i].to(device)\n            target['prev_target_ind'] = prev_target_ind # record the matched g.t. index\n            target['prev_out_ind'] = prev_out_ind\n            target['gt_prev2cur'] = target_prev2cur\n            assert len(target_prev2cur) == len(prev_gt_inds_list[b_i])\n\n            # 1). filter out the track queries with low scores, creating FNs;\n            prev_pos_scores = scores[prev_out_ind]\n            score_filter_mask = prev_pos_scores >= pos_th\n\n            keep_mask = score_filter_mask\n            prev_out_ind_filtered = prev_out_ind[keep_mask]\n            prev_target_ind_filtered = prev_target_ind[keep_mask]\n            \n            target_prev2cur = target_prev2cur[prev_target_ind_filtered]\n            target_ind_matching = (target_prev2cur != -1) # -1 means no matching g.t. in curr frame\n            # matched g.t. index in the current frame\n            target_ind_matched_idx = target_prev2cur[target_prev2cur!=-1]\n\n            target['track_query_match_ids'] = target_ind_matched_idx\n            \n            if timestep == 0:\n                pad_bound = self.head.num_queries\n            else:\n                pad_bound = self.tracked_query_length[b_i] + self.head.num_queries\n                \n            not_prev_out_ind = torch.arange(prev_out['lines'][b_i].shape[0]).to(device)\n            not_prev_out_ind = torch.tensor([\n                ind.item()\n                for ind in not_prev_out_ind\n                if ind not in prev_out_ind and ind < pad_bound])\n            \n            # Take all non-matched predictions with score >= pos_th; they serve as FPs\n            neg_scores = scores[not_prev_out_ind]\n            neg_score_mask = neg_scores >= pos_th\n            # Additionally, randomly pick ~5% of the remaining negatives as FPs\n            _rand_insert = torch.rand([len(neg_scores)]).to(device)\n\n            if self.track_fp_aug:\n                rand_insert_mask = _rand_insert >= 0.95\n                fp_select_mask = neg_score_mask | rand_insert_mask\n            else:\n                fp_select_mask = neg_score_mask\n\n            false_out_ind = not_prev_out_ind[fp_select_mask]\n\n            prev_out_ind_final = torch.tensor(prev_out_ind_filtered.tolist() + false_out_ind.tolist()).long()\n            target_ind_matching = torch.cat([\n                target_ind_matching,\n                torch.tensor([False, ] * len(false_out_ind)).bool().to(device)\n            ])\n\n            target_prev2cur_aug = torch.cat([\n                target_prev2cur,\n                torch.tensor([-1, ] * len(false_out_ind)).to(device)\n            ])\n            target['track_to_cur_gt_ids'] = target_prev2cur_aug\n\n            # track query masks\n            track_queries_mask = torch.ones_like(target_ind_matching).bool()\n            track_queries_fal_pos_mask = torch.zeros_like(target_ind_matching).bool()\n            track_queries_fal_pos_mask[~target_ind_matching] = True\n\n            # set prev frame info\n            target['track_query_hs_embeds'] = prev_out['hs_embeds'][b_i, prev_out_ind_final]\n            target['track_query_boxes'] = prev_out['lines'][b_i][prev_out_ind_final].detach()\n            tmp_labels = labels[prev_out_ind_final]\n            tmp_scores = scores[prev_out_ind_final]\n            target['track_query_labels'] = tmp_labels\n            target['track_query_scores'] = tmp_scores\n\n            # Prepare the G.T. 
line coords for the track queries, used in the transformation loss\n            prev_gt_lines = prev_gt_list['lines'][b_i] \n            prev_gt_labels = prev_gt_list['labels'][b_i] \n            target['track_query_gt_lines'] = prev_gt_lines[prev_out_ind_final]\n            target['track_query_gt_labels'] = prev_gt_labels[prev_out_ind_final]\n\n            target['track_queries_mask'] = torch.cat([\n                track_queries_mask,\n                torch.tensor([False, ] * self.head.num_queries).to(device)\n            ]).bool()\n\n            target['track_queries_fal_pos_mask'] = torch.cat([\n                track_queries_fal_pos_mask,\n                torch.tensor([False, ] * self.head.num_queries).to(device)\n            ]).bool()\n\n            if use_memory:\n                is_first_frame = (timestep == 0)\n                num_tracks = 0 if timestep == 0 else self.tracked_query_length[b_i]\n                self.memory_bank.update_memory(b_i, is_first_frame, prev_out_ind_final, prev_out, num_tracks, scene_seq_id, timestep)\n        \n        targets = self._batchify_tracks(targets)\n        return targets\n    \n    def _batchify_tracks(self, targets):\n        lengths = [len(t['track_queries_mask']) for t in targets]\n        max_len = max(lengths)\n        device = targets[0]['track_query_hs_embeds'].device\n        for b_i in range(len(lengths)):\n            target = targets[b_i]\n            padding_len = max_len - lengths[b_i]\n            pad_hs_embeds = torch.zeros([padding_len, target['track_query_hs_embeds'].shape[1]]).to(device)\n            pad_query_boxes = torch.zeros([padding_len, target['track_query_boxes'].shape[1]]).to(device)\n            query_padding_mask = torch.zeros([max_len]).bool().to(device)\n            query_padding_mask[lengths[b_i]:] = True\n            target['pad_hs_embeds'] = pad_hs_embeds\n            target['pad_query_boxes'] = pad_query_boxes\n            target['query_padding_mask'] = query_padding_mask\n            self.tracked_query_length[b_i] = lengths[b_i] - self.head.num_queries\n        return targets\n        \n    def train(self, *args, **kwargs):\n        super().train(*args, **kwargs)\n        if self.freeze_bev:\n            self._freeze_bev()\n        elif self.freeze_bev_iters is not None and self.num_iter < self.freeze_bev_iters:\n            self._freeze_bev()\n        else:\n            self._unfreeze_bev()\n\n    def eval(self):\n        super().eval()\n        \n    def _freeze_bev(self,):\n        \"\"\"Freeze all bev-related backbone parameters, including the backbone and the seg head\n        \"\"\"\n        for param in self.backbone.parameters():\n            param.requires_grad = False\n        for param in self.seg_decoder.parameters():\n            param.requires_grad = False\n    \n    def _unfreeze_bev(self,):\n        \"\"\"unfreeze all bev-related backbone parameters, including the backbone and the seg head\n        \"\"\"\n        for param in self.backbone.parameters():\n            param.requires_grad = True\n        for param in self.seg_decoder.parameters():\n            param.requires_grad = True\n    \n    def _denorm_lines(self, line_pts):\n        \"\"\"from (0,1) to the BEV space in meters\"\"\"\n        line_pts[..., 0] = line_pts[..., 0] * self.roi_size[0] \\\n                        - self.roi_size[0] / 2 \n        line_pts[..., 1] = line_pts[..., 1] * self.roi_size[1] \\\n                        - self.roi_size[1] / 2 \n        return line_pts\n\n    def _norm_lines(self, line_pts):\n        
\"\"\"from the BEV space in meters to (0,1) \"\"\"\n        line_pts[..., 0] = (line_pts[..., 0] + self.roi_size[0] / 2) \\\n                                        / self.roi_size[0] \n        line_pts[..., 1] = (line_pts[..., 1] + self.roi_size[1] / 2) \\\n                                        / self.roi_size[1] \n        return line_pts\n\n    def _process_track_query_info(self, track_info):\n        bs = len(track_info)\n        all_scores = []\n        all_lines = []\n        for b_i in range(bs):\n            embeds = track_info[b_i]['track_query_hs_embeds']\n            scores = self.head.cls_branches[-1](embeds)\n            coords = self.head.reg_branches[-1](embeds).sigmoid()\n            coords = rearrange(coords, 'n1 (n2 n3) -> n1 n2 n3', n3=2)\n            all_scores.append(scores)\n            all_lines.append(coords)\n        track_results = {\n            'lines': all_lines,\n            'scores': all_scores,\n        }\n        return track_results\n    \n    def select_memory_entries(self, history_metas, curr_meta):\n        \"\"\"\n        Only used at test time, to select a subset from the long history bank\n        \"\"\"\n        if len(history_metas) <= self.history_steps:\n            return np.arange(len(history_metas))\n        else:\n            history_e2g_trans = np.array([item[0]['ego2global_translation'] for item in history_metas])[:, :2]\n            curr_e2g_trans = np.array(curr_meta[0]['ego2global_translation'])[:2]\n            dists = np.linalg.norm(history_e2g_trans - curr_e2g_trans[None, :], axis=1)\n\n            sorted_indices = np.argsort(dists)\n            sorted_dists = dists[sorted_indices]\n            covered = np.zeros_like(sorted_indices).astype(np.bool)\n            selected_ids = []\n            for dist_range in self.mem_select_dist_ranges[::-1]:\n                outter_valid_flags = (sorted_dists >= dist_range) & ~covered\n                if outter_valid_flags.any():\n                    pick_id = np.where(outter_valid_flags)[0][0]     \n                    covered[pick_id:] = True\n                else:\n                    inner_valid_flags = (sorted_dists < dist_range) & ~covered\n                    if inner_valid_flags.any():\n                        pick_id = np.where(inner_valid_flags)[0][-1]\n                        covered[pick_id] = True\n                    else:\n                        return np.arange(len(history_metas))[-4:]\n                selected_ids.append(pick_id)\n\n            selected_mem_ids = sorted_indices[np.array(selected_ids)]\n\n            return selected_mem_ids\n\n    #####################################################################\n    # \n    # Debugging visualization of the temporal propagation supervision\n    # \n    ##################################################################### \n\n    def _viz_temporal_supervision(self, outputs_prev, all_track_info, gts, gts_prev, semantic_mask, \n                                  semantic_mask_prev, img_metas, img_metas_prev, timestep):\n        \"\"\"For debugging use: draw the visualization of the track queries and the corresponding\n        matched G.T. 
information...\"\"\"\n        import os\n        from ..utils.renderer_track import Renderer\n        viz_dir = './viz/debug_noisy_trans'\n        if not os.path.exists(viz_dir):\n            os.makedirs(viz_dir)\n        cat2id = {\n            'ped_crossing': 0,\n            'divider': 1,\n            'boundary': 2,\n        }\n        renderer = Renderer(cat2id, self.roi_size, 'nusc')\n\n        for b_i in range(len(all_track_info)):\n            track_info = all_track_info[b_i]\n            # prev pred info\n            prev_pred_lines = outputs_prev['lines'][b_i]\n            prev_pred_scores = outputs_prev['scores'][b_i]\n            prev_target_inds = track_info['prev_target_ind']\n            prev_out_inds = track_info['prev_out_ind']\n            gt_prev2cur = track_info['gt_prev2cur']\n            prev_scores, prev_labels = prev_pred_scores.max(-1)\n            prev_scores = prev_scores.sigmoid()\n            prev_lines = rearrange(prev_pred_lines[prev_out_inds], 'n (k c) -> n k c', c=2)\n            prev_labels = prev_labels[prev_out_inds]\n            prev_lines = self._denorm_lines(prev_lines)\n            prev_scores = prev_scores[prev_out_inds]\n            out_path_prev = os.path.join(viz_dir, f't={timestep}_{b_i}_prev.png')\n            renderer.render_bev_from_vectors(prev_lines, prev_labels, out_path_prev, \n                id_info=prev_target_inds, score_info=prev_scores)\n\n            # gt info\n            gt_labels = gts['labels'][b_i]\n            gt_lines = torch.clip(gts['lines'][b_i][:, 0], 0, 1)\n            gt_lines = rearrange(gt_lines, 'n (k c) -> n k c', c=2)\n            gt_lines = self._denorm_lines(gt_lines)\n            out_path_gt = os.path.join(viz_dir, f't={timestep}_{b_i}_gt.png')\n            gt_ids = np.arange(len(gt_lines))\n            renderer.render_bev_from_vectors(gt_lines, gt_labels, out_path_gt, id_info=gt_ids)\n            gt_semantic = semantic_mask[b_i].cpu().numpy()\n            out_path_gt_semantic = os.path.join(viz_dir, f't={timestep}_{b_i}_gt_semantic.png')\n            renderer.render_bev_from_mask(gt_semantic, out_path_gt_semantic)\n\n            # gt info for prev frame\n            gt_labels = gts_prev['labels'][b_i]\n            gt_lines = torch.clip(gts_prev['lines'][b_i][:, 0], 0, 1)\n            gt_lines = rearrange(gt_lines, 'n (k c) -> n k c', c=2)\n            gt_lines = self._denorm_lines(gt_lines)\n            out_path_gt = os.path.join(viz_dir, f't={timestep}_{b_i}_prev_gt.png')\n            gt_ids = np.arange(len(gt_lines))\n            renderer.render_bev_from_vectors(gt_lines, gt_labels, out_path_gt, id_info=gt_ids)\n            gt_semantic = semantic_mask_prev[b_i].cpu().numpy()\n            out_path_gt_semantic = os.path.join(viz_dir, f't={timestep}_{b_i}_prev_gt_semantic.png')\n            renderer.render_bev_from_mask(gt_semantic, out_path_gt_semantic)\n\n            # track query info\n            track_to_cur_gt_ids = track_info['track_to_cur_gt_ids']\n            trans_track_lines = track_info['trans_track_query_boxes']\n            trans_track_lines = rearrange(trans_track_lines, 'n (k c) -> n k c', c=2)\n            trans_track_lines = self._denorm_lines(trans_track_lines)\n            #tp_track_mask = ~track_info['track_queries_fal_pos_mask'][:-100]\n            trans_track_lines = trans_track_lines\n            track_labels = track_info['track_query_labels']\n            track_scores = track_info['track_query_scores']\n            out_path_track = os.path.join(viz_dir, f't={timestep}_{b_i}_track.png')\n  
          renderer.render_bev_from_vectors(trans_track_lines, track_labels, out_path_track, \n                id_info=track_to_cur_gt_ids, score_info=track_scores)\n"
  },
  {
    "path": "plugin/models/mapers/__init__.py",
    "content": "from .MapTracker import MapTracker"
  },
  {
    "path": "plugin/models/mapers/base_mapper.py",
    "content": "from abc import ABCMeta, abstractmethod\n\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16\nfrom mmcv.utils import print_log\n\nfrom mmdet.utils import get_root_logger\nfrom mmdet3d.models.builder import DETECTORS\n\nMAPPERS = DETECTORS\n\nclass BaseMapper(nn.Module, metaclass=ABCMeta):\n    \"\"\"Base class for mappers.\"\"\"\n\n    def __init__(self):\n        super(BaseMapper, self).__init__()\n        self.fp16_enabled = False\n\n    @property\n    def with_neck(self):\n        \"\"\"bool: whether the detector has a neck\"\"\"\n        return hasattr(self, 'neck') and self.neck is not None\n\n    # TODO: these properties need to be carefully handled\n    # for both single stage & two stage detectors\n    @property\n    def with_shared_head(self):\n        \"\"\"bool: whether the detector has a shared head in the RoI Head\"\"\"\n        return hasattr(self, 'roi_head') and self.roi_head.with_shared_head\n\n    @property\n    def with_bbox(self):\n        \"\"\"bool: whether the detector has a bbox head\"\"\"\n        return ((hasattr(self, 'roi_head') and self.roi_head.with_bbox)\n                or (hasattr(self, 'bbox_head') and self.bbox_head is not None))\n\n    @property\n    def with_mask(self):\n        \"\"\"bool: whether the detector has a mask head\"\"\"\n        return ((hasattr(self, 'roi_head') and self.roi_head.with_mask)\n                or (hasattr(self, 'mask_head') and self.mask_head is not None))\n\n    #@abstractmethod\n    def extract_feat(self, imgs):\n        \"\"\"Extract features from images.\"\"\"\n        pass\n\n    def forward_train(self, *args, **kwargs):\n        pass\n\n    #@abstractmethod\n    def simple_test(self, img, img_metas, **kwargs):\n        pass\n\n    #@abstractmethod\n    def aug_test(self, imgs, img_metas, **kwargs):\n        \"\"\"Test function with test time augmentation.\"\"\"\n        pass\n\n    def init_weights(self, pretrained=None):\n        \"\"\"Initialize the weights in detector.\n\n        Args:\n            pretrained (str, optional): Path to pre-trained weights.\n                Defaults to None.\n        \"\"\"\n        if pretrained is not None:\n            logger = get_root_logger()\n            print_log(f'load model from: {pretrained}', logger=logger)\n\n    def forward_test(self, *args, **kwargs):\n        \"\"\"\n        Args:\n        \"\"\"\n        if True:\n            self.simple_test()\n        else:\n            self.aug_test()\n\n    # @auto_fp16(apply_to=('img', ))\n    def forward(self, *args, return_loss=True, **kwargs):\n        \"\"\"Calls either :func:`forward_train` or :func:`forward_test` depending\n        on whether ``return_loss`` is ``True``.\n\n        Note this setting will change the expected inputs. When\n        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor\n        and List[dict]), and when ``resturn_loss=False``, img and img_meta\n        should be double nested (i.e.  List[Tensor], List[List[dict]]), with\n        the outer list indicating test time augmentations.\n        \"\"\"\n        if return_loss:\n            return self.forward_train(*args, **kwargs)\n        else:\n            kwargs.pop('rescale')\n            return self.forward_test(*args, **kwargs)\n\n    def train_step(self, data_dict, optimizer):\n        \"\"\"The iteration step during training.\n\n        This method defines an iteration step during training, except for the\n        back propagation and optimizer updating, which are done in an optimizer\n        hook. 
Note that in some complicated cases or models, the whole process\n        including back propagation and optimizer updating is also defined in\n        this method, such as GAN.\n\n        Args:\n            data_dict (dict): The output of dataloader.\n            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of\n                runner is passed to ``train_step()``. This argument is unused\n                and reserved.\n\n        Returns:\n            dict: It should contain at least 3 keys: ``loss``, ``log_vars``, \\\n                ``num_samples``.\n\n                - ``loss`` is a tensor for back propagation, which can be a \\\n                weighted sum of multiple losses.\n                - ``log_vars`` contains all the variables to be sent to the\n                logger.\n                - ``num_samples`` indicates the batch size (when the model is \\\n                DDP, it means the batch size on each GPU), which is used for \\\n                averaging the logs.\n        \"\"\"\n        loss, log_vars, num_samples = self(**data_dict)\n        \n        outputs = dict(\n            loss=loss, log_vars=log_vars, num_samples=num_samples)\n\n        return outputs\n\n    def val_step(self, data, optimizer):\n        \"\"\"The iteration step during validation.\n\n        This method shares the same signature as :func:`train_step`, but used\n        during val epochs. Note that the evaluation after training epochs is\n        not implemented with this method, but an evaluation hook.\n        \"\"\"\n        loss, log_vars, num_samples = self(**data)\n        \n        outputs = dict(\n            loss=loss, log_vars=log_vars, num_samples=num_samples)\n\n        return outputs\n\n    def show_result(self,\n                    **kwargs):\n        img = None\n        return img"
  },
  {
    "path": "plugin/models/mapers/vector_memory.py",
    "content": "import torch\nfrom torch import nn\n\nfrom einops import repeat, rearrange\nfrom scipy.spatial.transform import Rotation as R\nimport numpy as np\n\n\ndef get_emb(sin_inp):\n    \"\"\"\n    Gets a base embedding for one dimension with sin and cos intertwined\n    \"\"\"\n    emb = torch.stack((sin_inp.sin(), sin_inp.cos()), dim=-1)\n    return torch.flatten(emb, -2, -1)\n\n\nclass PositionalEncoding1D(nn.Module):\n    def __init__(self, channels):\n        \"\"\"\n        :param channels: The last dimension of the tensor you want to apply pos emb to.\n        \"\"\"\n        super(PositionalEncoding1D, self).__init__()\n        self.org_channels = channels\n        channels = int(np.ceil(channels / 2) * 2)\n        self.channels = channels\n        inv_freq = 1.0 / (10000 ** (torch.arange(0, channels, 2).float() / channels))\n        self.register_buffer(\"inv_freq\", inv_freq)\n        self.register_buffer(\"cached_penc\", None)\n\n    def forward(self, tensor):\n        \"\"\"\n        :param tensor: A 3d tensor of size (batch_size, x, ch)\n        :return: Positional Encoding Matrix of size (batch_size, x, ch)\n        \"\"\"\n        if len(tensor.shape) != 3:\n            raise RuntimeError(\"The input tensor has to be 3d!\")\n\n        if self.cached_penc is not None and self.cached_penc.shape == tensor.shape:\n            return self.cached_penc\n\n        self.cached_penc = None\n        batch_size, x, orig_ch = tensor.shape\n        pos_x = torch.arange(x, device=tensor.device).type(self.inv_freq.type())\n        sin_inp_x = torch.einsum(\"i,j->ij\", pos_x, self.inv_freq)\n        emb_x = get_emb(sin_inp_x)\n        emb = torch.zeros((x, self.channels), device=tensor.device).type(tensor.type())\n        emb[:, : self.channels] = emb_x\n\n        self.cached_penc = emb[None, :, :orig_ch].repeat(batch_size, 1, 1)\n        return self.cached_penc\n\n\nclass VectorInstanceMemory(nn.Module):\n\n    def __init__(self,\n                 dim_in, number_ins, bank_size, mem_len, mem_select_dist_ranges\n                 ):\n        super().__init__()\n        self.max_number_ins = 3 * number_ins # make sure this is not exceeded at initial training when results could be quite random\n        self.bank_size = bank_size\n        self.mem_len = mem_len\n        self.dim_in = dim_in\n        self.mem_select_dist_ranges = mem_select_dist_ranges\n\n        p_enc_1d = PositionalEncoding1D(dim_in)\n        fake_tensor = torch.zeros((1, 1000, dim_in)) # suppose all sequences are shorter than 1000\n        self.cached_pe = p_enc_1d(fake_tensor)[0]\n\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n    \n    def set_bank_size(self, bank_size):\n        self.bank_size = bank_size\n\n    def init_memory(self, bs):\n        self.mem_bank = torch.zeros((self.bank_size, bs, self.max_number_ins, self.dim_in), dtype=torch.float32).cuda()\n        self.mem_bank_seq_id = torch.zeros((self.bank_size, bs, self.max_number_ins), dtype=torch.long).cuda()\n        self.mem_bank_trans = torch.zeros((self.bank_size, bs,  3),dtype=torch.float32).cuda()\n        self.mem_bank_rot = torch.zeros((self.bank_size, bs, 3, 3),dtype=torch.float32).cuda()\n        self.batch_mem_embeds_dict = {}\n        self.batch_mem_relative_pe_dict = {}\n        self.batch_key_padding_dict = {}\n        self.curr_rot = torch.zeros((bs,3,3),dtype=torch.float32).cuda()\n        self.curr_trans = torch.zeros((bs,3),dtype=torch.float32).cuda()\n        
self.gt_lines_info = {}\n\n        # memory recording information\n        self.instance2mem = [{} for _ in range(bs)]\n        self.num_ins = [0 for _ in range(bs)]\n        self.active_mem_ids = [None for _ in range(bs)]\n        self.valid_track_idx = [None for _ in range(bs)]\n        self.random_bev_masks = [None for _ in range(bs)]\n        init_entry_length = torch.tensor([0]*self.max_number_ins).long()\n        self.mem_entry_lengths = [init_entry_length.clone() for _ in range(bs)]\n\n    def update_memory(self, batch_i, is_first_frame, propagated_ids, prev_out, num_tracks, \n                      seq_idx, timestep):\n        if is_first_frame:\n            mem_instance_ids = torch.arange(propagated_ids.shape[0])\n            track2mem_info = {i: i for i in range(len(propagated_ids))}\n            num_instances = len(propagated_ids)\n        else:\n            track2mem_info_prev = self.instance2mem[batch_i]\n            track2mem_info = {}\n            num_instances = self.num_ins[batch_i]\n            for pred_i, propagated_id in enumerate(propagated_ids):\n                if propagated_id < num_tracks: # existing tracks\n                    track2mem_info[pred_i] = track2mem_info_prev[propagated_id.item()]\n                else: # newborn instances\n                    track2mem_info[pred_i] = num_instances\n                    num_instances += 1\n            mem_instance_ids = torch.tensor([track2mem_info[item] for item in range(len(propagated_ids))]).long()\n        \n        assert num_instances < self.max_number_ins, 'Number of instances larger than mem size!'\n\n        #NOTE: put information into the memory, need to detach the scores to block gradient backprop \n        # from future time steps\n        prev_embeddings = prev_out['hs_embeds'][batch_i]\n        prev_scores = prev_out['scores'][batch_i]\n        prev_scores, prev_labels = prev_scores.max(-1)\n        prev_scores = prev_scores.sigmoid().detach()\n        \n        mem_lens_per_ins = self.mem_entry_lengths[batch_i][mem_instance_ids]\n\n        # insert information into mem bank\n        for ins_idx, mem_id in enumerate(mem_instance_ids):\n            if mem_lens_per_ins[ins_idx] < self.bank_size:\n                self.mem_bank[mem_lens_per_ins[ins_idx], batch_i, mem_id] = prev_embeddings[propagated_ids[ins_idx]]\n                self.mem_bank_seq_id[mem_lens_per_ins[ins_idx], batch_i, mem_id] = seq_idx\n            else:\n                self.mem_bank[:self.bank_size-1, batch_i, mem_id] = self.mem_bank[1:self.bank_size, batch_i, mem_id]\n                self.mem_bank[-1, batch_i, mem_id] = prev_embeddings[propagated_ids[ins_idx]]\n                self.mem_bank_seq_id[:self.bank_size-1, batch_i, mem_id] = self.mem_bank_seq_id[1:self.bank_size, batch_i, mem_id]\n                self.mem_bank_seq_id[-1, batch_i, mem_id] = seq_idx\n\n        if self.curr_t < self.bank_size:\n            self.mem_bank_rot[self.curr_t, batch_i] = self.curr_rot[batch_i]\n            self.mem_bank_trans[self.curr_t, batch_i] = self.curr_trans[batch_i]\n        else:\n            self.mem_bank_rot[:self.bank_size-1, batch_i] = self.mem_bank_rot[1:, batch_i].clone()\n            self.mem_bank_rot[-1, batch_i] = self.curr_rot[batch_i]\n            self.mem_bank_trans[:self.bank_size-1, batch_i] = self.mem_bank_trans[1:, batch_i].clone()\n            self.mem_bank_trans[-1, batch_i] = self.curr_trans[batch_i]\n\n        # Update the mem recording information\n        self.instance2mem[batch_i] = track2mem_info\n        
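# Worked example (editor's note): on the first frame, tracks map to memory\n        # slots one-to-one, e.g. {0: 0, 1: 1}; later, a newborn instance with\n        # propagated_id >= num_tracks is appended at slot num_instances. Slots\n        # are never reused, which is why max_number_ins is sized to 3x the\n        # per-frame instance budget.\n        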
self.num_ins[batch_i] = num_instances\n        self.mem_entry_lengths[batch_i][mem_instance_ids] += 1\n        self.active_mem_ids[batch_i] = mem_instance_ids.long().to(propagated_ids.device)\n        active_mem_entry_lens = self.mem_entry_lengths[batch_i][self.active_mem_ids[batch_i]]\n        self.valid_track_idx[batch_i] = torch.where(active_mem_entry_lens >= 1)[0]\n\n        #print('Active memory ids:', self.active_mem_ids[batch_i])\n        #print('Memory entry lens:', active_mem_entry_lens)\n        #print('Valid track idx:', self.valid_track_idx[batch_i])\n\n    def prepare_transformation_batch(self,history_e2g_trans,history_e2g_rot,curr_e2g_trans,curr_e2g_rot):\n        history_g2e_matrix = torch.stack([torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device),]*len(history_e2g_trans), dim=0)\n        history_g2e_matrix[:, :3, :3] = torch.transpose(history_e2g_rot, 1, 2)\n        history_g2e_matrix[:, :3, 3] = -torch.bmm(torch.transpose(history_e2g_rot, 1, 2), history_e2g_trans[..., None]).squeeze(-1)\n\n        curr_g2e_matrix = torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device)\n        curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n        curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n        curr_e2g_matrix = torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device)\n        curr_e2g_matrix[:3, :3] = curr_e2g_rot\n        curr_e2g_matrix[:3, 3] = curr_e2g_trans\n\n        history_e2g_matrix = torch.stack([torch.eye(4, dtype=torch.float64, device=history_e2g_trans.device),]*len(history_e2g_trans), dim=0)\n        history_e2g_matrix[:, :3, :3] = history_e2g_rot\n        history_e2g_matrix[:, :3, 3] = history_e2g_trans\n\n        history_curr2prev_matrix = torch.bmm(history_g2e_matrix, repeat(curr_e2g_matrix,'n1 n2 -> r n1 n2', r=len(history_g2e_matrix)))\n        history_prev2curr_matrix = torch.bmm(repeat(curr_g2e_matrix, 'n1 n2 -> r n1 n2', r=len(history_e2g_matrix)), history_e2g_matrix)\n        \n        return history_curr2prev_matrix, history_prev2curr_matrix\n\n    def clear_dict(self,):\n        self.batch_mem_embeds_dict = {}\n        self.batch_mem_relative_pe_dict = {}\n        self.batch_key_padding_dict = {}\n\n    def trans_memory_bank(self, query_prop, b_i, metas):\n        seq_id = metas['local_idx']\n        \n        active_mem_ids = self.active_mem_ids[b_i]\n        mem_entry_lens = self.mem_entry_lengths[b_i][active_mem_ids]\n        num_track_ins = len(active_mem_ids)\n        valid_mem_len = min(self.curr_t, self.mem_len)\n        valid_bank_size = min(self.curr_t, self.bank_size)\n        mem_trans = self.mem_bank_trans[:, b_i]\n        mem_rots = self.mem_bank_rot[:, b_i]\n\n        if self.training:\n            # Note: at training time, bank_size must be the same as mem_len, no selection needed\n            assert self.mem_len == self.bank_size, 'at training time, bank_size must be the same as mem_len'\n            mem_embeds = self.mem_bank[:, b_i, active_mem_ids]\n            mem_seq_ids = self.mem_bank_seq_id[:, b_i, active_mem_ids]\n        else:\n            # at test time, the bank size can be much longer, and we need the selection strategy\n            mem_embeds = torch.zeros_like(self.mem_bank[:self.mem_len, b_i, active_mem_ids])\n            mem_seq_ids = torch.zeros_like(self.mem_bank_seq_id[:self.mem_len, b_i, active_mem_ids])\n\n        # Put information into mem embeddings and pos_ids, prepare for attention-fusion\n        # Also prepare the pose information for the query propagation\n        
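# Editor's sketch of the loop below: each active instance contributes up to\n        # mem_len banked embeddings. When effective_len <= mem_len, all entries\n        # are taken; otherwise select_memory_entries picks a distance-stratified\n        # subset, scanning mem_select_dist_ranges in reverse and keeping one\n        # banked pose per range (ordered far -> close).\n        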
all_pose_select_indices = []\n        all_select_indices = []\n        for idx, active_idx in enumerate(active_mem_ids):\n            effective_len = mem_entry_lens[idx]\n            valid_mem_trans = mem_trans[:valid_bank_size]\n            trunc_eff_len = min(effective_len, self.bank_size)\n            valid_pose_ids = torch.arange(valid_bank_size-trunc_eff_len, valid_bank_size)\n            #print('ins {}, valid pose ids {}'.format(idx, valid_pose_ids))\n            if effective_len <= self.mem_len:\n                select_indices = torch.arange(effective_len)\n            else:\n                select_indices = self.select_memory_entries(valid_mem_trans[-trunc_eff_len:], metas)\n            pose_select_indices = valid_pose_ids[select_indices]\n            mem_embeds[:len(select_indices), idx] = self.mem_bank[select_indices, b_i, active_idx]\n            mem_seq_ids[:len(select_indices), idx] = self.mem_bank_seq_id[select_indices, b_i, active_idx]\n            all_pose_select_indices.append(pose_select_indices)\n            all_select_indices.append(select_indices)\n        \n        # prepare mem padding mask\n        key_padding_mask = torch.ones((self.mem_len, num_track_ins)).bool().cuda()\n        padding_trunc_loc = torch.clip(mem_entry_lens, max=self.mem_len)\n        for ins_i in range(num_track_ins):\n            key_padding_mask[:padding_trunc_loc[ins_i], ins_i] = False\n        key_padding_mask = key_padding_mask.T\n\n        # prepare relative seq idx gap\n        relative_seq_idx = torch.zeros_like(mem_embeds[:,:,0]).long()\n        relative_seq_idx[:valid_mem_len] = seq_id - mem_seq_ids[:valid_mem_len]\n        relative_seq_pe = self.cached_pe[relative_seq_idx].to(mem_embeds.device)\n\n        # prepare relative pose information for each active instance\n        curr2prev_matrix, prev2curr_matrix = self.prepare_transformation_batch(mem_trans[:valid_bank_size],\n            mem_rots[:valid_bank_size], self.curr_trans[b_i], self.curr_rot[b_i])\n        pose_matrix = prev2curr_matrix.float()[:,:3]\n        rot_mat = pose_matrix[..., :3].cpu().numpy()\n        rot = R.from_matrix(rot_mat)\n        translation = pose_matrix[..., 3]\n\n        if self.training:\n            rot, translation = self.add_noise_to_pose(rot, translation)\n\n        rot_quat = torch.tensor(rot.as_quat()).float().to(pose_matrix.device)\n        pose_info = torch.cat([rot_quat, translation], dim=1)\n        pose_info_per_ins = torch.zeros((valid_mem_len, num_track_ins, pose_info.shape[1])).to(pose_info.device)\n\n        for ins_idx in range(num_track_ins):\n            pose_select_indices = all_pose_select_indices[ins_idx]\n            pose_info_per_ins[:len(pose_select_indices), ins_idx] = pose_info[pose_select_indices]\n\n        mem_embeds_new = mem_embeds.clone()\n        mem_embeds_valid = rearrange(mem_embeds[:valid_mem_len], 't n c -> (t n) c')\n        pose_info_per_ins = rearrange(pose_info_per_ins, 't n c -> (t n) c')\n        mem_embeds_prop = query_prop(\n            mem_embeds_valid,\n            pose_info_per_ins\n        )\n        mem_embeds_new[:valid_mem_len] = rearrange(mem_embeds_prop, '(t n) c -> t n c', t=valid_mem_len)\n\n        self.batch_mem_embeds_dict[b_i] = mem_embeds_new.clone().detach()\n        self.batch_mem_relative_pe_dict[b_i] = relative_seq_pe\n        self.batch_key_padding_dict[b_i] = key_padding_mask\n    \n    def add_noise_to_pose(self, rot, trans):\n        rot_euler = rot.as_euler('zxy')\n        # 0.08 rad std is roughly 5 degrees, so 3-sigma is roughly 15 degrees\n        
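# Design note (editor's): this perturbation is a training-time robustness\n        # augmentation; rotation noise is drawn on all three Euler axes, while\n        # the translation noise below zeroes its z component, matching the\n        # (near-)planar driving setup.\n        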
noise_euler = np.random.randn(*list(rot_euler.shape)) * 0.08\n        rot_euler += noise_euler\n        noisy_rot = R.from_euler('zxy', rot_euler)\n\n        # translation noise with 0.25m std; the height (z) is kept exact\n        noise_trans = torch.randn_like(trans) * 0.25\n        noise_trans[:, 2] = 0\n        noisy_trans = trans + noise_trans\n\n        return noisy_rot, noisy_trans\n\n    def select_memory_entries(self, mem_trans, curr_meta):\n        history_e2g_trans = mem_trans[:, :2].cpu().numpy()\n        curr_e2g_trans = np.array(curr_meta['ego2global_translation'][:2])\n        dists = np.linalg.norm(history_e2g_trans - curr_e2g_trans[None, :], axis=1)\n\n        sorted_indices = np.argsort(dists)\n        sorted_dists = dists[sorted_indices]\n        covered = np.zeros_like(sorted_indices).astype(bool)\n        selected_ids = []\n        for dist_range in self.mem_select_dist_ranges[::-1]:\n            outer_valid_flags = (sorted_dists >= dist_range) & ~covered\n            if outer_valid_flags.any():\n                pick_id = np.where(outer_valid_flags)[0][0]\n                covered[pick_id:] = True\n            else:\n                inner_valid_flags = (sorted_dists < dist_range) & ~covered\n                if inner_valid_flags.any():\n                    pick_id = np.where(inner_valid_flags)[0][-1]\n                    covered[pick_id] = True\n                else:\n                    # fall back to the closest entries, ordered far -> close\n                    return np.array(sorted_indices[:4][::-1])\n            selected_ids.append(pick_id)\n\n        selected_mem_ids = sorted_indices[np.array(selected_ids)]\n        return selected_mem_ids\n"
  },
  {
    "path": "plugin/models/necks/__init__.py",
    "content": "from .gru import ConvGRU"
  },
  {
    "path": "plugin/models/necks/gru.py",
    "content": "import torch\nimport torch.nn as nn\nfrom mmdet.models import NECKS\nfrom mmcv.cnn.utils import kaiming_init, constant_init\n\n\n@NECKS.register_module()\nclass ConvGRU(nn.Module):\n    def __init__(self, out_channels):\n        super(ConvGRU, self).__init__()\n        kernel_size = 1\n        padding = kernel_size // 2\n        self.convz = nn.Conv2d(2*out_channels, \n            out_channels, kernel_size=kernel_size, padding=padding, bias=False)\n        self.convr = nn.Conv2d(2*out_channels, \n            out_channels, kernel_size=kernel_size, padding=padding, bias=False)\n        self.convq = nn.Conv2d(2*out_channels, \n            out_channels, kernel_size=kernel_size, padding=padding, bias=False)\n        self.ln = nn.LayerNorm(out_channels)\n        self.zero_out = nn.Conv2d(out_channels, out_channels, 1, 1, bias=True)\n        \n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                kaiming_init(m)\n        nn.init.zeros_(self.zero_out.weight)\n        nn.init.zeros_(self.zero_out.bias)\n        \n    def forward(self, h, x):\n        if len(h.shape) == 3:\n            h = h.unsqueeze(0)\n        if len(x.shape) == 3:\n            x = x.unsqueeze(0)\n        \n        hx = torch.cat([h, x], dim=1) # [1, 2c, h, w]\n        z = torch.sigmoid(self.convz(hx))\n        r = torch.sigmoid(self.convr(hx))\n        new_x = torch.cat([r * h, x], dim=1) # [1, 2c, h, w]\n        q = self.convq(new_x)\n\n        out = ((1 - z) * h + z * q) # (1, C, H, W)\n        out = self.ln(out.permute(0, 2, 3, 1)).permute(0, 3, 1, 2).contiguous()\n        out = self.zero_out(out)\n        out = out + x\n        out = out.squeeze(0)\n\n        return out\n"
  },
  {
    "path": "plugin/models/transformer_utils/CustomMSDeformableAttention.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n\nfrom mmcv.ops.multi_scale_deform_attn import multi_scale_deformable_attn_pytorch\nimport mmcv\nimport cv2 as cv\nimport copy\nimport warnings\nfrom matplotlib import pyplot as plt\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import xavier_init, constant_init\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import TransformerLayerSequence\nimport math\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\nfrom mmcv.utils import (ConfigDict, build_from_cfg, deprecated_api_warning,\n                        to_2tuple)\n\nfrom mmcv.utils import ext_loader\nfrom mmcv.ops.multi_scale_deform_attn import (MultiScaleDeformableAttnFunction,\n                                              multi_scale_deformable_attn_pytorch)\nfrom .fp16_dattn import MultiScaleDeformableAttnFunctionFp32\n\n@ATTENTION.register_module()\nclass CustomMSDeformableAttention(BaseModule):\n    \"\"\"An attention module used in Deformable-Detr.\n\n    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 64.\n        num_levels (int): The number of feature map used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=4,\n                 im2col_step=64,\n                 dropout=0.1,\n                 use_sampling_offsets=True,\n                 batch_first=False,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.batch_first = batch_first\n        self.fp16_enabled = False\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.use_sampling_offsets = use_sampling_offsets\n        if use_sampling_offsets:\n            self.sampling_offsets = nn.Linear(\n                embed_dims, num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims,\n                                           num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        if self.use_sampling_offsets:\n            constant_init(self.sampling_offsets, 0.)\n            thetas = torch.arange(\n                self.num_heads,\n                dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n            grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n            grid_init = (grid_init /\n                        grid_init.abs().max(-1, keepdim=True)[0]).view(\n                self.num_heads, 1, 1,\n                2).repeat(1, self.num_levels, self.num_points, 1)\n            for i in range(self.num_points):\n                grid_init[:, :, i, :] *= i + 1\n\n            self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    @deprecated_api_warning({'residual': 'identity'},\n                            
cls_name='MultiScaleDeformableAttention')\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                flag='decoder',\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`. Default\n                None.\n            reference_points (Tensor):  The normalized reference\n                points with shape (bs, num_query, num_levels, num_points, 2),\n                all elements is range in [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n        if not self.batch_first:\n            # change to (bs, num_query ,embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n\n        if self.use_sampling_offsets:\n            sampling_offsets = self.sampling_offsets(query).view(\n                bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n        else:\n            sampling_offsets = query.new_zeros((bs, num_query, self.num_heads, self.num_levels, self.num_points, 2))\n        \n        attention_weights = self.attention_weights(query).view(\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   
self.num_levels,\n                                                   self.num_points)\n        \n        # TODO: try removing the sampling offsets\n        offset_normalizer = torch.stack(\n            [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) # (w, h) order, matching the (x, y) offsets\n        _, _, num_points, _ = reference_points.shape\n        # (bs, num_queries, num_pts, 2) ->\n        # (bs, num_queries, num_heads, num_lvls, num_pts, 2) \n        reference_points = reference_points[:, :, None, None, :, :]\n        # reference_points[..., 1:2] = -reference_points[..., 1:2]\n        sampling_locations = reference_points + \\\n            (sampling_offsets # (bs, num_queries, num_heads, num_lvls, num_pts, 2) \n            / offset_normalizer[None, None, None, :, None, :])\n        assert list(sampling_locations.shape) == [bs, num_query, self.num_heads, self.num_levels, num_points, 2]\n        \n        if torch.cuda.is_available() and value.is_cuda:\n            # using fp16 deformable attention is unstable because it performs many sum operations\n            output = MultiScaleDeformableAttnFunctionFp32.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n\n        output = self.output_proj(output)\n\n        if not self.batch_first:\n            # (num_query, bs ,embed_dims)\n            output = output.permute(1, 0, 2)\n\n        return self.dropout(output) + identity\n"
  },
  {
    "path": "plugin/models/transformer_utils/MapTransformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\nimport copy\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import build_activation_layer, build_norm_layer, xavier_init\nfrom mmcv.cnn.bricks.registry import (TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import (BaseTransformerLayer,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer)\nfrom mmcv.runner.base_module import BaseModule, ModuleList\n\nfrom mmdet.models.utils.builder import TRANSFORMER\n\nfrom mmdet.models.utils.transformer import Transformer\n\nfrom .CustomMSDeformableAttention import CustomMSDeformableAttention\nfrom mmdet.models.utils.transformer import inverse_sigmoid\n\n    \n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass MapTransformerDecoder_new(BaseModule):\n    \"\"\"Implements the decoder in DETR transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. Default:\n            `LN`.\n    \"\"\"\n\n    def __init__(self, \n                 transformerlayers=None, \n                 num_layers=None, \n                 prop_add_stage=0,\n                 return_intermediate=True,\n                 init_cfg=None):\n        \n        super().__init__(init_cfg)\n        if isinstance(transformerlayers, dict):\n            transformerlayers = [\n                copy.deepcopy(transformerlayers) for _ in range(num_layers)\n            ]\n        else:\n            assert isinstance(transformerlayers, list) and \\\n                   len(transformerlayers) == num_layers\n        self.num_layers = num_layers\n        self.layers = ModuleList()\n        for i in range(num_layers):\n            self.layers.append(build_transformer_layer(transformerlayers[i]))\n        self.embed_dims = self.layers[0].embed_dims\n        self.pre_norm = self.layers[0].pre_norm\n        self.return_intermediate = return_intermediate\n        self.prop_add_stage = prop_add_stage\n        assert prop_add_stage >= 0  and prop_add_stage < num_layers\n\n    def forward(self,\n                query,\n                key,\n                value,\n                query_pos,\n                key_padding_mask,\n                query_key_padding_mask,\n                reference_points,\n                spatial_shapes,\n                level_start_index,\n                reg_branches,\n                cls_branches,\n                predict_refine,\n                memory_bank=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoder`.\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n            reference_points (Tensor): The reference\n                points of offset. has shape (bs, num_query, num_points, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            reg_branch: (obj:`nn.ModuleList`): Used for\n                refining the regression results. 
Only would\n                be passed when with_box_refine is True,\n                otherwise would be passed a `None`.\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        num_queries, bs, embed_dims = query.shape\n        output = query\n        intermediate = []\n        intermediate_reference_points = []\n\n        for lid, layer in enumerate(self.layers):\n            tmp = reference_points.clone()\n            tmp[..., 1:2] = 1.0 - reference_points[..., 1:2] # reverse y-axis\n\n            output = layer(\n                output,\n                key,\n                value,\n                query_pos=query_pos,\n                key_padding_mask=key_padding_mask,\n                reference_points=tmp,\n                spatial_shapes=spatial_shapes,\n                level_start_index=level_start_index,\n                query_key_padding_mask=query_key_padding_mask,\n                memory_bank=memory_bank,\n                **kwargs)\n            \n            reg_points = reg_branches[lid](output.permute(1, 0, 2)) # (bs, num_q, 2*num_points)\n            bs, num_queries, num_points2 = reg_points.shape\n            reg_points = reg_points.view(bs, num_queries, num_points2//2, 2) # range (0, 1)\n            \n            if predict_refine:\n                new_reference_points = reg_points + inverse_sigmoid(\n                    reference_points\n                )\n                new_reference_points = new_reference_points.sigmoid()\n            else:\n                new_reference_points = reg_points.sigmoid() # (bs, num_q, num_points, 2)\n            \n            reference_points = new_reference_points.clone().detach()\n\n            if self.return_intermediate:\n                intermediate.append(output.permute(1, 0, 2)) # [(bs, num_q, embed_dims)]\n                intermediate_reference_points.append(new_reference_points) # (bs, num_q, num_points, 2)\n\n        if self.return_intermediate:\n            return intermediate, intermediate_reference_points\n\n        return output, reference_points\n\n@TRANSFORMER_LAYER.register_module()\nclass MapTransformerLayer(BaseTransformerLayer):\n    \"\"\"Base `TransformerLayer` for vision transformer.\n\n    It can be built from `mmcv.ConfigDict` and support more flexible\n    customization, for example, using any number of `FFN or LN ` and\n    use different kinds of `attention` by specifying a list of `ConfigDict`\n    named `attn_cfgs`. It is worth mentioning that it supports `prenorm`\n    when you specifying `norm` as the first element of `operation_order`.\n    More details about the `prenorm`: `On Layer Normalization in the\n    Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .\n\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for `self_attention` or `cross_attention` modules,\n            The order of the configs in the list should be consistent with\n            corresponding attentions in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config. 
Default: None.\n        ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):\n            Configs for FFN, The order of the configs in the list should be\n            consistent with corresponding ffn in operation_order.\n            If it is a dict, all of the attention modules in operation_order\n            will be built with this config.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Support `prenorm` when you specifying first element as `norm`.\n            Default：None.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n        batch_first (bool): Key, Query and Value are shape\n            of (batch, n, embed_dim)\n            or (n, batch, embed_dim). Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs=None,\n                 ffn_cfgs=dict(\n                     type='FFN',\n                     embed_dims=256,\n                     feedforward_channels=1024,\n                     num_fcs=2,\n                     ffn_drop=0.,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                 ),\n                 operation_order=None,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None,\n                 batch_first=False,\n                 **kwargs):\n\n        super().__init__(\n            attn_cfgs=attn_cfgs,\n            ffn_cfgs=ffn_cfgs,\n            operation_order=operation_order,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            batch_first=batch_first,\n            **kwargs\n        )\n\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                memory_query=None,\n                query_pos=None,\n                key_pos=None,\n                attn_masks=None,\n                query_key_padding_mask=None,\n                key_padding_mask=None,\n                memory_bank=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoderLayer`.\n\n        **kwargs contains some specific arguments of attentions.\n\n        Args:\n            query (Tensor): The input query with shape\n                [num_queries, bs, embed_dims] if\n                self.batch_first is False, else\n                [bs, num_queries embed_dims].\n            key (Tensor): The key tensor with shape [num_keys, bs,\n                embed_dims] if self.batch_first is False, else\n                [bs, num_keys, embed_dims] .\n            value (Tensor): The value tensor with same shape as `key`.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            key_pos (Tensor): The positional encoding for `key`.\n                Default: None.\n            attn_masks (List[Tensor] | None): 2D Tensor used in\n                calculation of corresponding attention. The length of\n                it should equal to the number of `attention` in\n                `operation_order`. Default: None.\n            query_key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_queries]. Only used in `self_attn` layer.\n                Defaults to None.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_keys]. 
Default: None.\n\n        Returns:\n            Tensor: forwarded results with shape [num_queries, bs, embed_dims].\n        \"\"\"\n\n        if memory_bank is not None:\n            bs = query.shape[1]\n            all_valid_track_idx = []\n            for b_i in range(bs):\n                all_valid_track_idx.append(memory_bank.valid_track_idx[b_i])\n        \n        norm_index = 0\n        attn_index = 0\n        ffn_index = 0\n        identity = query\n        if attn_masks is None:\n            attn_masks = [None for _ in range(self.num_attn)]\n        elif isinstance(attn_masks, torch.Tensor):\n            attn_masks = [\n                copy.deepcopy(attn_masks) for _ in range(self.num_attn)\n            ]\n            warnings.warn(f'Use same attn_mask in all attentions in '\n                          f'{self.__class__.__name__} ')\n        else:\n            assert len(attn_masks) == self.num_attn, f'The length of ' \\\n                        f'attn_masks {len(attn_masks)} must be equal ' \\\n                        f'to the number of attention in ' \\\n                        f'operation_order {self.num_attn}'\n        \n        for layer in self.operation_order:\n            if layer == 'self_attn':\n                if memory_query is None:\n                    temp_key = temp_value = query\n                else:\n                    temp_key = temp_value = torch.cat([memory_query, query], dim=0)\n                \n                query = self.attentions[attn_index](\n                    query,\n                    temp_key,\n                    temp_value,\n                    identity if self.pre_norm else None,\n                    query_pos=query_pos,\n                    key_pos=query_pos,\n                    attn_mask=attn_masks[attn_index],\n                    key_padding_mask=query_key_padding_mask,\n                    **kwargs)\n                attn_index += 1\n                identity = query\n\n            elif layer == 'norm':\n                query = self.norms[norm_index](query)\n                norm_index += 1\n\n            elif layer == 'cross_attn':\n                if attn_index == 1:\n                    query_bev = self.attentions[attn_index](\n                        query,\n                        key,\n                        value,\n                        identity if self.pre_norm else None,\n                        query_pos=query_pos,\n                        key_pos=key_pos,\n                        attn_mask=attn_masks[attn_index],\n                        key_padding_mask=key_padding_mask,\n                        **kwargs)\n                    attn_index += 1\n                else:\n                    # Memory cross attention\n                    assert attn_index == 2\n                    if memory_bank is not None:\n                        bs = query.shape[1]\n                        query_i_list = []\n                        for b_i in range(bs):\n                            valid_track_idx = all_valid_track_idx[b_i] \n                            query_i = query[:, b_i].clone()\n                            query_i = query_i[None,:]\n                            if len(valid_track_idx) != 0:\n                                mem_embeds = memory_bank.batch_mem_embeds_dict[b_i][:, valid_track_idx, :]\n                                mem_key_padding_mask = memory_bank.batch_key_padding_dict[b_i][valid_track_idx]\n                                mem_key_pos = memory_bank.batch_mem_relative_pe_dict[b_i][:, valid_track_idx]\n\n                    
            query_i[:, valid_track_idx] = self.attentions[attn_index](\n                                        query_i[:,valid_track_idx],\n                                        mem_embeds,\n                                        mem_embeds,\n                                        identity=None,\n                                        query_pos=None,\n                                        key_pos=mem_key_pos,\n                                        attn_mask=None,\n                                        key_padding_mask=mem_key_padding_mask,\n                                        **kwargs)\n\n                            query_i_list.append(query_i[0])\n                        query_memory = torch.stack(query_i_list).permute(1, 0, 2)\n                    else:\n                        query_memory = torch.zeros_like(query_bev)\n\n                    query = query_memory + query_bev\n                    identity = query\n                    attn_index += 1\n\n            elif layer == 'ffn':\n                query = self.ffns[ffn_index](\n                    query, identity if self.pre_norm else None)\n                ffn_index += 1\n\n        return query\n\n@TRANSFORMER.register_module()\nclass MapTransformer(Transformer):\n    \"\"\"Implements the DeformableDETR transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 num_feature_levels=1,\n                 num_points=20,\n                 coord_dim=2,\n                 **kwargs):\n        super().__init__(**kwargs)\n        self.num_feature_levels = num_feature_levels\n        self.embed_dims = self.encoder.embed_dims\n        self.coord_dim = coord_dim\n        self.num_points = num_points\n        self.init_layers()\n\n    def init_layers(self):\n        \"\"\"Initialize layers of the DeformableDetrTransformer.\"\"\"\n        # self.level_embeds = nn.Parameter(\n        #     torch.Tensor(self.num_feature_levels, self.embed_dims))\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, CustomMSDeformableAttention):\n                m.init_weights()\n\n    def forward(self,\n                mlvl_feats,\n                mlvl_masks,\n                query_embed,\n                mlvl_pos_embeds,\n                init_reference_points,\n                reg_branches=None,\n                cls_branches=None,\n                memory_query=None,\n                memory_bank=None,\n                **kwargs):\n        \"\"\"Forward function for `Transformer`.\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. 
Each element has shape\n                [bs, embed_dims, h, w].\n            mlvl_masks (list(Tensor)): The key_padding_mask from\n                different level used for encoder and decoder,\n                each element has shape  [bs, h, w].\n            query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            mlvl_pos_embeds (list(Tensor)): The positional encoding\n                of feats from different level, has the shape\n                 [bs, embed_dims, h, w].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when\n                `with_box_refine` is True. Default to None.\n            cls_branches (obj:`nn.ModuleList`): Classification heads\n                for feature maps from each decoder layer. Only would\n                 be passed when `as_two_stage`\n                 is True. Default to None.\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n                - inter_states: Outputs from decoder. If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). \\\n                    Only would be returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps., has shape \\\n                    (batch, h*w, 4). 
Only would \\\n                    be returned when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n\n        feat_flatten = []\n        mask_flatten = []\n        # lvl_pos_embed_flatten = []\n        spatial_shapes = []\n        for lvl, (feat, mask, pos_embed) in enumerate(\n                zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):\n            bs, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            spatial_shapes.append(spatial_shape)\n            feat = feat.flatten(2).transpose(1, 2)\n            mask = mask.flatten(1)\n            feat_flatten.append(feat)\n            mask_flatten.append(mask)\n        feat_flatten = torch.cat(feat_flatten, 1)\n        mask_flatten = torch.cat(mask_flatten, 1)\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=feat_flatten.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))\n        \n        feat_flatten = feat_flatten.permute(1, 0, 2)  # (H*W, bs, embed_dims)\n\n        # decoder\n        query = query_embed.permute(1, 0, 2) # (num_q, bs, embed_dims)\n        if memory_query is not None:\n            memory_query = memory_query.permute(1, 0, 2)\n\n        inter_states, inter_references = self.decoder(\n            query=query,\n            key=None,\n            value=feat_flatten,\n            query_pos=None,\n            key_padding_mask=mask_flatten,\n            reference_points=init_reference_points,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            reg_branches=reg_branches,\n            cls_branches=cls_branches,\n            memory_query=memory_query,\n            memory_bank=memory_bank,\n            **kwargs)\n        \n        return inter_states, init_reference_points, inter_references"
  },
  {
    "path": "plugin/models/transformer_utils/__init__.py",
    "content": "from .deformable_transformer import DeformableDetrTransformer_, DeformableDetrTransformerDecoder_\nfrom .base_transformer import PlaceHolderEncoder\nfrom .CustomMSDeformableAttention import CustomMSDeformableAttention\nfrom .MapTransformer import MapTransformer, MapTransformerDecoder_new, MapTransformerLayer"
  },
  {
    "path": "plugin/models/transformer_utils/base_transformer.py",
    "content": "import numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmcv.cnn import xavier_init, constant_init\nfrom mmcv.cnn.bricks.registry import (ATTENTION,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import (MultiScaleDeformableAttention,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner.base_module import BaseModule\n\nfrom mmdet.models.utils.builder import TRANSFORMER\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass PlaceHolderEncoder(nn.Module):\n\n    def __init__(self, *args, embed_dims=None, **kwargs):\n        super(PlaceHolderEncoder, self).__init__()\n        self.embed_dims = embed_dims\n\n    def forward(self, *args, query=None, **kwargs):\n        \n        return query"
  },
  {
    "path": "plugin/models/transformer_utils/deformable_transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import build_activation_layer, build_norm_layer, xavier_init\nfrom mmcv.cnn.bricks.registry import (TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import (BaseTransformerLayer,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner.base_module import BaseModule\nfrom torch.nn.init import normal_\n\nfrom mmdet.models.utils.builder import TRANSFORMER\n\nfrom mmdet.models.utils.transformer import Transformer\n\ntry:\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention\nexcept ImportError:\n    warnings.warn(\n        '`MultiScaleDeformableAttention` in MMCV has been moved to '\n        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')\n    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention\n\nfrom .fp16_dattn import MultiScaleDeformableAttentionFp16\n\ndef inverse_sigmoid(x, eps=1e-5):\n    \"\"\"Inverse function of sigmoid.\n    Args:\n        x (Tensor): The tensor to do the\n            inverse.\n        eps (float): EPS avoid numerical\n            overflow. Defaults 1e-5.\n    Returns:\n        Tensor: The x has passed the inverse\n            function of sigmoid, has same\n            shape with input.\n    \"\"\"\n    x = x.clamp(min=0, max=1)\n    x1 = x.clamp(min=eps)\n    x2 = (1 - x).clamp(min=eps)\n    return torch.log(x1 / x2)\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass DeformableDetrTransformerDecoder_(TransformerLayerSequence):\n    \"\"\"Implements the decoder in DETR transformer.\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. Default：\n            `LN`.\n    \"\"\"\n\n    def __init__(self, *args, \n            return_intermediate=False, coord_dim=2, kp_coord_dim=2, **kwargs):\n\n        super(DeformableDetrTransformerDecoder_, self).__init__(*args, **kwargs)\n        self.return_intermediate = return_intermediate\n        self.coord_dim = coord_dim\n        self.kp_coord_dim = kp_coord_dim\n\n    def forward(self,\n                query,\n                *args,\n                reference_points=None,\n                valid_ratios=None,\n                reg_branches=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoder`.\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n            reference_points (Tensor): The reference\n                points of offset. has shape\n                (bs, num_query, 4) when as_two_stage,\n                otherwise has shape ((bs, num_query, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            reg_branch: (obj:`nn.ModuleList`): Used for\n                refining the regression results. 
Only would\n                be passed when with_box_refine is True,\n                otherwise would be passed a `None`.\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        output = query\n        intermediate = []\n        intermediate_reference_points = []\n        for lid, layer in enumerate(self.layers):\n\n            reference_points_input = \\\n                reference_points[:, :, None,:self.kp_coord_dim] * \\\n                valid_ratios[:, None,:,:self.kp_coord_dim]\n            # if reference_points.shape[-1] == 3 and self.kp_coord_dim==2:\n            output = layer(\n                output,\n                *args,\n                reference_points=reference_points_input[...,:self.kp_coord_dim],\n                **kwargs)\n            output = output.permute(1, 0, 2)\n\n            if reg_branches is not None:\n                tmp = reg_branches[lid](output)   \n                new_reference_points = tmp\n                new_reference_points[..., :self.kp_coord_dim] = tmp[\n                    ..., :self.kp_coord_dim] + inverse_sigmoid(reference_points)\n                new_reference_points = new_reference_points.sigmoid()\n                if reference_points.shape[-1] == 3 and self.kp_coord_dim==2:\n                    reference_points[...,-1] = tmp[...,-1].sigmoid().detach()  \n                reference_points[...,:self.coord_dim] = new_reference_points.detach()\n\n            output = output.permute(1, 0, 2)\n            if self.return_intermediate:\n                intermediate.append(output)\n                intermediate_reference_points.append(reference_points)\n\n        if self.return_intermediate:\n            return torch.stack(intermediate), torch.stack(\n                intermediate_reference_points)\n\n        return output, reference_points\n\n\n@TRANSFORMER.register_module()\nclass DeformableDetrTransformer_(Transformer):\n    \"\"\"Implements the DeformableDETR transformer.\n    Args:\n        as_two_stage (bool): Generate query from encoder features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. 
Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 as_two_stage=False,\n                 num_feature_levels=1,\n                 two_stage_num_proposals=300,\n                 coord_dim=2,\n                 **kwargs):\n        super(DeformableDetrTransformer_, self).__init__(**kwargs)\n        self.as_two_stage = as_two_stage\n        self.num_feature_levels = num_feature_levels\n        self.two_stage_num_proposals = two_stage_num_proposals\n        self.embed_dims = self.encoder.embed_dims\n        self.coord_dim = coord_dim\n        self.init_layers()\n\n    def init_layers(self):\n        \"\"\"Initialize layers of the DeformableDetrTransformer.\"\"\"\n        self.level_embeds = nn.Parameter(\n            torch.Tensor(self.num_feature_levels, self.embed_dims))\n\n        if self.as_two_stage:\n            self.enc_output = nn.Linear(self.embed_dims, self.embed_dims)\n            self.enc_output_norm = nn.LayerNorm(self.embed_dims)\n            self.pos_trans = nn.Linear(self.embed_dims * 2,\n                                       self.embed_dims * 2)\n            self.pos_trans_norm = nn.LayerNorm(self.embed_dims * 2)\n        else:\n            self.reference_points_embed = nn.Linear(self.embed_dims, self.coord_dim)\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, MultiScaleDeformableAttention):\n                m.init_weights()\n            elif isinstance(m,MultiScaleDeformableAttentionFp16):\n                m.init_weights()\n        if not self.as_two_stage:\n            xavier_init(self.reference_points_embed, distribution='uniform', bias=0.)\n        normal_(self.level_embeds)\n\n    @staticmethod\n    def get_reference_points(spatial_shapes, valid_ratios, device):\n        \"\"\"Get the reference points used in decoder.\n        Args:\n            spatial_shapes (Tensor): The shape of all\n                feature maps, has shape (num_level, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            device (obj:`device`): The device where\n                reference_points should be.\n        Returns:\n            Tensor: reference points used in decoder, has \\\n                shape (bs, num_keys, num_levels, 2).\n        \"\"\"\n        reference_points_list = []\n        for lvl, (H, W) in enumerate(spatial_shapes):\n            #  TODO  check this 0.5\n            ref_y, ref_x = torch.meshgrid(\n                torch.linspace(\n                    0.5, H - 0.5, H, dtype=torch.float32, device=device),\n                torch.linspace(\n                    0.5, W - 0.5, W, dtype=torch.float32, device=device))\n            ref_y = ref_y.reshape(-1)[None] / (\n                valid_ratios[:, None, lvl, 1] * H)\n            ref_x = ref_x.reshape(-1)[None] / (\n                valid_ratios[:, None, lvl, 0] * W)\n            ref = torch.stack((ref_x, ref_y), -1)\n            reference_points_list.append(ref)\n        reference_points = torch.cat(reference_points_list, 1)\n        reference_points = reference_points[:, :, None] * valid_ratios[:, None]\n        return reference_points\n\n    def get_valid_ratio(self, mask):\n        \"\"\"Get the valid radios of feature maps of all  level.\"\"\"\n        _, H, W = mask.shape\n        valid_H = 
torch.sum(~mask[:, :, 0], 1)\n        valid_W = torch.sum(~mask[:, 0, :], 1)\n        valid_ratio_h = valid_H.float() / H\n        valid_ratio_w = valid_W.float() / W\n        valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)\n        return valid_ratio\n\n    def get_proposal_pos_embed(self,\n                               proposals,\n                               num_pos_feats=128,\n                               temperature=10000):\n        \"\"\"Get the position embedding of proposal.\"\"\"\n        scale = 2 * math.pi\n        dim_t = torch.arange(\n            num_pos_feats, dtype=torch.float32, device=proposals.device)\n        dim_t = temperature**(2 * (dim_t // 2) / num_pos_feats)\n        # N, L, 4\n        proposals = proposals.sigmoid() * scale\n        # N, L, 4, 128\n        pos = proposals[:, :, :, None] / dim_t\n        # N, L, 4, 64, 2\n        pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()),\n                          dim=4).flatten(2)\n        return pos\n\n    def forward(self,\n                mlvl_feats,\n                mlvl_masks,\n                query_embed,\n                mlvl_pos_embeds,\n                reg_branches=None,\n                cls_branches=None,\n                **kwargs):\n        \"\"\"Forward function for `Transformer`.\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. Each element has shape\n                [bs, embed_dims, h, w].\n            mlvl_masks (list(Tensor)): The key_padding_mask from\n                different level used for encoder and decoder,\n                each element has shape  [bs, h, w].\n            query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            mlvl_pos_embeds (list(Tensor)): The positional encoding\n                of feats from different level, has the shape\n                 [bs, embed_dims, h, w].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when\n                `with_box_refine` is True. Default to None.\n            cls_branches (obj:`nn.ModuleList`): Classification heads\n                for feature maps from each decoder layer. Only would\n                 be passed when `as_two_stage`\n                 is True. Default to None.\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n                - inter_states: Outputs from decoder. If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). 
\\\n                    Only returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps, has shape \\\n                    (batch, h*w, 4). Only returned \\\n                    when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n        assert self.as_two_stage or query_embed is not None\n\n        feat_flatten = []\n        mask_flatten = []\n        lvl_pos_embed_flatten = []\n        spatial_shapes = []\n        for lvl, (feat, mask, pos_embed) in enumerate(\n                zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):\n            bs, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            spatial_shapes.append(spatial_shape)\n            feat = feat.flatten(2).transpose(1, 2)\n            mask = mask.flatten(1)\n            pos_embed = pos_embed.flatten(2).transpose(1, 2)\n            lvl_pos_embed = pos_embed + self.level_embeds[lvl].view(1, 1, -1)\n            lvl_pos_embed_flatten.append(lvl_pos_embed)\n            feat_flatten.append(feat)\n            mask_flatten.append(mask)\n        feat_flatten = torch.cat(feat_flatten, 1)\n        mask_flatten = torch.cat(mask_flatten, 1)\n        lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=feat_flatten.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))\n        valid_ratios = torch.stack(\n            [self.get_valid_ratio(m) for m in mlvl_masks], 1)\n\n        # NOTE: the encoder is bypassed in this variant; the flattened\n        # multi-level features are used directly as the decoder memory.\n        # reference_points = \\\n        #     self.get_reference_points(spatial_shapes,\n        #                               valid_ratios,\n        #                               device=feat.device)\n\n        feat_flatten = feat_flatten.permute(1, 0, 2)  # (H*W, bs, embed_dims)\n        # lvl_pos_embed_flatten = lvl_pos_embed_flatten.permute(\n        #     1, 0, 2)  # (H*W, bs, embed_dims)\n        # memory = self.encoder(\n        #     query=feat_flatten,\n        #     key=None,\n        #     value=None,\n        #     query_pos=lvl_pos_embed_flatten,\n        #     query_key_padding_mask=mask_flatten,\n        #     spatial_shapes=spatial_shapes,\n        #     reference_points=reference_points,\n        #     level_start_index=level_start_index,\n        #     valid_ratios=valid_ratios,\n        #     **kwargs)\n\n        memory = feat_flatten.permute(1, 0, 2)\n        bs, _, c = memory.shape\n\n        query_pos, query = torch.split(query_embed, c, dim=-1)\n        reference_points = self.reference_points_embed(query_pos).sigmoid()\n        init_reference_out = reference_points\n\n        # decoder\n        query = query.permute(1, 0, 2)\n        memory = memory.permute(1, 0, 2)\n        query_pos = query_pos.permute(1, 0, 2)\n        inter_states, inter_references = self.decoder(\n            query=query,\n            key=None,\n            value=memory,\n            query_pos=query_pos,\n            key_padding_mask=mask_flatten,\n            reference_points=reference_points,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            valid_ratios=valid_ratios,\n            reg_branches=reg_branches,\n            **kwargs)\n\n        inter_references_out = inter_references\n        return inter_states, 
init_reference_out, inter_references_out"
  },
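  {
    "path": "docs/examples/reference_points_sketch.py",
    "content": "# Hypothetical, self-contained sketch (added for illustration; not part of\n# the original repository): reproduces the reference-point grid and\n# valid-ratio normalization used by `get_reference_points` /\n# `get_valid_ratio` above on made-up feature-map sizes, so the tensor\n# shapes can be checked in isolation.\nimport torch\n\n\ndef valid_ratio(mask):\n    # mask: (bs, H, W), True marks padded pixels (same convention as above)\n    _, H, W = mask.shape\n    valid_H = torch.sum(~mask[:, :, 0], 1)\n    valid_W = torch.sum(~mask[:, 0, :], 1)\n    return torch.stack([valid_W.float() / W, valid_H.float() / H], -1)\n\n\nbs = 2\nspatial_shapes = [(8, 12), (4, 6)]  # two pyramid levels, (H, W)\nmasks = [torch.zeros(bs, H, W, dtype=torch.bool) for H, W in spatial_shapes]\nmasks[0][:, :, -2:] = True  # pretend the right border of level 0 is padding\n\nratios = torch.stack([valid_ratio(m) for m in masks], 1)  # (bs, num_levels, 2)\n\nrefs = []\nfor lvl, (H, W) in enumerate(spatial_shapes):\n    ref_y, ref_x = torch.meshgrid(\n        torch.linspace(0.5, H - 0.5, H),\n        torch.linspace(0.5, W - 0.5, W))\n    # normalize into [0, 1] w.r.t. the valid extent of this level\n    ref_y = ref_y.reshape(-1)[None] / (ratios[:, None, lvl, 1] * H)\n    ref_x = ref_x.reshape(-1)[None] / (ratios[:, None, lvl, 0] * W)\n    refs.append(torch.stack((ref_x, ref_y), -1))\nreference_points = torch.cat(refs, 1)[:, :, None] * ratios[:, None]\nprint(reference_points.shape)  # (bs, sum(H*W), num_levels, 2)\n"
  },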
  {
    "path": "plugin/models/transformer_utils/fp16_dattn.py",
    "content": "from turtle import forward\nimport warnings\ntry:\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention\nexcept ImportError:\n    warnings.warn(\n        '`MultiScaleDeformableAttention` in MMCV has been moved to '\n        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')\n    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention\nfrom mmcv.runner import force_fp32, auto_fp16\nfrom mmcv.cnn.bricks.registry import ATTENTION\n\n\nfrom mmcv.runner.base_module import BaseModule, ModuleList, Sequential\nfrom mmcv.cnn.bricks.transformer import build_attention\n\nimport math\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.autograd.function import Function, once_differentiable\n\nfrom mmcv import deprecated_api_warning\nfrom mmcv.cnn import constant_init, xavier_init\nfrom mmcv.cnn.bricks.registry import ATTENTION\nfrom mmcv.runner import BaseModule\nfrom mmcv.utils import ext_loader\next_module = ext_loader.load_ext(\n    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])\nfrom torch.cuda.amp import custom_bwd, custom_fwd\n\n\n@ATTENTION.register_module()\nclass MultiScaleDeformableAttentionFp16(BaseModule):\n\n    def __init__(self, attn_cfg=None,init_cfg=None,**kwarg):\n        super(MultiScaleDeformableAttentionFp16,self).__init__(init_cfg)\n\n        # import ipdb; ipdb.set_trace()\n        self.deformable_attention = build_attention(attn_cfg)\n        self.deformable_attention.init_weights()\n        self.fp16_enabled = False\n\n\n    @force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points','identity'))\n    def forward(self, query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        # import ipdb; ipdb.set_trace()\n        return self.deformable_attention(query,\n                key=key,\n                value=value,\n                identity=identity,\n                query_pos=query_pos,\n                key_padding_mask=key_padding_mask,\n                reference_points=reference_points,\n                spatial_shapes=spatial_shapes,\n                level_start_index=level_start_index,**kwargs)\n\n\n\nclass MultiScaleDeformableAttnFunctionFp32(Function):\n\n    @staticmethod\n    @custom_fwd(cast_inputs=torch.float32)\n    def forward(ctx, value, value_spatial_shapes, value_level_start_index,\n                sampling_locations, attention_weights, im2col_step):\n        \"\"\"GPU version of multi-scale deformable attention.\n        Args:\n            value (Tensor): The value has shape\n                (bs, num_keys, mum_heads, embed_dims//num_heads)\n            value_spatial_shapes (Tensor): Spatial shape of\n                each feature map, has shape (num_levels, 2),\n                last dimension 2 represent (h, w)\n            sampling_locations (Tensor): The location of sampling points,\n                has shape\n                (bs ,num_queries, num_heads, num_levels, num_points, 2),\n                the last dimension 2 represent (x, y).\n            attention_weights (Tensor): The weight of sampling points used\n                when calculate the attention, has shape\n                (bs ,num_queries, num_heads, num_levels, num_points),\n            
im2col_step (int): The step used in image to column.\n        Returns:\n            Tensor: has shape (bs, num_queries, embed_dims)\n        \"\"\"\n\n        ctx.im2col_step = im2col_step\n        output = ext_module.ms_deform_attn_forward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            im2col_step=ctx.im2col_step)\n        ctx.save_for_backward(value, value_spatial_shapes,\n                              value_level_start_index, sampling_locations,\n                              attention_weights)\n        return output\n\n    @staticmethod\n    @once_differentiable\n    @custom_bwd\n    def backward(ctx, grad_output):\n        \"\"\"GPU version of backward function.\n        Args:\n            grad_output (Tensor): Gradient\n                of output tensor of forward.\n        Returns:\n            Tuple[Tensor]: Gradient\n                of input tensors in forward.\n        \"\"\"\n        value, value_spatial_shapes, value_level_start_index,\\\n            sampling_locations, attention_weights = ctx.saved_tensors\n        grad_value = torch.zeros_like(value)\n        grad_sampling_loc = torch.zeros_like(sampling_locations)\n        grad_attn_weight = torch.zeros_like(attention_weights)\n\n        ext_module.ms_deform_attn_backward(\n            value,\n            value_spatial_shapes,\n            value_level_start_index,\n            sampling_locations,\n            attention_weights,\n            grad_output.contiguous(),\n            grad_value,\n            grad_sampling_loc,\n            grad_attn_weight,\n            im2col_step=ctx.im2col_step)\n\n        return grad_value, None, None, \\\n            grad_sampling_loc, grad_attn_weight, None\n\n\ndef multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,\n                                        sampling_locations, attention_weights):\n    \"\"\"CPU (pure PyTorch) version of multi-scale deformable attention.\n    Args:\n        value (Tensor): The value has shape\n            (bs, num_keys, num_heads, embed_dims//num_heads)\n        value_spatial_shapes (Tensor): Spatial shape of\n            each feature map, has shape (num_levels, 2),\n            last dimension 2 represent (h, w)\n        sampling_locations (Tensor): The location of sampling points,\n            has shape\n            (bs, num_queries, num_heads, num_levels, num_points, 2),\n            the last dimension 2 represent (x, y).\n        attention_weights (Tensor): The weight of sampling points used\n            when calculating the attention, has shape\n            (bs, num_queries, num_heads, num_levels, num_points),\n    Returns:\n        Tensor: has shape (bs, num_queries, embed_dims)\n    \"\"\"\n\n    bs, _, num_heads, embed_dims = value.shape\n    _, num_queries, num_heads, num_levels, num_points, _ =\\\n        sampling_locations.shape\n    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],\n                             dim=1)\n    sampling_grids = 2 * sampling_locations - 1\n    sampling_value_list = []\n    for level, (H_, W_) in enumerate(value_spatial_shapes):\n        # bs, H_*W_, num_heads, embed_dims ->\n        # bs, H_*W_, num_heads*embed_dims ->\n        # bs, num_heads*embed_dims, H_*W_ ->\n        # bs*num_heads, embed_dims, H_, W_\n        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(\n            bs * num_heads, embed_dims, H_, W_)\n        # bs, num_queries, num_heads, num_points, 
2 ->\n        # bs, num_heads, num_queries, num_points, 2 ->\n        # bs*num_heads, num_queries, num_points, 2\n        sampling_grid_l_ = sampling_grids[:, :, :,\n                                          level].transpose(1, 2).flatten(0, 1)\n        # bs*num_heads, embed_dims, num_queries, num_points\n        sampling_value_l_ = F.grid_sample(\n            value_l_,\n            sampling_grid_l_,\n            mode='bilinear',\n            padding_mode='zeros',\n            align_corners=False)\n        sampling_value_list.append(sampling_value_l_)\n    # (bs, num_queries, num_heads, num_levels, num_points) ->\n    # (bs, num_heads, num_queries, num_levels, num_points) ->\n    # (bs*num_heads, 1, num_queries, num_levels*num_points)\n    attention_weights = attention_weights.transpose(1, 2).reshape(\n        bs * num_heads, 1, num_queries, num_levels * num_points)\n    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *\n              attention_weights).sum(-1).view(bs, num_heads * embed_dims,\n                                              num_queries)\n    return output.transpose(1, 2).contiguous()\n\n\n@ATTENTION.register_module()\nclass MultiScaleDeformableAttentionFP32(BaseModule):\n    \"\"\"An attention module used in Deformable-DETR. `Deformable DETR:\n    Deformable Transformers for End-to-End Object Detection.\n    <https://arxiv.org/pdf/2010.04159.pdf>`_.\n    Args:\n        embed_dims (int): The embedding dimension of Attention.\n            Default: 256.\n        num_heads (int): Parallel attention heads. Default: 8.\n        num_levels (int): The number of feature maps used in\n            Attention. Default: 4.\n        num_points (int): The number of sampling points for\n            each query in each head. Default: 4.\n        im2col_step (int): The step used in image_to_column.\n            Default: 64.\n        dropout (float): A Dropout layer on `inp_identity`.\n            Default: 0.1.\n        batch_first (bool): Key, Query and Value are of shape\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). 
Default to False.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: None.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims=256,\n                 num_heads=8,\n                 num_levels=4,\n                 num_points=4,\n                 im2col_step=64,\n                 dropout=0.1,\n                 batch_first=False,\n                 norm_cfg=None,\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        if embed_dims % num_heads != 0:\n            raise ValueError(f'embed_dims must be divisible by num_heads, '\n                             f'but got {embed_dims} and {num_heads}')\n        dim_per_head = embed_dims // num_heads\n        self.norm_cfg = norm_cfg\n        self.dropout = nn.Dropout(dropout)\n        self.batch_first = batch_first\n\n        # you'd better set dim_per_head to a power of 2\n        # which is more efficient in the CUDA implementation\n        def _is_power_of_2(n):\n            if (not isinstance(n, int)) or (n < 0):\n                raise ValueError(\n                    'invalid input for _is_power_of_2: {} (type: {})'.format(\n                        n, type(n)))\n            return (n & (n - 1) == 0) and n != 0\n\n        if not _is_power_of_2(dim_per_head):\n            warnings.warn(\n                \"You'd better set embed_dims in \"\n                'MultiScaleDeformAttention to make '\n                'the dimension of each attention head a power of 2 '\n                'which is more efficient in our CUDA implementation.')\n\n        self.im2col_step = im2col_step\n        self.embed_dims = embed_dims\n        self.num_levels = num_levels\n        self.num_heads = num_heads\n        self.num_points = num_points\n        self.sampling_offsets = nn.Linear(\n            embed_dims, num_heads * num_levels * num_points * 2)\n        self.attention_weights = nn.Linear(embed_dims,\n                                           num_heads * num_levels * num_points)\n        self.value_proj = nn.Linear(embed_dims, embed_dims)\n        self.output_proj = nn.Linear(embed_dims, embed_dims)\n        self.init_weights()\n\n    def init_weights(self):\n        \"\"\"Default initialization for Parameters of Module.\"\"\"\n        constant_init(self.sampling_offsets, 0.)\n        thetas = torch.arange(\n            self.num_heads,\n            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)\n        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)\n        grid_init = (grid_init /\n                     grid_init.abs().max(-1, keepdim=True)[0]).view(\n                         self.num_heads, 1, 1,\n                         2).repeat(1, self.num_levels, self.num_points, 1)\n        for i in range(self.num_points):\n            grid_init[:, :, i, :] *= i + 1\n\n        self.sampling_offsets.bias.data = grid_init.view(-1)\n        constant_init(self.attention_weights, val=0., bias=0.)\n        xavier_init(self.value_proj, distribution='uniform', bias=0.)\n        xavier_init(self.output_proj, distribution='uniform', bias=0.)\n        self._is_init = True\n\n    @deprecated_api_warning({'residual': 'identity'},\n                            cls_name='MultiScaleDeformableAttention')\n    def forward(self,\n                query,\n                key=None,\n                value=None,\n                identity=None,\n                query_pos=None,\n                
key_padding_mask=None,\n                reference_points=None,\n                spatial_shapes=None,\n                level_start_index=None,\n                **kwargs):\n        \"\"\"Forward Function of MultiScaleDeformAttention.\n        Args:\n            query (Tensor): Query of Transformer with shape\n                (num_query, bs, embed_dims).\n            key (Tensor): The key tensor with shape\n                `(num_key, bs, embed_dims)`.\n            value (Tensor): The value tensor with shape\n                `(num_key, bs, embed_dims)`.\n            identity (Tensor): The tensor used for addition, with the\n                same shape as `query`. Default None. If None,\n                `query` will be used.\n            query_pos (Tensor): The positional encoding for `query`.\n                Default: None.\n            reference_points (Tensor): The normalized reference\n                points with shape (bs, num_query, num_levels, 2),\n                all elements are in range [0, 1], top-left (0,0),\n                bottom-right (1, 1), including padding area;\n                or (N, Length_{query}, num_levels, 4), where the\n                additional two dimensions (w, h) form reference boxes.\n            key_padding_mask (Tensor): ByteTensor for `query`, with\n                shape [bs, num_key].\n            spatial_shapes (Tensor): Spatial shape of features in\n                different levels. With shape (num_levels, 2),\n                last dimension represents (h, w).\n            level_start_index (Tensor): The start index of each level.\n                A tensor has shape ``(num_levels, )`` and can be represented\n                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].\n        Returns:\n             Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n\n        if value is None:\n            value = query\n\n        if identity is None:\n            identity = query\n        if query_pos is not None:\n            query = query + query_pos\n        if not self.batch_first:\n            # change to (bs, num_query, embed_dims)\n            query = query.permute(1, 0, 2)\n            value = value.permute(1, 0, 2)\n\n        bs, num_query, _ = query.shape\n        bs, num_value, _ = value.shape\n        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value\n\n        value = self.value_proj(value)\n        if key_padding_mask is not None:\n            value = value.masked_fill(key_padding_mask[..., None], 0.0)\n        value = value.view(bs, num_value, self.num_heads, -1)\n        sampling_offsets = self.sampling_offsets(query).view(\n            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)\n        attention_weights = self.attention_weights(query).view(\n            bs, num_query, self.num_heads, self.num_levels * self.num_points)\n        attention_weights = attention_weights.softmax(-1)\n\n        attention_weights = attention_weights.view(bs, num_query,\n                                                   self.num_heads,\n                                                   self.num_levels,\n                                                   self.num_points)\n        if reference_points.shape[-1] == 2:\n            offset_normalizer = torch.stack(\n                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)\n            sampling_locations = reference_points[:, :, None, :, None, :] \\\n                + sampling_offsets \\\n                / offset_normalizer[None, None, None, :, None, :]\n        elif reference_points.shape[-1] == 4:\n            sampling_locations = reference_points[:, :, None, :, None, :2] \\\n                + sampling_offsets / self.num_points \\\n                * reference_points[:, :, None, :, None, 2:] \\\n                * 0.5\n        else:\n            raise ValueError(\n                f'Last dim of reference_points must be'\n                f' 2 or 4, but get {reference_points.shape[-1]} instead.')\n        if torch.cuda.is_available() and value.is_cuda:\n            output = MultiScaleDeformableAttnFunctionFp32.apply(\n                value, spatial_shapes, level_start_index, sampling_locations,\n                attention_weights, self.im2col_step)\n        else:\n            # the pure PyTorch fallback takes neither level_start_index\n            # nor im2col_step\n            output = multi_scale_deformable_attn_pytorch(\n                value, spatial_shapes, sampling_locations, attention_weights)\n\n        output = self.output_proj(output)\n\n        if not self.batch_first:\n            # (num_query, bs, embed_dims)\n            output = output.permute(1, 0, 2)\n\n        return self.dropout(output) + identity"
  },
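  {
    "path": "docs/examples/ms_deform_attn_shapes.py",
    "content": "# Hypothetical smoke test (illustration only, not part of the original\n# repository): drives the pure-PyTorch fallback defined in\n# plugin/models/transformer_utils/fp16_dattn.py with random tensors to\n# document the expected shapes. Assumes the repo root is on PYTHONPATH and\n# that mmcv with compiled ops is installed, since the module loads the\n# `_ext` extension at import time.\nimport torch\nfrom plugin.models.transformer_utils.fp16_dattn import (\n    multi_scale_deformable_attn_pytorch)\n\nbs, num_heads, head_dim = 2, 4, 8\nnum_queries, num_points = 5, 4\nspatial_shapes = torch.tensor([[16, 16], [8, 8]], dtype=torch.long)\nnum_levels = spatial_shapes.size(0)\nnum_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())\n\nvalue = torch.rand(bs, num_keys, num_heads, head_dim)\n# sampling locations are normalized to [0, 1] over each level\nsampling_locations = torch.rand(\n    bs, num_queries, num_heads, num_levels, num_points, 2)\nattention_weights = torch.rand(\n    bs, num_queries, num_heads, num_levels, num_points)\nattention_weights = attention_weights / attention_weights.sum(\n    (-2, -1), keepdim=True)\n\nout = multi_scale_deformable_attn_pytorch(\n    value, spatial_shapes, sampling_locations, attention_weights)\nprint(out.shape)  # (bs, num_queries, num_heads * head_dim)\n"
  },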
  {
    "path": "plugin/models/utils/__init__.py",
    "content": ""
  },
  {
    "path": "plugin/models/utils/query_update.py",
    "content": "import math\nimport torch\nimport torch.nn as nn \nimport numpy as np\nfrom mmcv.cnn import bias_init_with_prob, xavier_init\n\n\nclass Embedder:\n    def __init__(self, **kwargs):\n        self.kwargs = kwargs\n        self.create_embedding_fn()\n        \n    def create_embedding_fn(self):\n        embed_fns = []\n        d = self.kwargs['input_dims']\n        out_dim = 0\n        if self.kwargs['include_input']:\n            embed_fns.append(lambda x : x)\n            out_dim += d\n            \n        max_freq = self.kwargs['max_freq_log2']\n        N_freqs = self.kwargs['num_freqs']\n        \n        if self.kwargs['log_sampling']:\n            freq_bands = 2.**torch.linspace(0., max_freq, steps=N_freqs)\n        else:\n            freq_bands = torch.linspace(2.**0., 2.**max_freq, steps=N_freqs)\n            \n        for freq in freq_bands:\n            for p_fn in self.kwargs['periodic_fns']:\n                embed_fns.append(lambda x, p_fn=p_fn, freq=freq : p_fn(x * freq))\n                out_dim += d\n                    \n        self.embed_fns = embed_fns\n        self.out_dim = out_dim\n        \n    def embed(self, inputs):\n        return torch.cat([fn(inputs) for fn in self.embed_fns], -1)\n\n\nclass MotionMLP(nn.Module):\n    ''' \n    Args:\n        c_dim (int): dimension of latent code c\n        f_dim (int): feature dimension\n    '''\n\n    def __init__(self, c_dim, f_dim=512, identity=True):\n        super().__init__()\n        self.c_dim = c_dim\n        self.f_dim = f_dim\n        self.identity = identity\n\n        multires = 10\n        embed_kwargs = {\n                'include_input' : True,\n                'input_dims' : c_dim,\n                'max_freq_log2' : multires-1,\n                'num_freqs' : multires,\n                'log_sampling' : True,\n                'periodic_fns' : [torch.sin, torch.cos],\n        }\n        self.pos_embedder = Embedder(**embed_kwargs)\n\n        self.fc = nn.Sequential(\n            nn.Linear(f_dim + self.pos_embedder.out_dim, 2*f_dim),\n            nn.LayerNorm(2*f_dim),\n            nn.ReLU(),\n            nn.Linear(2*f_dim, f_dim)\n        )\n        self.init_weights()\n\n    def init_weights(self):\n        for m in self.fc:\n            for param in m.parameters():\n                if param.dim() > 1:\n                    nn.init.xavier_uniform_(param)\n            \n\n    def forward(self, x, pose_info):\n        pose_embed = self.pos_embedder.embed(pose_info)\n        xc = torch.cat([x, pose_embed], dim=-1)\n        out = self.fc(xc)\n\n        if self.identity:\n            out = out + x\n        \n        return out\n"
  },
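  {
    "path": "docs/examples/motion_mlp_usage.py",
    "content": "# Hypothetical usage sketch (illustration only, not part of the original\n# repository): shows the tensor contract of MotionMLP from\n# plugin/models/utils/query_update.py. The c_dim of 12 below is made up\n# (e.g. a flattened 3x4 relative-pose matrix); assumes the repo root is on\n# PYTHONPATH.\nimport torch\nfrom plugin.models.utils.query_update import MotionMLP\n\nc_dim, f_dim, num_queries = 12, 256, 50\nmlp = MotionMLP(c_dim=c_dim, f_dim=f_dim, identity=True)\n\nqueries = torch.rand(num_queries, f_dim)  # per-element query features\npose = torch.rand(num_queries, c_dim)     # per-element motion/pose code\n# the pose code is sinusoidally embedded, concatenated with the query\n# features, passed through the MLP, and added back as a residual\nupdated = mlp(queries, pose)\nprint(updated.shape)  # (num_queries, f_dim)\n"
  },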
  {
    "path": "plugin/models/utils/renderer_track.py",
    "content": "import os.path as osp\nimport os\n#import av2.geometry.interpolate as interp_utils\nimport numpy as np\nimport copy\nimport cv2\nimport matplotlib.pyplot as plt\nfrom PIL import Image\n\ndef remove_nan_values(uv):\n    is_u_valid = np.logical_not(np.isnan(uv[:, 0]))\n    is_v_valid = np.logical_not(np.isnan(uv[:, 1]))\n    is_uv_valid = np.logical_and(is_u_valid, is_v_valid)\n\n    uv_valid = uv[is_uv_valid]\n    return uv_valid\n\ndef points_ego2img(pts_ego, extrinsics, intrinsics):\n    pts_ego_4d = np.concatenate([pts_ego, np.ones([len(pts_ego), 1])], axis=-1)\n    pts_cam_4d = extrinsics @ pts_ego_4d.T\n    \n    uv = (intrinsics @ pts_cam_4d[:3, :]).T\n    uv = remove_nan_values(uv)\n    depth = uv[:, 2]\n    uv = uv[:, :2] / uv[:, 2].reshape(-1, 1)\n\n    return uv, depth\n\ndef draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsics, color_bgr, thickness):\n    if polyline_ego.shape[1] == 2:\n        zeros = np.zeros((polyline_ego.shape[0], 1))\n        polyline_ego = np.concatenate([polyline_ego, zeros], axis=1)\n\n    polyline_ego = interp_utils.interp_arc(t=500, points=polyline_ego)\n    \n    uv, depth = points_ego2img(polyline_ego, extrinsics, intrinsics)\n\n    h, w, c = img_bgr.shape\n\n    is_valid_x = np.logical_and(0 <= uv[:, 0], uv[:, 0] < w - 1)\n    is_valid_y = np.logical_and(0 <= uv[:, 1], uv[:, 1] < h - 1)\n    is_valid_z = depth > 0\n    is_valid_points = np.logical_and.reduce([is_valid_x, is_valid_y, is_valid_z])\n\n    if is_valid_points.sum() == 0:\n        return\n    \n    uv = np.round(uv[is_valid_points]).astype(np.int32)\n\n    draw_visible_polyline_cv2(\n        copy.deepcopy(uv),\n        valid_pts_bool=np.ones((len(uv), 1), dtype=bool),\n        image=img_bgr,\n        color=color_bgr,\n        thickness_px=thickness,\n    )\n\ndef draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickness_px):\n    \"\"\"Draw a polyline onto an image using given line segments.\n\n    Args:\n        line: Array of shape (K, 2) representing the coordinates of line.\n        valid_pts_bool: Array of shape (K,) representing which polyline coordinates are valid for rendering.\n            For example, if the coordinate is occluded, a user might specify that it is invalid.\n            Line segments touching an invalid vertex will not be rendered.\n        image: Array of shape (H, W, 3), representing a 3-channel BGR image\n        color: Tuple of shape (3,) with a BGR format color\n        thickness_px: thickness (in pixels) to use when rendering the polyline.\n    \"\"\"\n    line = np.round(line).astype(int)  # type: ignore\n    for i in range(len(line) - 1):\n\n        if (not valid_pts_bool[i]) or (not valid_pts_bool[i + 1]):\n            continue\n\n        x1 = line[i][0]\n        y1 = line[i][1]\n        x2 = line[i + 1][0]\n        y2 = line[i + 1][1]\n\n        # Use anti-aliasing (AA) for curves\n        image = cv2.line(image, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=thickness_px, lineType=cv2.LINE_AA)\n\n\nCOLOR_MAPS_BGR = {\n    # bgr colors\n    'divider': (0, 0, 255),\n    'boundary': (0, 255, 0),\n    'ped_crossing': (255, 0, 0),\n    'centerline': (51, 183, 255),\n    'drivable_area': (171, 255, 255)\n}\n\nCOLOR_MAPS_PLT = {\n    'divider': 'r',\n    'boundary': 'g',\n    'ped_crossing': 'b',\n    'centerline': 'orange',\n    'drivable_area': 'y',\n}\n\nCAM_NAMES_AV2 = ['ring_front_center', 'ring_front_right', 'ring_front_left',\n    'ring_rear_right','ring_rear_left', 'ring_side_right', 
'ring_side_left',\n    ]\nCAM_NAMES_NUSC = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT',\n    'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT',]\n\nclass Renderer(object):\n    \"\"\"Render map elements on image views.\n\n    Args:\n        cat2id (dict): category to class id\n        roi_size (tuple): bev range\n        dataset (str): 'av2' or 'nusc'\n    \"\"\"\n\n    def __init__(self, cat2id, roi_size, dataset='av2'):\n        self.roi_size = roi_size\n        self.cat2id = cat2id\n        self.id2cat = {v: k for k, v in cat2id.items()}\n        if dataset == 'av2':\n            self.cam_names = CAM_NAMES_AV2\n        else:\n            self.cam_names = CAM_NAMES_NUSC\n\n    def render_bev_from_vectors(self, vectors, labels, out_path, id_info=None, score_info=None):\n        '''Render bev segmentation using vectorized map elements.\n\n        Args:\n            vectors (list): list of vectorized map elements.\n            labels (list): list of labels of map elements.\n            out_path (str): output file path.\n            id_info (list, optional): per-element track ids (-1 marks a FP).\n            score_info (list, optional): per-element confidence scores.\n        '''\n        if id_info is not None:\n            assert len(vectors) == len(id_info)\n        if score_info is not None:\n            assert len(vectors) == len(score_info)\n\n        car_img = Image.open('resources/car.png')\n\n        plt.figure(figsize=(self.roi_size[0], self.roi_size[1]))\n        plt.xlim(-self.roi_size[0] / 2, self.roi_size[0] / 2)\n        plt.ylim(-self.roi_size[1] / 2, self.roi_size[1] / 2)\n        plt.axis('off')\n        plt.imshow(car_img, extent=[-2.5, 2.5, -2.0, 2.0])\n\n        for idx in range(len(labels)):\n            cat = self.id2cat[labels[idx].item()]\n            color = COLOR_MAPS_PLT[cat]\n            vector = vectors[idx].detach().cpu().numpy()\n            pts = vector[:, :2]\n            x = np.array([pt[0] for pt in pts])\n            y = np.array([pt[1] for pt in pts])\n            # plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], angles='xy', color=color,\n            #     scale_units='xy', scale=1)\n            plt.plot(x, y, 'o-', color=color, linewidth=20, markersize=50)\n            if id_info is not None:\n                vec_id = int(id_info[idx])\n                mid_idx = len(x) // 2\n                if vec_id == -1:\n                    plt.text(x[mid_idx], y[mid_idx], 'FP', fontsize=100, color=color)\n                else:\n                    plt.text(x[mid_idx], y[mid_idx], '{}'.format(vec_id), fontsize=100, color=color)\n\n            if score_info is not None:\n                mid_idx = len(x) // 2\n                plt.text(x[mid_idx]-1, y[mid_idx]+2, '{:.2f}'.format(score_info[idx]), fontsize=100, color='purple')\n\n        plt.savefig(out_path, bbox_inches='tight', dpi=40)\n        plt.close()\n\n    def render_bev_from_mask(self, semantic_mask, out_path):\n        '''Render bev segmentation from semantic_mask.\n\n        Args:\n            semantic_mask (array): semantic mask.\n            out_path (str): output file path.\n        '''\n\n        c, h, w = semantic_mask.shape\n        bev_img = np.ones((3, h, w), dtype=np.uint8) * 255\n        if 'drivable_area' in self.cat2id:\n            drivable_area_mask = semantic_mask[self.cat2id['drivable_area']]\n            bev_img[:, drivable_area_mask == 1] = \\\n                    np.array(COLOR_MAPS_BGR['drivable_area']).reshape(3, 1)\n\n        # NOTE: the semantic mask has been changed into instance masks for our use\n        for 
label in range(c):\n            cat = self.id2cat[label]\n            if cat == 'drivable_area':\n                continue\n            valid = semantic_mask[label] == 1\n            bev_img[:, valid] = np.array(COLOR_MAPS_BGR[cat]).reshape(3, 1)\n\n        cv2.imwrite(out_path, bev_img.transpose((1, 2, 0)))\n"
  },
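  {
    "path": "docs/examples/points_ego2img_sketch.py",
    "content": "# Hypothetical sketch (illustration only, not part of the original\n# repository): projects made-up ego-frame points with `points_ego2img` from\n# plugin/models/utils/renderer_track.py using a synthetic pinhole camera.\n# All intrinsics/extrinsics values below are invented for the example.\nimport numpy as np\nfrom plugin.models.utils.renderer_track import points_ego2img\n\n# 3x3 pinhole intrinsics (fx, fy, cx, cy are made up)\nintrinsics = np.array([[1000.0, 0.0, 800.0],\n                       [0.0, 1000.0, 450.0],\n                       [0.0, 0.0, 1.0]])\n# 4x4 ego->camera extrinsics: a camera looking along ego +x, mounted 1.5 m\n# above the ground, i.e. ego (x, y, z) -> camera (-y, -z, x)\nextrinsics = np.array([[0.0, -1.0, 0.0, 0.0],\n                       [0.0, 0.0, -1.0, 1.5],\n                       [1.0, 0.0, 0.0, 0.0],\n                       [0.0, 0.0, 0.0, 1.0]])\n\npts_ego = np.array([[10.0, 0.0, 0.0],   # 10 m ahead of the ego vehicle\n                    [20.0, 2.0, 0.0]])\nuv, depth = points_ego2img(pts_ego, extrinsics, intrinsics)\nprint(uv)     # pixel coordinates, one row per input point\nprint(depth)  # depth along the optical axis; positive means in front\n"
  },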
  {
    "path": "requirements.txt",
    "content": "av2\nnuscenes-devkit\neinops==0.6.1\nnumpy==1.23.5\nnumba==0.53.0\nShapely==1.8.5\nyapf==0.40.1\nsetuptools==59.5.0\nimageio-ffmpeg==0.4.9\n"
  },
  {
    "path": "tools/benchmark.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport time\nimport torch\nfrom mmcv import Config\nfrom mmcv.parallel import MMDataParallel\nfrom mmcv.runner import load_checkpoint, wrap_fp16_model\nimport sys\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.models import build_detector\nfrom tools.misc.fuse_conv_bn import fuse_module\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', default=None, help='checkpoint file')\n    parser.add_argument('--samples', default=2000, help='samples to benchmark')\n    parser.add_argument(\n        '--log-interval', default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n\n    import sys, os\n    sys.path.append(os.path.abspath('.'))  \n    if hasattr(cfg, 'plugin'):\n        if cfg.plugin:\n            import importlib\n            if hasattr(cfg, 'plugin_dir'):\n                def import_path(plugin_dir):\n                    _module_dir = os.path.dirname(plugin_dir)\n                    _module_dir = _module_dir.split('/')\n                    _module_path = _module_dir[0]\n\n                    for m in _module_dir[1:]:\n                        _module_path = _module_path + '.' + m\n                    print(_module_path)\n                    plg_lib = importlib.import_module(_module_path)\n\n                plugin_dirs = cfg.plugin_dir\n                if not isinstance(plugin_dirs,list):\n                    plugin_dirs = [plugin_dirs,]\n                for plugin_dir in plugin_dirs:\n                    import_path(plugin_dir)\n                \n            else:\n                # import dir is the dirpath for the config file\n                _module_dir = os.path.dirname(args.config)\n                _module_dir = _module_dir.split('/')\n                _module_path = _module_dir[0]\n                for m in _module_dir[1:]:\n                    _module_path = _module_path + '.' 
+ m\n                print(_module_path)\n                plg_lib = importlib.import_module(_module_path)\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    # TODO: support multiple images per gpu (only minor changes are needed)\n    dataset = build_dataset(cfg.data.test)\n    from plugin.datasets.builder import build_dataloader\n\n    data_loader = build_dataloader(\n            dataset,\n            samples_per_gpu=1,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=False,\n            shuffle=False,\n            shuffler_sampler=cfg.data.shuffler_sampler,  # dict(type='DistributedGroupSampler'),\n            nonshuffler_sampler=cfg.data.nonshuffler_sampler,  # dict(type='DistributedSampler'),\n        )\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    if args.checkpoint is not None:\n        load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_module(model)\n\n    model = MMDataParallel(model, device_ids=[0])\n\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 5\n    pure_inf_time = 0\n\n    # benchmark with several samples and take the average\n    for i, data in enumerate(data_loader):\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n        with torch.no_grad():\n            model(return_loss=False, rescale=True, **data)\n\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % args.log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(f'Done image [{i + 1:<3}/ {args.samples}], '\n                      f'fps: {fps:.1f} img / s')\n\n        if (i + 1) == args.samples:\n            # `elapsed` was already accumulated above; do not add it twice\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(f'Overall fps: {fps:.1f} img / s')\n            break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
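  {
    "path": "docs/examples/benchmark_timing_pattern.py",
    "content": "# Hypothetical mini-version (illustration only, not part of the original\n# repository) of the timing pattern used in tools/benchmark.py above: skip\n# warm-up iterations, synchronize around the forward pass, and average over\n# the remaining samples. The model and inputs are stand-ins.\nimport time\nimport torch\n\nmodel = torch.nn.Linear(1024, 1024)\nif torch.cuda.is_available():\n    model = model.cuda()\ndevice = next(model.parameters()).device\n\nnum_warmup, num_samples = 5, 50\npure_inf_time = 0.0\nfor i in range(num_samples):\n    x = torch.rand(64, 1024, device=device)\n    if torch.cuda.is_available():\n        torch.cuda.synchronize()\n    start = time.perf_counter()\n    with torch.no_grad():\n        model(x)\n    if torch.cuda.is_available():\n        torch.cuda.synchronize()\n    elapsed = time.perf_counter() - start\n    # CUDA context creation and autotuning pollute the first iterations\n    if i >= num_warmup:\n        pure_inf_time += elapsed\nprint(f'{(num_samples - num_warmup) / pure_inf_time:.1f} samples / s')\n"
  },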
  {
    "path": "tools/data_converter/__init__.py",
    "content": ""
  },
  {
    "path": "tools/data_converter/argoverse_converter.py",
    "content": "from functools import partial\nfrom multiprocessing import Pool\nimport multiprocessing\nfrom random import sample\nimport time\nimport mmcv\nimport logging\nfrom pathlib import Path\nfrom os import path as osp\nimport os\nfrom av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader\nimport argparse\n\n\nCAM_NAMES = ['ring_front_center', 'ring_front_right', 'ring_front_left',\n    'ring_rear_right','ring_rear_left', 'ring_side_right', 'ring_side_left',\n    # 'stereo_front_left', 'stereo_front_right',\n    ]\n\nFAIL_LOGS = [\n    '01bb304d-7bd8-35f8-bbef-7086b688e35e',\n    '453e5558-6363-38e3-bf9b-42b5ba0a6f1d',\n    '75e8adad-50a6-3245-8726-5e612db3d165',\n    '54bc6dbc-ebfb-3fba-b5b3-57f88b4b79ca',\n    'af170aac-8465-3d7b-82c5-64147e94af7d',\n    '6e106cf8-f6dd-38f6-89c8-9be7a71e7275',\n]\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Data converter arg parser')\n    parser.add_argument(\n        '--data-root',\n        type=str,\n        help='specify the root path of dataset')\n    parser.add_argument(\n        '--newsplit',\n        action='store_true')\n    parser.add_argument(\n        '--nproc',\n        type=int,\n        default=64,\n        required=False,\n        help='workers to process data')\n    args = parser.parse_args()\n    return args\n\ndef create_av2_infos_mp(root_path,\n                        info_prefix,\n                        log_ids,\n                        split,\n                        dest_path=None,\n                        num_multithread=64, \n                        newsplit=False):\n    \"\"\"Create info file of av2 dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        dest_path (str): Path to store generated file, default to root_path\n        split (str): Split of the data.\n            Default: 'train'\n    \"\"\"\n    \n    if dest_path is None:\n        dest_path = root_path\n\n    for i in FAIL_LOGS:\n        if i in log_ids:\n            log_ids.remove(i)\n    # dataloader by original split\n    train_loader = AV2SensorDataLoader(Path(osp.join(root_path, 'train')), \n        Path(osp.join(root_path, 'train')))\n    val_loader = AV2SensorDataLoader(Path(osp.join(root_path, 'val')), \n        Path(osp.join(root_path, 'val')))\n    test_loader = AV2SensorDataLoader(Path(osp.join(root_path, 'test')), \n        Path(osp.join(root_path, 'test')))\n    loaders = [train_loader, val_loader, test_loader]\n\n    print('collecting samples...')\n    start_time = time.time()\n    print('num cpu:', multiprocessing.cpu_count())\n    print(f'using {num_multithread} threads')\n\n    # ignore warning from av2.utils.synchronization_database\n    sdb_logger = logging.getLogger('av2.utils.synchronization_database')\n    prev_level = sdb_logger.level\n    sdb_logger.setLevel(logging.CRITICAL)\n\n    pool = Pool(num_multithread)\n    fn = partial(get_data_from_logid, loaders=loaders, data_root=root_path)\n    \n    rt = pool.map_async(fn, log_ids)\n    pool.close()\n    pool.join()\n    results = rt.get()\n\n    samples = []\n    discarded = 0\n    sample_idx = 0\n    for _samples, _discarded in results:\n        for i in range(len(_samples)):\n            _samples[i]['sample_idx'] = sample_idx\n            sample_idx += 1\n        samples.extend(_samples)\n        discarded += _discarded\n    \n    sdb_logger.setLevel(prev_level)\n    
print(f'{len(samples)} available samples, {discarded} samples discarded')\n\n    id2map = {}\n    for log_id in log_ids:\n        for i in range(3):\n            if log_id in loaders[i]._sdb.get_valid_logs():\n                loader = loaders[i]\n        \n        map_path_dir = osp.join(loader._data_dir, log_id, 'map')\n        map_fname = os.path.basename(str(list(Path(map_path_dir).glob(\"log_map_archive_*.json\"))[0]))\n        map_fname = osp.join(map_path_dir, map_fname)\n        id2map[log_id] = map_fname\n\n    print('collected in {:.1f}s'.format(time.time() - start_time))\n    infos = dict(samples=samples, id2map=id2map)\n\n    if newsplit:\n        info_path = osp.join(dest_path,\n                                    '{}_map_infos_{}_newsplit.pkl'.format(info_prefix, split))\n    else:\n        info_path = osp.join(dest_path,\n                                    '{}_map_infos_{}.pkl'.format(info_prefix, split))\n    print(f'saving results to {info_path}')\n    mmcv.dump(infos, info_path)\n\ndef get_data_from_logid(log_id, loaders, data_root):\n    samples = []\n    discarded = 0\n\n    # find corresponding loader\n    for i in range(3):\n        if log_id in loaders[i]._sdb.get_valid_logs():\n            loader = loaders[i]\n    \n    # use lidar timestamps to query all sensors.\n    # the frequency is 10Hz\n    cam_timestamps = loader._sdb.per_log_lidar_timestamps_index[log_id]\n    prev = -1\n    for ts in cam_timestamps:\n        cam_ring_fpath = [loader.get_closest_img_fpath(\n                log_id, cam_name, ts\n            ) for cam_name in CAM_NAMES]\n        lidar_fpath = loader.get_closest_lidar_fpath(log_id, ts)\n\n        # if bad sensor synchronization, discard the sample\n        if None in cam_ring_fpath or lidar_fpath is None:\n            discarded += 1\n            continue\n\n        cams = {}\n        for i, cam_name in enumerate(CAM_NAMES):\n            pinhole_cam = loader.get_log_pinhole_camera(log_id, cam_name)\n            cams[cam_name] = dict(\n                img_fpath=str(cam_ring_fpath[i]),\n                intrinsics=pinhole_cam.intrinsics.K,\n                extrinsics=pinhole_cam.extrinsics,\n            )\n        \n        city_SE3_ego = loader.get_city_SE3_ego(log_id, int(ts))\n        e2g_translation = city_SE3_ego.translation\n        e2g_rotation = city_SE3_ego.rotation\n        \n        samples.append(dict(\n            e2g_translation=e2g_translation,\n            e2g_rotation=e2g_rotation,\n            cams=cams, \n            lidar_fpath=str(lidar_fpath),\n            prev=prev,\n            # map_fpath=map_fname,\n            token=str(ts),\n            log_id=log_id,\n            scene_name=log_id))\n        \n        prev = str(ts)\n\n    return samples, discarded\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    with open('tools/data_converter/av2_train_split.txt') as f:\n        train_split = [s.strip() for s in f.readlines()]\n    with open('tools/data_converter/av2_val_split.txt') as f:\n        val_split = [s.strip() for s in f.readlines()]\n    \n    test_split = None\n    if not args.newsplit:\n        train_split = os.listdir(osp.join(args.data_root, 'train'))\n        val_split = os.listdir(osp.join(args.data_root, 'val'))\n        test_split = os.listdir(osp.join(args.data_root, 'test'))\n\n    create_av2_infos_mp(\n        root_path=args.data_root,\n        split='train',\n        log_ids=train_split,\n        info_prefix='av2',\n        dest_path=args.data_root,\n        newsplit=args.newsplit)\n    \n    
create_av2_infos_mp(\n        root_path=args.data_root,\n        split='val',\n        log_ids=val_split,\n        info_prefix='av2',\n        dest_path=args.data_root,\n        newsplit=args.newsplit)\n\n    if test_split:\n        create_av2_infos_mp(\n            root_path=args.data_root,\n            split='test',\n            log_ids=test_split,\n            info_prefix='av2',\n            dest_path=args.data_root,)"
  },
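  {
    "path": "docs/examples/run_av2_converter.py",
    "content": "# Hypothetical driver (illustration only, not part of the original\n# repository) for tools/data_converter/argoverse_converter.py, calling\n# create_av2_infos_mp directly instead of via the CLI. The data root and\n# log id below are made up; real log ids come from the av2 sensor dataset\n# directories, and the call requires that dataset on disk.\nfrom tools.data_converter.argoverse_converter import create_av2_infos_mp\n\ncreate_av2_infos_mp(\n    root_path='/path/to/av2',       # contains train/ val/ test/\n    info_prefix='av2',\n    log_ids=['00000000-0000-0000-0000-000000000000'],  # made-up log id\n    split='train',\n    dest_path='/path/to/av2',       # the .pkl info file is written here\n    num_multithread=4,\n    newsplit=False)\n"
  },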
  {
    "path": "tools/data_converter/av2_train_split.txt",
    "content": "bb110668-5037-3c04-bd34-34cf1ace8d0f\n8beeb8db-28f9-396c-b752-17f906505948\n247f91e7-3177-33ad-b99e-0e0a4dc76751\n40bfcbec-ec59-3731-8e75-67f0bddc3b01\nef4a46c4-138e-3478-b94e-3e60a567ec7d\ncf6a99cb-b8bc-34d7-bdca-30e50e66cd74\n575d8d24-ba88-3b18-84c0-df5b29dccfde\ne66d1403-755b-3f63-938b-a2a69446a48a\ned93e1bb-7bbc-3444-8fc8-08a271438fc8\n0d37aee4-6508-33a2-998d-724834e80030\n7ce2a2ce-eed6-36d7-ba62-dda9acddb070\n06852209-b868-306b-b492-ee6dbc914cf8\ne424d4f7-4b28-322f-b630-31d42ae528eb\n768cf7e2-eb6c-3468-969e-e3b0fd87b34e\nb2a8a9aa-19cd-3ffd-b02c-0f2a47d1d0eb\n928e282f-d1a0-3e85-9582-0b33664c49e8\n5d8f4b0a-27f8-3889-925f-e9a146a395eb\n58a6bfcf-071e-3a6d-90f4-0e4cbdc298eb\n90f360d8-35f7-3c54-b2da-e99b354bc4cb\ne5178032-d260-3bc8-968e-a5cb98b6ae5a\naf170aac-8465-3d7b-82c5-64147e94af7d\na0cb0614-ee71-3cf3-b891-a4274883362f\nde56b100-508b-3479-81fe-735349f8e8de\nb87683ae-14c5-321f-8af3-623e7bafc3a7\n92b900b1-ac4a-3d41-b118-e42c66382c91\n2e95b33b-8ea1-3b48-875b-2f35f3092059\n75e8adad-50a6-3245-8726-5e612db3d165\nccb4e29d-e88f-3fbe-8958-67cfd62350a3\na3f59292-ad1d-370a-afde-64a9e16b341c\nf648b945-6c70-3105-bd23-9502894e37d4\ndf5d0b0e-5bcb-304a-a167-18b92d0f1d45\n6aae7f38-21de-31bf-8761-29d458338958\n80f31501-6533-3257-9870-b0c4dbf61967\n57356998-297c-330a-af4e-c6a1ad64f923\n0b9321c1-9bc2-4727-beb8-8046aa4bb6c4\nf7cf93d8-f7bd-3799-8500-fbe842a96f63\n108d2060-8bef-3d1c-88c5-c8295f596595\n4667e48c-4d16-38be-b277-6b0013d6588c\n3576c0f5-c1b5-35bb-a0c4-ee95cba5c754\na059b6b9-ca26-4881-bcf7-d202433de0c2\n53a8391f-b2d7-341f-95ce-b9174d48e040\nfd4e2c4c-f7e9-3110-8e32-28d3add3937d\n1ab241cf-f9c5-3f8a-88bd-4e14baad8ede\n2aea7bd1-432a-43c5-9445-651102487f65\nd9530d0a-b83e-44a3-910a-2b5bb8f1fb80\n072c8e90-a51c-3429-9cdf-4dababb4e9d8\n97738d01-b24f-365e-8818-2463149154d2\nb8a5a7a9-1c4f-4f2e-96a6-565e727b24d0\nc71cd96c-8e3f-3861-9ece-fcbabebc63a8\nc08279c0-10b4-3d21-b13f-a1c1a0b87f8b\nf3f8f680-e471-3662-a06a-0c00e6d88f43\nd78b78a0-2322-32c2-833a-e42ddc132d30\n271f4204-dd77-350b-b9db-5dabc4191985\n4766da89-ca13-3e92-b53e-00bc710e9bba\nb8d83d8e-1574-3efd-b067-a3ed422a259e\n2a9b2658-097e-3f8b-a817-22f2553c5de8\nc6c55112-0078-3867-a63f-1861a0125b8d\n286790ed-9dec-305b-bcad-4f8153301e7c\nc2c0e6bc-05e5-30dd-8e5e-0e7b6106ad30\n4f363f6a-e51c-4d22-b232-cf78f1520966\n8e5022ed-87a9-4480-b2a7-a7c0494f5c7b\n95acebfe-c694-3dab-9e6d-01cb501ff426\n14c8d182-9586-3f21-ad20-c4e19ec03e2c\n5ccb359a-2986-466c-88b2-a16f51774a8f\n4eb237d9-9f8c-3426-9da6-4aad349ff8aa\na783b484-437b-3569-bd44-4f83ad9e05cf\nd3efe9ba-f10a-35e7-b17e-6850c66693fe\n8a11791c-1d8f-3b12-bacc-38aa982b0003\n8c019de9-7043-37bc-9498-b5858e7240af\n63f32613-2856-4ab0-898d-f881d74eb8bf\n56c24ed8-68c5-3a08-8e3c-19646ac670e5\n8bc34c99-1b8f-3463-b0e7-12bf1eb222b3\n71d1938d-536e-39eb-beb4-bb4f5b607427\n7b0bf9d6-084a-31d4-9e52-d9b582a0ec84\nb09ba294-96b3-3c45-aeed-c40a309e5f4e\n6dc6e668-549e-33b8-b952-ed8e807b1d48\n595acd37-183c-489f-bb8a-c299a86b74c0\na9a24c80-600f-3f85-b4d9-a70ceccad385\n718a2f8d-954a-3cd8-89e6-43898cf21fee\nc8ec7be0-92aa-3222-946e-fbcf398c841e\n7039e410-b5ab-35aa-96bc-2c4b89d3c5e3\n72cf3ca1-1a9e-3254-bca0-29c62521e454\n65387aee-4490-38b9-8f4f-1fc43bd4ac06\n27c03d98-6ac3-38a3-ba5e-102b184d01ef\n58fed0d4-97d5-469b-89a4-4394838e10c7\nd1395998-7e8a-417d-91e9-5ca6ec045ee1\ne72ef05c-8b94-3885-a34f-fff3b2b954b4\n36aec72e-5086-376c-b109-295b128e77e1\necbe6def-7560-352c-8822-b2b92613e1e4\n544a8102-0ef5-3044-921e-dc0544370376\ncf5aaa11-4f92-3377-a7a2-861f305023eb\n20dd185d-b4eb-3024-a17a-b4e5d8b15b65\nded5ef6e-46ea-3a66-9180-18a6fa0a2db4\n42f92
807-0c5e-3397-bd45-9d5303b4db2a\nf668074d-d6c6-3ea7-a7b5-aad0a1203b03\nd3ca0450-2167-38fb-b34b-449741cb38f3\n1ad57a00-cc61-3f5f-9e2a-9981a57e9856\n418da7f6-88e4-388b-a69f-44cabd24ed55\n613558a1-6a8e-3fda-8fa6-1045a064a0f9\n6626b7b2-bcc8-4497-ae92-307ceacd5010\n9a82e3c8-1738-3f85-9245-1d3717171d2f\na69fa035-5121-3a39-a3ce-e33e9f54b506\n14896a70-a440-34d0-b68e-fd9882557da6\nc42d34f3-78d5-35be-9c47-77d297caebfe\nc9fc62c5-a289-36e3-a900-7e7807eb2716\ne368d49a-e02a-3374-876e-8325f66c3574\n6b012ed3-c322-3522-b52c-b4f24f894d4c\n30e94a6b-ca9e-3d2c-9099-86700ce845f9\n5f016e44-0f38-3837-9111-58ec18d1a5e6\n4bf8e9ff-e1a1-3a22-a9d1-80f3846c0263\n841fe537-5e76-3b3a-8298-75fa1a41a14b\n64037371-4aa4-3fed-97f4-bafc1674caee\nb5ea60b0-2540-4efe-b60e-f421ade3c128\n34fe8fbd-2b1a-3552-94c4-e80d1e85e5c2\n67d5fb0a-baf8-32f4-9316-18ce755f3e8b\n4619e709-c9c0-3b26-923f-23a78e231136\n285dcef7-9f00-3c9c-baca-6c8269210ac4\n67d00dd9-fd33-3518-85f5-e26353373c33\n156a412d-3699-3c1c-9ada-6ab587347996\n9d65b03d-b59d-3a45-ba66-e313d3cdab40\n022af476-9937-3e70-be52-f65420d52703\n539b7a79-54c6-30ef-8e06-210d42c79125\nb4fc7eef-819c-35a6-b937-358ffb5c2aa0\nec02cf7e-36d3-3e9d-8835-3b6c27975bea\n6ee14358-31cb-3c6d-82f5-54d6a20444de\na7bcdabb-f9b7-3c16-806d-3ddf1c2d49a2\n54bc6dbc-ebfb-3fba-b5b3-57f88b4b79ca\n45433055-2b69-3cff-8135-67b3bfa04034\nf61bcee1-2964-3c4b-95a5-697df5f42f47\n902d5e72-b665-3615-af2d-a2b6164864b2\n812a45e8-b5d9-389b-9151-09c57ce969df\n5d391e54-adec-3584-adf0-5025d7564e1b\n9f6d282e-f573-31d5-80e6-9a193e80cd7d\nf6350a4f-eee8-31bd-8520-28f9c81c98a8\nac3e33eb-5a53-369d-9e5a-7950392bfe99\n3844f8a9-b7d4-4919-8e9b-a0370ad29ec0\n57636c80-9335-3aae-af70-11755db93854\n38f30522-2d43-3ff3-a94b-84887ab1671d\n968e77fb-9ab1-3427-8984-9e3028b186e2\n7186d7d2-453e-4193-b327-72b66bbe3fd0\n3a1b3424-700b-3b65-9e89-90772a8e24ea\n7e3d8631-3b7d-38c1-b833-ee7cfa7235ca\n93b755f1-f865-44dc-a98d-cae5eb1a25d0\n3ca11a5e-50b2-3cc3-af7a-ce7ab02b9954\nc6940de1-dccf-3b42-9c39-bbb9feb2d638\ncaabc342-aed4-3104-8195-7461a4add481\nafbdd2e0-696a-3222-a20a-2023baf8e5af\n70e92418-e4e6-32a2-98c8-9844b1c24f92\n4bab74cd-aba9-4752-9e1f-006cc639d63e\n982bcae9-1840-37f4-9278-3dbb63031aac\n23808d42-e4df-3a0d-b713-fe20e09a4f39\ne7e7ebad-79d2-3ae1-95ce-f3035bc8f719\neec8ae97-8de4-354a-b11a-d3a14b276479\nd9c267be-f19a-3183-afe0-f0625a375743\ne1e9d341-716f-3613-9ec2-2201c72361af\n4aae26d1-aa71-30ae-b838-2a25d1f317f5\n1c8f1189-c4fe-3303-bf2a-f88f5751b81e\n4c18ef76-84ba-4a78-8275-7663101fffcf\n26daba71-ca0a-37e9-9dc6-0f81f02c0afd\n737314f0-997a-3cd1-a652-78453bfe2b57\nfdb0578d-4fa7-37a7-b60d-5472b4d39136\nf4c6ade0-7b9e-4ad7-8d86-13d2f4c91499\n8aaa2fac-59f9-3a3d-98ee-f3dde8b4d781\n6937b4e5-b5b4-3970-b5dd-9ad194e6c338\n8911207d-fc3d-3009-bb35-18138197724f\n7ebbdbeb-a8de-3612-8f22-6ce91980841d\nb4d5e738-b937-33fd-8131-bf1df36f598a\n1c7d3b85-6cec-373c-a4eb-5137d7cc6a7c\ne38c1e1d-f0e9-3d73-8193-29cbea481b4c\n98fd128c-4f32-40fc-a23c-7feb50c4478a\need8593d-60e3-3e41-9fea-55f544b01749\n52c9e613-61b3-3d17-9f6d-b28de8a14829\n50d508e2-6753-4519-a8c3-ad94a76ee948\nd2901fe5-4b64-3144-98e1-67ef5ef83fa7\ndb447b86-8103-3ee4-93de-2c838ba061dc\naed1b616-9d9a-36d3-a047-07ad3955fbb7\ncd822baf-4aa1-33fa-bfe5-d91386598edb\n0c143226-9c39-387c-a935-1391bed6dc75\nd72c43a5-95bf-3a3e-9019-cf25cc0a61c0\nf46707f9-435f-3a06-9017-deae11feab53\n956dd277-e000-4c6c-af4a-aee4d86971c9\n7f40c022-9f9d-3805-abf8-7533175b3f25\n08734a1b-0289-3aa3-a6ba-8c7121521e26\n41b6f7d7-e431-3992-b783-74b9edf42215\n59a668bc-5caf-3ccc-8335-e9cff4c61d0e\n9cdbe6f4-938f-4ac5-88f7-94a82bea715b\n382dfbe0-836e-385c-86f2-f1
afcf57a402\ne4f6dbab-f2eb-3bd2-9dbc-88640e3b4a5f\na3e09a66-a921-3c4a-89e6-7fecf6854a3a\nbba89165-0e5d-3052-abb6-6a61e37861a3\nc0d36fde-5672-430a-9dd1-3e2a5d4f6cab\n1e6f21fd-5c8a-3526-ac50-72adae89d6a8\naa630060-2eda-37bd-ae88-a513fd9fc8e3\na8a3297d-62f3-31ac-8db7-95ef53ce0d31\n8346e544-4a73-3c88-9339-c7a21fbd3a2f\n93c97162-a834-3331-b15c-e4ab278b1c6d\n5037a27d-95f5-352f-9c64-5b8e75f574b2\n74dd5c1d-7a9a-32d1-903a-fc57e07109b9\nebece6dc-ec92-326f-97ed-f66b2970e358\nbec0f69b-832c-3898-b589-0127ddc282f3\n27fba275-0b37-3033-b20d-8f9848f78b1c\n7c539ecc-658a-3956-a9a4-6e7f5bd67373\n6fdd8d39-7b04-365a-9941-e9e805b05ecf\n43efcbe2-fe24-35b0-9e69-b07c1b0725d3\n5f2b8881-3447-3905-99f8-def9d72aae42\nb5f3900c-b421-3032-aef2-2e91a69d1163\n4977e8a8-4e1f-3ca2-a44e-454cd3756a5f\nfb2cce69-655d-3203-990a-74301895408d\n74f15437-b85e-314a-9d86-7294b98bf07f\n75a9cbdf-0bec-39f9-b536-5b37aacadf96\nb6c04ab6-1c07-3e17-97d5-e870db090e52\n4e302e79-1cb9-358f-a3fb-e133a655af4f\ne7e178aa-931a-4674-9bff-9278a54e6aae\n121007f3-a0cc-3795-9606-85108b800772\n7ef4a6cc-7266-3a31-8dd4-01c3d3c58dcb\n4d9e3bdf-7216-3161-8281-72863f3c2bf6\nb7843066-abbd-3275-ac52-90a8363f65f7\n633addf9-441c-35e7-868a-738aa612d51c\n105f47eb-491c-3cab-91af-83c5bc1f6c48\n66a40dcc-4de0-3f72-bff0-ca543ac5019d\n098fe60e-bab0-32e2-89bc-bedced881911\nd6ba4898-1369-3521-981c-b9ac57420418\nce5033ee-e74e-354a-9299-8aaefbd03f59\ne65e405c-8aea-30f5-a926-1e0fbbeefb9f\ndb17141d-4d35-381d-9949-36ce767d6641\nb66a9b8e-8fa8-3409-907f-a70ebd7051e1\ne88132d0-4512-3d6f-a1c8-f60972332af5\ndf321672-461c-361b-aac9-e81cc9a88b9f\nbb25d7d0-9146-46a0-8ff1-ebfc25d63417\n9bdb4139-173f-33d3-8730-e29752d737d3\nf4cb6ba4-cd0f-30cc-9cc9-52bd14bfb3cc\ne28c16d0-084a-3dc4-aad4-9d157ca528de\nede387f4-f390-3f0e-a071-eb543b73ed73\n74ec2f62-9d57-39a9-bf88-97006f64ee5c\na160c635-aa67-352c-a5e6-03b113493090\n6180bbb1-95ce-381b-ba17-5411c5712824\nd58d55ea-f30c-3622-8303-1574616b9865\n6dadba1d-0f67-345b-bc5e-407ab8f7654c\n3b2994cb-5f82-4835-9212-0cac8fb3d164\na88da814-ecc6-39c0-93ba-8a81f403a7e4\n094c4119-eb33-3dfb-a18d-492cbdc8413a\nbbdb1e21-62eb-3230-8cef-a3b091c5edad\naa82b61f-7156-3c68-95a4-b79cebd120eb\nb0663029-8f8d-398a-8a28-81ba29224696\n8aeeeeca-6a79-34ef-b667-835d53536a8f\nf8412dbd-48b9-39f3-b534-08950f6e633b\nc062ba0f-7591-3225-a57d-8181622dc2da\n75449af9-61a5-3a4f-95ec-3a3dc35b4cbb\nb7fbc13b-47ff-3e3f-a363-86d60ba664b8\n79cb0109-4c92-3ede-8849-76cc6824b95a\nc556f8e0-a001-3586-b2cf-d3256685c39f\nee27a871-85cf-494c-8519-f54815040af5\n555a7659-ffce-39df-ba06-d9fcb2f812f0\n1b8fc962-7036-4d7f-885e-40b631cbdeaf\n7f7e4709-7596-35f9-89ac-d808178b1533\nc2ec8955-1797-338f-9486-d7c41926f791\n72ad5f22-3a9c-3758-81af-abda8181a622\n2501c6d0-071c-3a7a-b51f-c8cbd37abe25\n823371b1-3197-35d6-a6b7-bfd432e10440\n63a006b5-07c8-375d-98e3-21466f5b9c6a\n1a4e2d86-23d4-3a0d-a9ac-8b0936ae94ce\n828ddef2-7609-3683-8e32-c21e7c07d6a6\ndf738339-958b-31fb-8e48-a4380f4c538a\n3fbdfb6c-927f-4aaa-81b1-21b02efd4c01\nc67a748c-1e93-3a6d-be38-daedf175f911\nf9f6a7e9-4f79-3fdf-b1a7-ba300622f116\ne1450d07-faed-3d97-b674-c6f8d2498d80\na7a2236e-8f8e-34aa-9343-722f9b3bb829\n64b24fd1-f639-4f7e-a535-dbfe9fd737a1\n44200521-4cad-3a5d-8568-e0f3f1ca24d4\n444cce44-cc82-4620-b630-1b5849284ac7\ne033cc8e-b23d-3fc6-8954-d90c5e98550e\nfa9ec72a-cbcf-35dc-be20-4d0d9e7215ef\n3c51357e-f6e9-3cda-9036-fe6e6cd442fe\n1e51a567-b416-3c46-9424-05688ff851f7\n2b443c95-d55f-3cc4-a2a1-ae4af293d8d9\n3d7743c1-c0a5-3ab2-976e-84af93270f30\n441871a2-a9c5-3048-b7e9-d88af5acb8f1\nf2b0585b-ada3-3123-963e-14df7d96ca9e\nc7f5e5c1-dc52-3619-8998-420b2e280d8a\n855908a6-
a848-3b7b-a4a3-bbab78a423cd\n07e4fccb-eb2d-31e5-bbcb-6550d0860f64\nf7cdc2d1-f59a-30a2-aae8-8bb81c769e6e\n8223c3d0-3b08-3889-9cdc-a88592c4bd4a\nde586ff4-3413-367d-befc-ad022b73592b\nac1b1697-42b9-4225-a666-d17f72204fa8\n73539e96-eef2-3302-bdf4-a39e9d95b6e7\ne0ba7664-d287-39df-8193-00d60cae1417\n0a132537-3aec-35bb-af13-7faa0811000d\nb29b43d7-3af9-363e-aaeb-8805d958f982\n8ca98d88-67b5-385e-80f7-b32758668fab\na4f240a0-12d4-3542-a11f-0c592e90e4da\n1844c439-b94c-332a-bb94-600818350eb4\nce0e814a-d9df-3975-a521-d8ae9a091e96\n95a47a36-1041-3924-bbd0-4dcad52c323a\nf54c1d50-48a3-4651-bfb0-50b87f13dc9e\n890cf3b7-3385-390c-8b2e-132c744b5d2d\n189c8512-b034-3d58-a372-cf48eacf02dd\n4e391f98-31a6-330d-9252-d02aab82f5db\n1d950a38-5c2f-39ce-9cd3-61249bc85194\n4e1ac476-80a2-3612-bfd7-1abd24d2b644\nabd4fe8d-7520-3b35-b8ac-4de367141b6f\n0b97f5dd-c396-3c02-b07d-b7fdbcb6c3d0\n6aa2ac89-6b25-3af6-ad59-221351189f4b\n298715e3-b204-3bf5-b8c2-fe3be9e310e8\ne1f37027-6a39-3eb1-b38a-3f2836b84735\n9e684390-4af3-3ec5-b163-855bbd026ff1\n1842383a-1577-3b7a-90db-41a9a6668ee2\n7a1412d3-5a53-378f-85df-ba58b2408f46\n91cded81-9f72-3930-bab7-5d3e3fa0a220\na7c9bb12-322e-3f8e-8798-cf57a4a72f99\n648e8393-f46f-384b-9bd1-c25a2285077d\nc69e348a-8e10-31dc-b71b-dd8e5cfd7211\n87ca3d9f-f317-3efb-b1cb-aaaf525227e5\n182ba3f7-b89a-36cc-ae40-32a341b0d3e9\nf6cc0ebf-fc6a-3bf2-8bcb-76d8c43f194e\nf2576c8a-da9b-450e-88cf-a70af1b0eadf\n78683234-e6f1-3e4e-af52-6f839254e4c0\n7dbc2eac-5871-3480-b322-246e03d954d2\n20bcd747-ef60-391a-9f4a-ae99f049c260\n11ba4e81-c26f-3cd1-827d-b6913bcef64e\neec284b2-840a-3c75-aa42-04d2e309bbe1\nb50c4763-5d1e-37f4-a009-2244aeebabcd\n15ec0778-826e-3ed7-9775-54fbf66997f4\ne0ea281b-6956-3605-b720-71b54ec87d25\ne8c9fd64-fdd2-422d-a2a2-6f47500d1d12\nb8489c02-60d0-3f44-a3b4-9de62830d666\n0b86f508-5df9-4a46-bc59-5b9536dbde9f\n201fe83b-7dd7-38f4-9d26-7b4a668638a9\n335aabef-269e-3211-a99d-2c3a3a8f8475\n76916359-96f4-3274-81fe-bb145d497c11\n22052525-4f85-3fe8-9d7d-000a9fffce36\n4e3fedbb-847c-3d5b-8a62-c9ff84550985\n77574006-881f-3bc8-bbb6-81d79cf02d83\ndafe14f5-825c-4e7a-9009-6dfdfdd5b030\n2f2321d2-7912-3567-a789-25e46a145bda\nbbd19ca1-805a-3c22-8df3-cd7501aa06f3\n58e82365-03bc-3b2f-b55a-a4ad0e3e792d\nd770f926-bca8-31de-9790-73fbb7b6a890\nb6500255-eba3-3f77-acfd-626c07aa8621\n8749f79f-a30b-3c3f-8a44-dbfa682bbef1\n47286726-5dd4-4e26-bd2d-5324f429e445\n185d3943-dd15-397a-8b2e-69cd86628fb7\n2ff4f798-78d9-3384-87e9-61928aa4cb6d\n6803104a-bb06-402e-8471-e5af492db0a8\ndc9077b9-2fe0-3d18-9b97-8067ff090874\n7a2c222d-addc-30b2-aac6-596cb65a22e3\n0fb7276f-ecb5-3e5b-87a8-cc74c709c715\n3b3570b4-7b0b-3268-a571-b0889dbf40b6\ne42aa296-0e5d-4733-87ec-131a82f917bc\n19350c96-623d-4d77-af96-f8c23f00c358\n02a00399-3857-444e-8db3-a8f58489c394\n7e4d67b3-c3cc-3288-afe5-043602ea3c70\n5c0584a3-52a6-3029-b6ff-ca45a19d8aa6\na1589ae2-2678-310e-91cc-c4b512cd7fa5\n3de5b5d6-68c4-3c95-84ed-be7c83d829f8\n9d16e76e-46ae-38c6-8399-99218514afde\n2d403b7b-06e8-320c-b013-4f684ad53be2\nf77889f6-ef5a-4eed-a4cd-5d67d4a6e9c5\n1eb3360f-4c34-3310-9ce6-845ea9272c56\n5546df9c-9310-3ed5-929a-d7da19e18bf8\na1358c59-b28d-3ddb-af1c-3a5d1c394ef5\n1bd7db3a-0b42-31cf-ac1a-de88fd9fa721\na4400a38-bc38-391c-b102-ba385d7e475e\n4fcdebe7-b52f-39e7-a5bc-c664eeba5e7b\nf7d568d4-0836-3f47-b330-f8d204c4b96e\n412ccada-28df-3de2-b394-9cba3fca5bdf\n6f3dbf4b-9559-340c-a3e4-cbe655bf2059\n84c98474-28d8-309e-91c7-9cf9539825ab\nde23dfe1-c0b1-441b-810b-324090dc171b\ndeec57d0-d31b-31ec-aa75-88db5d9dadf5\ne95c8cc2-ddb3-3e7b-b8c3-e7584a778464\nc3388791-4fef-3278-a085-26121cf5f513\n45488531-3648-3e2d-8f9c-3c2870
32112d\n21c0472c-5ba2-3276-aad4-b9aa66cb5fa3\n98e7f0eb-4676-3120-94f1-8a790581e6a4\n28bd43de-e2b7-3c60-a626-0e525f639357\n5c0afbc0-a6ee-37c2-aebd-c1927caf7340\n5677a441-abd2-3b29-9f0b-333e181cc907\nd8192bbb-3b00-3c68-a79a-65872ea4276f\n0fdbd56a-1ff7-3624-81f9-03cd68fd5616\n6ef553eb-6dbb-3a2a-ae3e-ed7090b8826a\nbc20a6d3-2db2-3849-8843-1e1b8c93e5db\n11a84740-18a3-3798-91c5-21dc9c765350\n6a6e93f0-a130-3340-975b-b2c88b16d343\nf6107596-76e0-3064-a4a6-86332a90e539\n0f0cdd79-bc6c-35cd-9d99-7ae2fc7e165c\nd67d020a-4d28-3bfd-891d-d6aa7dcf0a69\na674e2e5-3dfd-3dd5-8503-192357b0e96c\na89557fc-1268-36e5-9cce-335f2da27bc8\n51428934-b0a7-3507-94e3-31d37bba38a3\nf849731b-d288-3bec-8f35-6bea979f7dd8\n91ac892f-d2c1-3143-b5c5-f0d4640cfc0d\nb48a15fb-2e84-34df-946f-ad72b3d7296f\ne7547e4c-1ebc-3428-8964-a5b91e81098e\n069cc46d-38bb-309d-88cf-296a3d0c0820\n8c52d911-fe34-3424-9864-d3fdfac38064\nd33f667d-7b6c-39aa-9ba9-eac2fa615ae1\naedbd525-e6df-4c0c-8be6-61c27fe58fd6\n81d2b40a-c579-3e9c-b520-bee26cda947d\n3153b5b3-d381-3664-8f82-1d3c5ca841d2\nc780d53a-2d37-3cd8-9e89-530966aef53e\n88f47a10-87b4-3ea8-a0c7-a07d825b647d\na91d4c7b-bf55-3a0e-9eba-1a43577bcca8\n25e5c600-36fe-3245-9cc0-40ef91620c22\nd5d6f11c-3026-3e0e-9d67-c111233e22de\n91aab547-1912-3b8e-8e7f-df3b202147bf\ne1d68dde-22a9-3918-a526-0850b21ff2eb\n9bb1f857-8b61-369f-a537-484c1323ae32\nb6c4361a-7dd8-32a0-83d4-7f9d2beaed08\n798354fc-30ee-36f4-83b4-f49c3b307db5\n62a1e53b-b55c-36c2-bc5b-e216d494875a\n47167c79-2ba4-369c-8db8-760a30b4c38a\n2b044433-ddc1-3580-b560-d46474934089\n380e5bf0-1c68-36a4-ac64-09a03b60bebf\n8066e267-a653-3b43-8fce-a5a780912c82\n3c56f1ef-d4df-30ae-80f3-0a5b22d4d3a6\n3fa8c20e-a4b4-3af6-b9c4-6cb96f83916d\n8e02e2db-2836-37ec-af33-a1cc2e6e49dc\na36f80a5-5edc-3842-80af-292ae639ee74\nbb9be2e6-8f0e-3bb3-8bb9-5d9aa9df384d\ndbe19bf6-93ad-372e-b96d-f7b652cdba93\n1992ed13-948e-34e6-8d9b-a3416e545a95\na47ba6a9-ffa1-3979-bb40-512339284b8b\n0a524e66-ee33-3b6c-89ef-eac1985316db\ne743b441-ea8a-36d7-8124-f14dfa13a0e6\n8d8b550e-d0be-3cbb-a371-49ec36fa619f\nc85ebc24-0934-3423-9c14-f0fdbee64b68\n65d3f43d-1969-35d4-bf86-bd5e4b1ac803\nb51561d9-08b0-3599-bc78-016f1441bb91\n8f317f00-f8b4-325e-a5c7-e4045427a610\n2772dd5f-bc0a-47ea-ae19-a5e0dbef8f41\nb98a7838-ac1f-339f-93c5-fe7f98ea8657\na146ab19-f4f3-334f-b830-fc68de83e26c\n5481321f-d317-3e80-8061-6e9c635c4ca9\n4a789b07-7578-36ec-89cd-68b01e0737fb\nf8825b65-5631-3417-8309-bd5677d694aa\n790d3c83-f6bf-348e-80e7-12f29240e598\nd26b95e4-d200-34e2-92c9-c16fda4cd9dd\n945f3b20-778a-3581-adef-544de4a089ef\n65732efc-1564-3ff8-8c7c-4239a08c0d70\n5c7ee953-d8b0-33ef-a491-0bb716763cfe\nc67f439a-f945-33cb-8517-40c9fdf60d59\n6f2f7d1e-8ded-35c5-ba83-3ca906b05127\n72c31859-3676-3cbb-a773-0591d8d5799e\n74a3e9ae-6811-4d11-a112-4c4963773cfe\nf41d0e8f-856e-3f7d-a3f9-ff5ba7c8e06d\nbd4a7d9d-14e1-3c17-873d-a74d0cd6a5d7\n490f13c4-4c1f-3e3b-8a9f-0f27c6906b4e\ned6ad297-ee09-3532-bcfc-c16ad5a05c49\n595ec33e-a1aa-3aaf-8821-8d1780db354c\n3933d1a2-f121-3c8a-8b01-7738e58c045f\ne0cfd042-ae29-3d21-bb47-81eb8f933ec8\nb1a98ad6-9b3e-35fb-afae-70b279fcbfc0\n1bf2bf1c-64d1-308f-afd1-220de9d30290\n49a9df80-ab0a-31fb-9341-a79f7b0258dd\n118a1e87-aff4-35f5-aa38-01504a63ddce\n41c3597a-aab1-3123-85a1-dd5d459af461\n9a8aea4b-9b61-3884-9f3c-84c3c36e6373\ncea5f5c2-e786-30f5-8305-baead8923063\nf03bfd11-5ba2-3bc6-ad76-4166b06491f5\ne0d2fe70-8f98-3ce2-8d8f-4268a81f7169\nda30abcf-652b-38df-a128-10942b225ec5\n5e9fc665-2353-34da-a2e7-2094ab17e790\n9b1da4e7-03a9-3277-91f3-ef6e610a6320\n067b1c50-6567-3840-ab56-1ca2a0ed9c30\n134bb8e9-9080-3bc5-948d-88d8cc034550\nb56e3f47-72a6
-34e8-9ada-b4169e28e5b9\n84bb4b17-e7f2-3a1b-8c2b-6d6ec9a23e31\n7c696d35-e34f-38b0-b4b4-e88803ad1f6a\n8858428d-8fd5-3c3f-8ca4-d01f6e25e63c\n93582b51-5be1-30cd-abb0-3eac16dd6dbc\n32edd7c7-8a8f-360d-bcda-83ecf431e3e6\nbdd7e8ba-f7fa-38d1-b6bf-9dc77334fec5\neb777faa-5b76-387e-a408-90524c6f2848\n7ad46cf0-aa12-4050-ac2d-cf34b5f64d41\nc990cafc-f96c-3107-b213-01d217b11272\n61e56102-4d85-3a40-bbba-1a007c816f68\n38609ed6-2445-3df3-bd92-849d3963510e\na359e053-a350-36cf-ab1d-a7980afaffa2\n76038978-47aa-30ed-bfa1-2d63753a866c\nc654b457-11d4-393c-a638-188855c8f2e5\n5d062611-5417-3405-997c-1d1aefe4d85f\n4058d838-75cb-35e2-af7e-a51aaa833271\n6b0cc3b0-2802-33a7-b885-f1f1409345ac\nbb533c69-1e0b-341c-bedd-ff25fe9b84bf\ndebbba6b-8cb8-3ab6-adfe-54fcc6b02839\n133e2e0b-b0fe-3bb0-b1f9-c846fcfd29e8\nedf3a727-664e-38be-b990-65d34012d926\nce34ff64-0faa-3fae-a79e-985f7a5172c9\n0f257dcc-8606-3ef9-b17e-b022a3fc72c7\n614812d4-3344-3975-a1c8-4131910c4a10\nd9fd666a-8f55-38bb-8387-80fa44c29348\n03b2cf2d-fb61-36fe-936f-36bbf197a8ac\nadcf7d18-0510-35b0-a2fa-b4cea13a6d76\ne574050e-f787-3186-9686-2e9aca8102a0\nf3d1e3c3-2770-3504-a592-b62619598812\n953087a4-f704-37fe-a60f-82877e84a413\nd5d40b4c-48d9-3b68-903a-025eb0fa334d\n0749e9e0-ca52-3546-b324-d704138b11b5\ne757cddd-5ff5-305a-af11-d7c6747d3979\nd97ae2c0-b8d1-341c-94b7-f19d5fd2982a\n46d917cd-531c-330b-8d7b-979b51a8927f\n03fba633-8085-30bc-b675-687a715536ac\n6419dcfd-8777-35fa-924c-ebefccde0a9b\n855ba280-cd69-348d-9107-69e28cb8ad99\nce0575bf-c2fc-38bd-9947-ea7494a799f9\ne125bb91-dcaf-3013-9cc7-da653d7e11e1\n49d76058-b4f0-3931-86fa-de160b4c1b88\nb48d6d4b-f0dd-35da-850d-36a715691e2f\n99a3270d-c5c5-3df7-9a2d-a612c8104d0e\n7cb4b11f-3872-3825-83b5-622e1a2cdb28\ndc4d148d-f84c-307c-b2b7-f0cd7c267f57\n106d962b-911d-354d-961d-9abe93119b9c\n14bf638b-8f0d-35b2-a369-6d846b5b3892\n8aad8778-73ce-3fa0-93c7-804ac998667d\n5cf52bbe-f7f4-30c9-a4c2-a1fbb93513e4\nbee1146d-2e80-37e3-b08a-6ac8858e8973\ncdd752d0-caee-3d95-b1db-7fc20cbbc783\n9caf211e-3e6e-3996-8518-f617b9454e67\n0a8a4cfa-4902-3a76-8301-08698d6290a2\ne4279e3e-b7e1-3f43-aeef-2bfa2836dab6\n6ff3a51a-e0ab-32be-beb5-4079e56933c6\n4d324eb4-39f1-3837-9b97-c10db5d2b61d\na1537c1c-775b-3969-ae13-2e83e5a4728a\n0d9e4cff-73ff-33eb-9981-795475e62faf\n74648e09-358d-3183-9b40-278620befa40\nad319b98-6faa-3648-98bd-43afdbd20020\nb9f73e2a-292a-3876-b363-3ebb94584c7a\n7d1d720d-6708-3148-917a-b8dc78f1dcd9\nf64ed43e-417a-31ad-a322-b6108bf99a71\n4d7b84b9-0a03-3aa1-83f0-4766013c3fb1\nc96a09c8-46ed-391f-8a66-c46fa8b76029\n5c1db299-e2a2-35e5-84dd-acda8fb393bc\n194b6c89-8060-3174-b402-308f72cb1c15\n9ecbfef8-29c6-334a-b4ff-aa8201439826\nc4ea1b05-c7d5-3b59-aed1-9f3d2621ac00\nb403f8a3-4cad-333e-8557-d7da2e163f4b\n4f1b4bb2-b30b-3537-8fed-dd8f843f5adb\nd201af7e-48c8-34ad-be1c-e649af2cb5c2\n0d8aab9f-4edf-3fb3-895a-ba64e8f2cfb2\n81700b3c-2db4-3f72-935c-274d3607d6d2\n62879808-1586-4d49-80fe-2f547e355191\ndc9c2d63-083f-32c3-90ff-943ca823a245\ne331aa95-3660-3c71-be9e-030bab0b8ee2\n7c5e3704-33c8-3a4e-b032-9187a6f90206\n35a15c5c-fa4a-3838-a724-396e112ec95c\n5d55a63a-3146-32d9-89ec-e207e95ecbde\n0322b098-7e42-34db-bcec-9a4d072191e9\na2f568b5-060f-33f0-9175-7e2062d86b6c\n332b278a-a6b9-3bc3-b88c-241e4b03b4ef\n0c61aea3-3cba-35f3-8971-df42cd5b9b1a\n53f5011b-2a8f-3a73-9d86-805462bb542d\nc1a6c20c-e336-3efa-81b6-7c1242d70bd2\n7a17d467-9f29-3706-8e40-32bb7fb033de\n0ab21841-0c08-3bae-8424-daa9b336683f\neb69a196-fb43-3ddf-9bbe-9d55fa1e8200\na3876690-9d49-3c98-9421-02cfe0ccb551\n7c30c3fc-ea17-38d8-9c52-c75ccb112253\n4935629c-fd9e-3b2f-b68e-9489c89585df\n49e970c4-7364-33cb-a298-ead218e9a7
05\n511b93af-f16e-3195-8628-fbb972a17f74\n91923e20-9a05-32e0-ac53-8c09b0b60341\n00a6ffc1-6ce9-3bc3-a060-6006e9893a1a\nb5e6e498-54b3-37bb-b2a3-cdac33a18363\nc730e199-fb8d-3abf-b7aa-bbc81bf8c08f\nd37be0e2-8223-3eeb-a0e2-c4b75d5ff87b\n9afab336-dbae-3f70-a669-46813f4570d7\nab3d8387-8e07-37f6-a74c-cf100fb6a612\n1a10b0e6-569f-32db-95e8-10c074e353e8\n382cf8af-6c8d-3ed9-907b-12214d2c7cb0\nd842ce41-8d9c-3c0f-9c04-595d97be5140\n7cd08674-1787-37d9-9365-988df023724b\nf150d98f-0109-3380-8480-c6846fb8e9c8\nbd90cd1a-38b6-33b7-adec-ba7d4207a8c0\n52071780-5758-3ed4-8835-0d64ecdc5575\n04994d08-156c-3018-9717-ba0e29be8153\nbf360aeb-1bbd-3c1e-b143-09cf83e4f2e4\n24642607-2a51-384a-90a7-228067956d05\n78da7b7e-8ddf-3c7d-8716-eaa890106dd3\nff0dbfc5-8a7b-3a6e-8936-e5e812e45408\n78f7cb5c-9d51-34f0-b356-9b3d83263c75\n7606de8d-486c-4916-9cbb-002ee966f834\n858d739b-a0ba-35aa-bafc-4f7988bcad17\nb6e967f6-92bc-3bf5-99c9-1b0c4649fd67\nde9cf513-a0cd-3389-bc79-3f9f6f261317\n95bf6003-7068-3a78-a0c0-9e470a06e60f\nf1275002-842e-3571-8f7d-05816bc7cf56\nf292cc5c-7a90-360d-b62a-074c643bdf59\n3bffdcff-c3a7-38b6-a0f2-64196d130958\n472a240a-10cd-39cd-8681-558f7c7cf868\nadf9a841-e0db-30ab-b5b3-bf0b61658e1e\na060c4c1-b9fc-39c1-9d30-d93a124c9066\n6aaf5b08-9f84-3a2e-8a32-2e50e5e11a3c\na33a44fb-6008-3dc2-b7c5-2d27b70741e8\n9e9bcfb7-601d-3d80-bc12-ef7025174beb\n2e3f2ae7-9ab9-3aef-a3ce-a0a97a0cb1ab\nf4c94798-4d77-36ab-bdc5-c1194e5e7aff\n52971a8a-ed62-3bfd-bcd4-ca3308b594e0\n0aa4e8f5-2f9a-39a1-8f80-c2fdde4405a2\nfbee355f-8878-31fa-8ac8-b9a45a3f130a\n214e388e-cbd7-3dde-a204-d2ec42298808\n280269f9-6111-311d-b351-ce9f63f88c81\n20d47f81-46e8-3adf-a0ca-564fbb5c599d\nb2053fdc-0b94-30bc-aee7-5bc6fb7e9f52\n02678d04-cc9f-3148-9f95-1ba66347dff9\n29a00842-ead2-3050-b587-c5ef507e4125\n9a448a80-0e9a-3bf0-90f3-21750dfef55a\ne858fb96-6b1f-3025-b40a-f71fd8d28c32\nd70dae33-b4b2-36da-a4eb-345ef1c484cc\n386c34fc-ff56-371c-9288-6ba42620f23a\naaed41a5-47f2-3e0a-9645-2dbd871f744f\nbe0615bc-1d82-334b-9c98-6adf40406955\n4abe4fc9-183a-3ec1-9434-bc74fb724c0f\nb40c0cbf-5d35-30df-9f63-de088ada278e\n67be173f-28a9-3bcc-b110-4b81dfe3bf5e\nf554d503-4901-3b97-9516-a16398c66631\n399064b4-6df3-3de8-8793-2738f8723ee3\nae908cc4-7301-3390-8940-eb9b679a8a39\na86ee261-b86b-34f7-92ab-be8367d1fc4c\n3503b283-fbcd-3835-8779-0cb2b7ef55b0\n1ca5291b-3178-3a93-a117-001497899b79\nfb207d3b-d2d5-3100-94c0-9145aebc770b\nf7c4cf87-6bab-3723-bd74-1c9ac5add9cb\n65f1eefa-cbc3-3d53-9991-dc0500ae9183\n9320afa3-ed05-3364-a017-ae7ddc5d26c7\nb248d26b-9c48-3d5f-bda1-a05ec99c2d97\n7c4e5ad1-d604-3e44-81ae-68f7bfe21d27\ne4221cc6-a19d-31ca-bf94-031adb0ea390\n6784f175-e69d-3802-99df-d21ec2081878\n97ae6596-a903-3045-836b-34f8206c6cfe\n48c9cd36-68bf-3bb9-ab95-5e0a6fee61ab\nb42dc943-8b33-3b79-a260-14eb9f58a991\ncf79d751-5d2a-3d5c-96a2-bb8d603f21e0\nc2bbb391-a453-36af-b987-9d15f46b8589\n803c44cc-e1de-3797-9b5f-15324a1604f8\naf8471e6-6780-3df2-bc6a-1982a4b1b437\n4e6d6bcd-8718-3e71-b9c1-7c352c991a56\n6b6b2e8b-3f4d-3b7d-acaa-8f970cb12adb\na7f532a3-87de-3129-8864-258396fd0b50\nb7cbdba9-18ac-393a-8352-4841ffee722e\n557dd6a4-2b80-3264-9c13-f70094526174\nd029a394-7118-33c9-896d-eabb894f58c4\n8ee606e6-4cbd-3c07-8419-fbda836ccaac\nab8c747b-b9cb-3835-a275-54c56cb9a469\n3e707e96-ad84-3e68-bea5-2f9ac502a2d9\n1a7e18b5-d8dc-371d-be5f-03a37b113e81\n7df1f32e-f059-3ac4-9d57-213f2f69b8b4\n8e5442cf-8882-3b94-bc47-18fcad84bb20\n3c27dfaf-1624-39d2-9075-158824ed8e8c\nff8e7fdb-1073-3592-ba5e-8111bc3ce48b\n770a58e6-eff6-39b7-a265-fe7f202fe8b2\nff52c01e-3d7b-32b1-b6a1-bcff3459ccdd\n47358aac-2ec0-3d45-a837-f2069ca7cee3\n18bdf01b-6ba6-30a
8-a707-1f1458529d3d\n71283e26-905b-3811-b9e0-c10c0253769b\nd0ba7a1b-f5ca-39d6-98d0-29c671baec65\n29080565-8133-3274-80cf-6ea98078e50d\n06e5ac08-f4cb-34ae-9406-3496f7cadc62\n83faae69-e37e-4804-b7a9-684d4a900320\ne4d53680-f7ef-364b-91a4-00e5aa91ab9b\nc94991c0-3662-3936-972c-1af63db486d8\n79f3de22-c643-3e97-96d5-f77274a458c0\n5bd6bd4d-3c89-3794-9935-2d044ce6ef37\nf3cc42c7-84a8-35c5-8683-13878bb9beeb\n9a25fd14-783b-35c3-ab2d-df4687f82b5e\n7ccdda39-69b1-36d1-89c8-2acc3823264b\n71d95611-9032-3787-a66e-e26313b08d46\nb5a1b0b0-a7fc-3a47-af82-9b25a81a8c0b\ndd251cc5-736d-3b76-8ad3-3f6cb138178e\n8a0ff1a2-9045-3be3-b67f-3914d88178ec\n080b1ce2-9477-39ee-8233-b7f33e1dfe56\n3dd173a6-8b21-3189-bd53-132919b96a48\n2fff4135-98ec-3b82-a330-b73d8afdf36c\n42c8449f-6e6d-3980-b54a-805eba6621c4\na1c1d559-0480-39d2-94f0-1a89f0226c4f\ncd2353c2-0fb6-3e18-8281-4c0df1d3189a\nd20c3612-a64d-3aa8-bd4a-58890413afbb\n2716d83e-8c4f-39a3-a2a3-d5e255fe8a03\nea6895f2-504b-37b5-bfd0-cbf7017f22c3\n3c3ed78e-1fcf-30ec-9e19-9bf142e2621d\n20b00c37-4fe4-31dc-a258-dae253ae6992\nc2f301b6-5d19-3296-a8ac-418ff48e052b\nfac8a63c-6b75-39d0-9f57-4344fde0f794\n256c185c-284a-343e-93f4-894eed474edd\n8c54e429-a3de-3eb3-96f3-d3127e2cc18f\n41d69427-364c-366e-94a5-8e556bcac39f\nb436606f-daa4-337f-8103-4360bf4704d9\nf84b4941-8e99-3957-b6f6-db1590338cf6\n11420316-aec9-3ad9-8b4a-d618bcd180e9\n48a52b7b-9391-3728-84f1-9aa6ca336214\nf4d1a3c3-5002-336b-a67f-775b3725237e\n2b6d18dc-4c95-3301-a498-3ed152798d5b\nab83611b-436e-3de7-aad1-f0c9ad254196\n389069d7-e6db-3d22-9328-e228c002bf75\ne123ba3f-99bd-3039-b6e7-8c62eaebf9c2\n12c3c14b-9cf2-3434-9a5d-e0bfa332f6ce\n7da33189-2698-3a98-b038-b0e5a271ee96\n3a789fb0-5cd2-3710-b8ea-f32fce38e3ca\n7d3f2f76-2f4f-3762-bf0f-f94f79eb0404\n16af3863-0d31-3cd1-8fa2-58053ffb953a\n80da8956-f418-319c-9f49-3d47d9002546\neb222d5d-0052-3ce7-9b87-19e09054a2c0\nb28a3715-4624-3a54-9652-b8f0b293a5a8\n3b2e6033-f37f-3a73-9fab-88317b9b6095\na4e62775-131f-37c4-9239-c38e3b254dad\nf110598d-7e01-3ed7-a227-4e958987a31f\n40870b19-3356-3e8e-a4a4-9f34eef8ea30\n47972731-b0ea-3c38-a10f-5ffdd42329fc\n991d11df-0265-3e41-b942-5b0c615d21e2\nb81922e7-092f-3052-8cd1-fec6a6763295\nc858bd6a-81ab-3f54-b46d-ffc091ef6945\n9807c577-0dc0-3116-864b-cf46a1276389\na6817756-af01-32ec-829f-d9e56ef7b6e8\n95312039-73b9-35a2-9aec-905494a4f7f0\n5d333477-796b-3e49-bf41-0cdbed39c8dd\n86519a39-4ce9-3d0b-a3f9-dd9aa26a2b25\n76c3f58f-9003-3bdb-90a3-b87cfbfa1c3b\nb213af37-7d89-342d-ae39-8a3c72159a01\ncae56e40-8470-3c9c-af75-6e444189488f\ne50e7698-de3d-355f-aca2-eddd09c09533\n4c33fc38-5e59-34f8-96ba-4e5a404d3988\n44adf4c4-6064-362f-94d3-323ed42cfda9\nda036982-92bf-36a8-b880-4ccf4e20b74e\nbdb9d309-f14b-3ff6-ad1f-5d3f3f95a13e\n0b5142c1-420b-3fea-9e98-b87327ae22c6\n0c3bad78-9f1e-395d-a376-2eb7499229fd\nbf382949-3515-3c16-b505-319442937a43\n19f53e16-9f99-3035-9672-7e860f3b0048\ne13c06cb-cd01-380e-946f-6d92ac1af49d\n1f434d15-8745-3fba-9c3e-ccb026688397\n1da4a0aa-22ae-3958-856d-05303de1f576\nf3cd0d0d-8b71-3266-9732-d9f0d5778eb6\n96dd6923-994c-3afe-9830-b15bdfd60f64\n6fa5051b-0220-3e04-8ae3-7a199c2f5877\n32835bfa-e53b-3526-9ec0-b0efcd11cbdf\n4fae2ef6-7112-309a-b926-448a5a3e1802\na9a3d5d7-e0c6-3f24-af35-2acadc1aa2d9\n1c8648f9-e7a1-3056-a2c0-19c8827a6a50\nc45888cf-30f5-3e27-abeb-4f55caecc1f0\nd4c7aa45-dfd6-3d71-bb8a-40efd5110d3b\n34c79495-dbdf-393d-bcc6-e6f92f797628\n87e61f5a-083c-305e-9ff4-5f699e85900a\ne95e20d1-7f04-34b9-9105-4333f11bf6b9\nb0116f1c-f88f-3c09-b4bf-fc3c8ebeda56\n9da07440-1001-3b00-a29f-c8bdc2f2b7d4\n2ee0eda7-151a-3957-bab5-1e5370192122\nc91f95de-d041-32f6-8b18-628a220be100\n
c6b7a5fb-8cd8-3ee2-8e99-b788eb02e731\n"
  },
  {
    "path": "tools/data_converter/av2_val_split.txt",
    "content": "22dcf96c-ef5e-376b-9db5-dc9f91040f5e\n5b1d8b11-4f90-3577-be0b-193e102fda82\n3f9796e9-c892-3915-b719-3292df878ece\nb5a7ff7e-d74a-3be6-b95d-3fc0042215f6\n4d73c4eb-5de9-300c-b34f-ff5d0af89653\ne40d67c5-3749-397e-aa2a-7dfe576a31b0\nb43d449e-daaf-33a1-bb7f-3f7a0b5f056c\n69c0ec7c-e289-3c4d-ade3-d2287ec34026\nf5a3ee79-a131-3f8a-91e9-a6475d778149\nd3dc783e-663a-31b1-bd85-46e04ca693db\n9239d493-31d7-3dd0-a05f-03d50a242392\n9946b521-ea55-3c52-9fd1-71afc3abf3c6\n1579b300-e7f5-3318-97c2-2c827b0c411e\n14f5485e-7417-3a5b-9be3-ec88461d03d4\n41e31361-569b-3ed8-bafd-2308b7a9377e\n4207ef92-0b3b-4708-8868-4ffcaef308e0\n5d40499f-c9be-38b9-a0cb-cd234850ba85\n73d86f1c-5e5c-3842-b671-7f29c78ccc55\nfa708289-f2b2-399e-989e-53f83fa379c5\nd1695c5e-08a9-44fd-8f45-93c23f700c8b\ndfc6d65f-20f5-389d-a5cd-81c1c7ecb11f\n2c652f9e-8db8-3572-aa49-fae1344a875b\n4a78c5db-041b-347b-9821-ceb82f99e3f8\n677c7bcc-f29b-34ae-a91d-74cb863117c8\n2a930061-3d8c-3915-8aac-f81199db95d8\nb6c86134-d7e6-3af6-9db5-8aba3df4f7a7\nb1527e96-5a5d-3adc-a893-314ab3a6012e\na4087bac-8194-4c9e-8b2d-4bda58773a3c\n58d01358-5927-36fa-9e11-d18d1dc1f4f0\n87ce1d90-ca77-363b-a885-ec0ef6783847\n460324ea-c769-38db-bba9-044643c8780e\n074d2237-ed1b-34d7-a2fc-68edbce50bb2\ne94f58d9-177b-31be-aa05-e6dd10d04124\nc453a8e7-d3da-317a-946b-f8e9678a8582\n31f062b7-dd17-3e7e-945d-198e91597de9\nff6adc87-5f47-32f7-b36a-546453c0e332\n3c58172c-7a07-3ad4-bdf6-7cae60928c56\n11995cbe-e076-3a35-910d-1e56ecf2c3c8\n8feb3dbe-4450-3aeb-b22b-e65128aa696b\n3cd2847c-604e-32b4-af19-6cd0da0dcdc5\n8de6abb6-6589-3da7-8e21-6ecc80004a36\n04973bcf-fc64-367c-9642-6d6c5f363b61\na4f72852-c2ff-35d3-8375-e52055508240\nb9b1564c-66d0-4597-a664-2735cf2ffd04\nbffb0c9e-5e3a-3251-ab5e-299491b53cbf\n0b1b993a-68b3-3232-9afa-fc9942b5b79b\n3b68c074-1680-3a93-92e5-5b711406f2fe\nc049334b-5568-3ca0-9b28-0c09d00b7bb3\nad870270-f3d8-3790-866a-78d61b5b76ee\n78cbd619-8ded-35b8-87a1-38c4f4aeb82d\n7ce85124-312b-35f0-a1a2-32206f75a947\n5426cd2f-f4b9-3660-99d2-6617bb0f1b26\n349c4c1c-9561-360f-9ae7-59772335d54b\n96284bbc-6b58-330f-a5a6-76cd518543f0\nef625e46-d0d4-38b9-9403-5614e7b39ec8\ncd83b7cd-e2e7-34f6-bee7-1ff5ca3ed665\nadc1fad7-de31-371f-810b-140576d9accc\na98c14bf-bf01-3ae5-992d-ea9f0a18e3c7\n0b324587-6097-3f92-a07a-a44f48c85d9e\n35f32393-e82f-3b20-b214-1f6a43d60f23\n36b38cbf-f6c5-3a12-8e7a-eb281cc9c2fc\nb9fcb487-363e-30a7-a316-a42dd81d8fe5\nc222c78d-b574-4b9d-82e1-96a4f3f8bb27\n51bbdd4d-3065-34ae-b369-b6e0444f34db\nba67827f-6b99-3d2a-96ab-7c829eb999bb\nd5fa4d54-74ba-369c-a758-636441ad7f07\n5f278cdd-ca28-3c53-8f5c-04e62308811d\nc865c156-0f26-411c-a16c-be985333f675\n7b7f86ca-b430-3872-a131-ff5b4a6b5dcf\n6da5d01e-54a7-3d7a-b86b-e0d6f8d3971d\n3fca5366-2b2c-387b-b63c-7ae8f9e0cec1\n5b614cfd-21c2-3b03-94c8-2a6c6bee166c\n416f2e1c-0ffd-3089-97d2-0514b818f8d1\n0c6e62d7-bdfa-3061-8d3d-03b13aa21f68\n7e48bba5-438c-3813-9ce2-97c98868afed\n756f4ed0-5352-31e4-b3c6-2841b9e779d7\n6d3bfbc9-45dc-316e-a94c-a441371d0571\nfd5c6932-2ee2-3cfb-9bdc-0b30bfb33a91\n4a60c567-f167-3890-aa7e-01e75ccc40e0\n87918291-e9ba-3759-be1a-4c874ca40997\n4487b659-692e-3b35-9d1e-a230279ed646\naa539866-29e4-353e-95a9-b6d321b53b33\ndf1935dc-1e5f-3f4d-bdcb-e6c2bcb07667\n087695bd-c662-3e86-83b4-aedc3b8eec36\n19711b73-c43b-3922-be61-8c44df707a7d\n1886b0d1-9c5e-326f-99df-30b64044638f\nb6642e23-d100-3680-8882-9f3b753b2eef\n89f79c55-6698-3037-bd2e-d40c81af169a\nca4144fb-10e5-3895-836f-87001f59ac65\ndc3d4b79-6cd8-324b-bc70-cbd0e2a066da\n28617035-7557-3cb9-99c2-754f72fd34b4\n924116d9-0a48-3d97-b8c9-0d16b087c16a\n3e7c4d87-dba1-3e22-a303-4f402f89cd20\n2451c
219-3002-3b2e-8fa9-2b7fea168b3b\n5d9c1080-e6e9-3222-96a2-37ca7286a874\naa105408-2974-35e7-ae76-35060cfde21a\n9efe1171-6faf-3427-8451-8f6469f7678e\n9441ffdd-f06e-36e0-839e-b836b0f19bc9\n20f785b0-e11a-3757-be79-b0731286c998\n9fd55542-e982-361f-814f-61ad4ad07adf\n2583a8ee-867d-3db6-b039-35b913fb8f70\nb275d09d-9da2-380b-a748-528ee28bc9af\ne10475f7-0d56-3a75-870d-d4206fa165d7\n120d7ac7-cce3-359e-a19c-1b9c0abd6be2\n226199ab-c791-32a7-8bab-ab92878eb199\nadbb2a17-a503-32cd-a9ed-b523b3e4da0b\nb8ce75e5-c1d2-3447-9249-70ab3d42389f\n3b60751b-7a71-3a47-a743-96b96f0d9b2b\n285ac213-8caf-31a4-b0fa-c240580f7f69\n988ab841-c422-3d08-bb52-a09f8fdb6ab2\n2ec904db-41aa-397c-a1e3-2e2ca0c8e8fb\n5fe10166-ab1e-36d5-aa2b-c0d6f680f2c7\nba737c78-2ef2-3643-a5b2-4804dfff9d93\n0526e68e-2ff1-3e53-b0f8-45df02e45a93\n8934694e-8085-3673-96dd-eacebe691ed1\n070bbf42-31d3-3aa9-aca4-c262afc9077d\n5f8f4a26-59b1-3f70-bcab-b5e3e615d3bc\n7de2e535-81df-3d5f-a5ca-62e4b940eb54\ncd22abca-9150-3279-87a4-cb00ba517372\nd89f80be-76d0-3853-8daa-76605cf4ce5e\na7636fca-4d9e-3052-bef2-af0ce5d1df74\nfbd62533-2d32-3c95-8590-7fd81bd68c87\n7a8ec82c-1149-308b-8a12-477460843f35\ne35a6aae-3608-38a7-b6e9-b5d6108b921d\n5ea3cd9c-15d0-3b80-9cc4-02c8b5ad523a\ned5fc860-c172-39c5-91c0-d712957fb1cd\nf2325996-961e-3f63-bbc0-44b7e76aeac9\n7905533a-694b-35db-b39f-aec9e33fb3de\nc83da752-b12f-3fbd-b728-4abb9551723b\nb2d9d8a5-847b-3c3b-aed1-c414319d20af\n131bd3d9-4f85-3ba3-b569-eb88308d79d5\ne596b305-c951-3081-ae02-85406a473840\neb142141-683a-3a6d-a207-0302b1ff260d\nfdc0f552-4976-36a6-8691-9a8c6a5ba389\ne68d1f0d-eb44-3751-975d-f80609f695ae\n6ee06433-4820-3211-999a-95b79b2c692e\n937093d8-7966-3df3-b334-0835595412b6\n8940f5f1-13e0-3094-99ba-da2d17639774\n919f13de-857f-3b1c-9f8e-7cbe500a60ae\n5f5a25ff-ea07-3133-b5c6-26fada93f90f\nc93a30c8-168c-386c-a25a-cbd8d8410fbe\ne2e921fe-e489-3656-a0a2-5e17bd399ddf\n27be7d34-ecb4-377b-8477-ccfd7cf4d0bc\n9282db22-c361-3456-a7b5-414959f5f25e\nd70660da-4250-3ad1-a2d0-6a2d97b5379f\n840b2b3f-5f52-32ae-b833-ad030063533d\n87621780-827a-3df5-8fa5-a94267d2d807\n307e27f2-6442-39a2-b62c-1e3d000cebaf\n84ed050c-635f-36ec-9c28-8a0c10f5cf11\n"
  },
  {
    "path": "tools/data_converter/nusc_split.py",
    "content": "TRAIN_SCENES = [\n    \"scene-0002\", \"scene-0003\", \"scene-0004\", \"scene-0005\", \"scene-0006\", \n    \"scene-0007\", \"scene-0008\", \"scene-0009\", \"scene-0012\", \"scene-0013\", \n    \"scene-0014\", \"scene-0015\", \"scene-0016\", \"scene-0017\", \"scene-0018\", \n    \"scene-0019\", \"scene-0021\", \"scene-0022\", \"scene-0023\", \"scene-0024\", \n    \"scene-0025\", \"scene-0026\", \"scene-0027\", \"scene-0028\", \"scene-0029\", \n    \"scene-0030\", \"scene-0031\", \"scene-0032\", \"scene-0033\", \"scene-0034\", \n    \"scene-0035\", \"scene-0036\", \"scene-0039\", \"scene-0042\", \"scene-0043\", \n    \"scene-0044\", \"scene-0045\", \"scene-0046\", \"scene-0047\", \"scene-0048\", \n    \"scene-0049\", \"scene-0050\", \"scene-0051\", \"scene-0052\", \"scene-0055\", \n    \"scene-0056\", \"scene-0057\", \"scene-0058\", \"scene-0059\", \"scene-0060\", \n    \"scene-0061\", \"scene-0062\", \"scene-0063\", \"scene-0064\", \"scene-0065\", \n    \"scene-0066\", \"scene-0067\", \"scene-0068\", \"scene-0069\", \"scene-0070\", \n    \"scene-0071\", \"scene-0072\", \"scene-0073\", \"scene-0074\", \"scene-0075\", \n    \"scene-0076\", \"scene-0092\", \"scene-0093\", \"scene-0094\", \"scene-0095\", \n    \"scene-0096\", \"scene-0097\", \"scene-0098\", \"scene-0099\", \"scene-0100\", \n    \"scene-0101\", \"scene-0102\", \"scene-0103\", \"scene-0104\", \"scene-0105\", \n    \"scene-0106\", \"scene-0107\", \"scene-0108\", \"scene-0109\", \"scene-0110\", \n    \"scene-0120\", \"scene-0123\", \"scene-0124\", \"scene-0125\", \"scene-0126\", \n    \"scene-0127\", \"scene-0128\", \"scene-0129\", \"scene-0130\", \"scene-0131\", \n    \"scene-0132\", \"scene-0133\", \"scene-0134\", \"scene-0135\", \"scene-0138\", \n    \"scene-0149\", \"scene-0150\", \"scene-0151\", \"scene-0154\", \"scene-0155\", \n    \"scene-0157\", \"scene-0158\", \"scene-0159\", \"scene-0161\", \"scene-0162\", \n    \"scene-0163\", \"scene-0164\", \"scene-0165\", \"scene-0166\", \"scene-0167\", \n    \"scene-0168\", \"scene-0170\", \"scene-0171\", \"scene-0172\", \"scene-0173\", \n    \"scene-0174\", \"scene-0175\", \"scene-0176\", \"scene-0177\", \"scene-0178\", \n    \"scene-0179\", \"scene-0180\", \"scene-0181\", \"scene-0182\", \"scene-0183\", \n    \"scene-0185\", \"scene-0187\", \"scene-0188\", \"scene-0190\", \"scene-0191\", \n    \"scene-0192\", \"scene-0193\", \"scene-0194\", \"scene-0195\", \"scene-0196\", \n    \"scene-0199\", \"scene-0200\", \"scene-0202\", \"scene-0203\", \"scene-0204\", \n    \"scene-0206\", \"scene-0207\", \"scene-0208\", \"scene-0209\", \"scene-0210\", \n    \"scene-0211\", \"scene-0212\", \"scene-0213\", \"scene-0214\", \"scene-0218\", \n    \"scene-0219\", \"scene-0220\", \"scene-0221\", \"scene-0222\", \"scene-0224\", \n    \"scene-0225\", \"scene-0226\", \"scene-0227\", \"scene-0228\", \"scene-0229\", \n    \"scene-0230\", \"scene-0231\", \"scene-0232\", \"scene-0233\", \"scene-0234\", \n    \"scene-0235\", \"scene-0236\", \"scene-0237\", \"scene-0238\", \"scene-0239\", \n    \"scene-0240\", \"scene-0241\", \"scene-0242\", \"scene-0243\", \"scene-0244\", \n    \"scene-0245\", \"scene-0246\", \"scene-0247\", \"scene-0248\", \"scene-0249\", \n    \"scene-0250\", \"scene-0251\", \"scene-0252\", \"scene-0253\", \"scene-0254\", \n    \"scene-0255\", \"scene-0256\", \"scene-0257\", \"scene-0258\", \"scene-0259\", \n    \"scene-0260\", \"scene-0261\", \"scene-0262\", \"scene-0263\", \"scene-0264\", \n    \"scene-0268\", \"scene-0270\", \"scene-0271\", \"scene-0272\", 
\"scene-0273\", \n    \"scene-0274\", \"scene-0275\", \"scene-0276\", \"scene-0277\", \"scene-0278\", \n    \"scene-0283\", \"scene-0284\", \"scene-0285\", \"scene-0286\", \"scene-0287\", \n    \"scene-0288\", \"scene-0289\", \"scene-0290\", \"scene-0291\", \"scene-0292\", \n    \"scene-0293\", \"scene-0294\", \"scene-0295\", \"scene-0296\", \"scene-0297\", \n    \"scene-0298\", \"scene-0299\", \"scene-0300\", \"scene-0301\", \"scene-0302\", \n    \"scene-0303\", \"scene-0304\", \"scene-0305\", \"scene-0306\", \"scene-0315\", \n    \"scene-0316\", \"scene-0317\", \"scene-0318\", \"scene-0321\", \"scene-0323\", \n    \"scene-0324\", \"scene-0328\", \"scene-0329\", \"scene-0330\", \"scene-0331\", \n    \"scene-0332\", \"scene-0344\", \"scene-0345\", \"scene-0346\", \"scene-0349\", \n    \"scene-0350\", \"scene-0351\", \"scene-0352\", \"scene-0353\", \"scene-0354\", \n    \"scene-0355\", \"scene-0356\", \"scene-0357\", \"scene-0358\", \"scene-0359\", \n    \"scene-0360\", \"scene-0361\", \"scene-0362\", \"scene-0363\", \"scene-0364\", \n    \"scene-0365\", \"scene-0367\", \"scene-0370\", \"scene-0371\", \"scene-0372\", \n    \"scene-0373\", \"scene-0374\", \"scene-0375\", \"scene-0376\", \"scene-0377\", \n    \"scene-0379\", \"scene-0380\", \"scene-0381\", \"scene-0382\", \"scene-0383\", \n    \"scene-0384\", \"scene-0385\", \"scene-0386\", \"scene-0388\", \"scene-0399\", \n    \"scene-0400\", \"scene-0401\", \"scene-0402\", \"scene-0403\", \"scene-0405\", \n    \"scene-0406\", \"scene-0407\", \"scene-0408\", \"scene-0420\", \"scene-0421\", \n    \"scene-0422\", \"scene-0423\", \"scene-0424\", \"scene-0425\", \"scene-0426\", \n    \"scene-0427\", \"scene-0428\", \"scene-0429\", \"scene-0430\", \"scene-0431\", \n    \"scene-0432\", \"scene-0433\", \"scene-0434\", \"scene-0435\", \"scene-0436\", \n    \"scene-0437\", \"scene-0438\", \"scene-0439\", \"scene-0440\", \"scene-0441\", \n    \"scene-0442\", \"scene-0443\", \"scene-0444\", \"scene-0445\", \"scene-0446\", \n    \"scene-0447\", \"scene-0448\", \"scene-0449\", \"scene-0450\", \"scene-0451\", \n    \"scene-0452\", \"scene-0453\", \"scene-0454\", \"scene-0455\", \"scene-0456\", \n    \"scene-0457\", \"scene-0458\", \"scene-0459\", \"scene-0461\", \"scene-0462\", \n    \"scene-0463\", \"scene-0464\", \"scene-0465\", \"scene-0467\", \"scene-0468\", \n    \"scene-0469\", \"scene-0471\", \"scene-0472\", \"scene-0474\", \"scene-0475\", \n    \"scene-0476\", \"scene-0477\", \"scene-0478\", \"scene-0479\", \"scene-0480\", \n    \"scene-0499\", \"scene-0500\", \"scene-0501\", \"scene-0502\", \"scene-0504\", \n    \"scene-0505\", \"scene-0506\", \"scene-0507\", \"scene-0508\", \"scene-0509\", \n    \"scene-0510\", \"scene-0511\", \"scene-0512\", \"scene-0513\", \"scene-0514\", \n    \"scene-0515\", \"scene-0517\", \"scene-0518\", \"scene-0519\", \"scene-0520\", \n    \"scene-0521\", \"scene-0522\", \"scene-0523\", \"scene-0524\", \"scene-0552\", \n    \"scene-0553\", \"scene-0554\", \"scene-0555\", \"scene-0559\", \"scene-0560\", \n    \"scene-0561\", \"scene-0562\", \"scene-0563\", \"scene-0564\", \"scene-0565\", \n    \"scene-0584\", \"scene-0585\", \"scene-0586\", \"scene-0587\", \"scene-0588\", \n    \"scene-0589\", \"scene-0590\", \"scene-0591\", \"scene-0592\", \"scene-0593\", \n    \"scene-0594\", \"scene-0595\", \"scene-0596\", \"scene-0597\", \"scene-0598\", \n    \"scene-0599\", \"scene-0600\", \"scene-0625\", \"scene-0626\", \"scene-0627\", \n    \"scene-0629\", \"scene-0630\", \"scene-0632\", \"scene-0633\", \"scene-0634\", \n    
\"scene-0635\", \"scene-0636\", \"scene-0637\", \"scene-0638\", \"scene-0639\", \n    \"scene-0640\", \"scene-0652\", \"scene-0653\", \"scene-0654\", \"scene-0655\", \n    \"scene-0656\", \"scene-0657\", \"scene-0658\", \"scene-0659\", \"scene-0660\", \n    \"scene-0661\", \"scene-0662\", \"scene-0663\", \"scene-0664\", \"scene-0665\", \n    \"scene-0666\", \"scene-0667\", \"scene-0668\", \"scene-0669\", \"scene-0670\", \n    \"scene-0671\", \"scene-0672\", \"scene-0673\", \"scene-0674\", \"scene-0675\", \n    \"scene-0676\", \"scene-0677\", \"scene-0678\", \"scene-0679\", \"scene-0681\", \n    \"scene-0683\", \"scene-0684\", \"scene-0685\", \"scene-0686\", \"scene-0687\", \n    \"scene-0688\", \"scene-0689\", \"scene-0695\", \"scene-0696\", \"scene-0697\", \n    \"scene-0698\", \"scene-0700\", \"scene-0701\", \"scene-0703\", \"scene-0704\", \n    \"scene-0705\", \"scene-0706\", \"scene-0707\", \"scene-0708\", \"scene-0709\", \n    \"scene-0710\", \"scene-0711\", \"scene-0712\", \"scene-0713\", \"scene-0714\", \n    \"scene-0715\", \"scene-0716\", \"scene-0717\", \"scene-0718\", \"scene-0719\", \n    \"scene-0726\", \"scene-0727\", \"scene-0728\", \"scene-0730\", \"scene-0731\", \n    \"scene-0733\", \"scene-0734\", \"scene-0735\", \"scene-0736\", \"scene-0737\", \n    \"scene-0738\", \"scene-0780\", \"scene-0781\", \"scene-0782\", \"scene-0783\", \n    \"scene-0784\", \"scene-0786\", \"scene-0787\", \"scene-0789\", \"scene-0790\", \n    \"scene-0791\", \"scene-0792\", \"scene-0802\", \"scene-0806\", \"scene-0808\", \n    \"scene-0809\", \"scene-0810\", \"scene-0811\", \"scene-0812\", \"scene-0813\", \n    \"scene-0815\", \"scene-0816\", \"scene-0817\", \"scene-0819\", \"scene-0820\", \n    \"scene-0821\", \"scene-0822\", \"scene-0847\", \"scene-0848\", \"scene-0849\", \n    \"scene-0850\", \"scene-0851\", \"scene-0852\", \"scene-0853\", \"scene-0854\", \n    \"scene-0855\", \"scene-0856\", \"scene-0858\", \"scene-0860\", \"scene-0861\", \n    \"scene-0862\", \"scene-0863\", \"scene-0864\", \"scene-0865\", \"scene-0866\", \n    \"scene-0868\", \"scene-0869\", \"scene-0870\", \"scene-0871\", \"scene-0872\", \n    \"scene-0873\", \"scene-0875\", \"scene-0876\", \"scene-0877\", \"scene-0878\", \n    \"scene-0880\", \"scene-0882\", \"scene-0883\", \"scene-0884\", \"scene-0885\", \n    \"scene-0886\", \"scene-0887\", \"scene-0888\", \"scene-0889\", \"scene-0890\", \n    \"scene-0891\", \"scene-0892\", \"scene-0893\", \"scene-0894\", \"scene-0895\", \n    \"scene-0896\", \"scene-0897\", \"scene-0898\", \"scene-0899\", \"scene-0900\", \n    \"scene-0901\", \"scene-0902\", \"scene-0903\", \"scene-0904\", \"scene-0905\", \n    \"scene-0906\", \"scene-0907\", \"scene-0908\", \"scene-0909\", \"scene-0916\", \n    \"scene-0917\", \"scene-0921\", \"scene-0922\", \"scene-0923\", \"scene-0925\", \n    \"scene-0926\", \"scene-0927\", \"scene-0928\", \"scene-0929\", \"scene-0930\", \n    \"scene-0931\", \"scene-0945\", \"scene-0947\", \"scene-0949\", \"scene-0952\", \n    \"scene-0953\", \"scene-0955\", \"scene-0956\", \"scene-0957\", \"scene-0958\", \n    \"scene-0959\", \"scene-0960\", \"scene-0961\", \"scene-0966\", \"scene-0967\", \n    \"scene-0968\", \"scene-0969\", \"scene-0971\", \"scene-0972\", \"scene-0975\", \n    \"scene-0976\", \"scene-0977\", \"scene-0978\", \"scene-0979\", \"scene-0980\", \n    \"scene-0981\", \"scene-0982\", \"scene-0983\", \"scene-0984\", \"scene-0988\", \n    \"scene-0989\", \"scene-0990\", \"scene-0991\", \"scene-0992\", \"scene-0994\", \n    \"scene-0995\", 
\"scene-0996\", \"scene-0997\", \"scene-0998\", \"scene-0999\", \n    \"scene-1000\", \"scene-1001\", \"scene-1004\", \"scene-1005\", \"scene-1006\", \n    \"scene-1007\", \"scene-1008\", \"scene-1009\", \"scene-1010\", \"scene-1011\", \n    \"scene-1012\", \"scene-1013\", \"scene-1014\", \"scene-1015\", \"scene-1019\", \n    \"scene-1020\", \"scene-1021\", \"scene-1022\", \"scene-1023\", \"scene-1024\", \n    \"scene-1025\", \"scene-1044\", \"scene-1045\", \"scene-1046\", \"scene-1047\", \n    \"scene-1048\", \"scene-1049\", \"scene-1050\", \"scene-1051\", \"scene-1052\", \n    \"scene-1053\", \"scene-1054\", \"scene-1064\", \"scene-1065\", \"scene-1066\", \n    \"scene-1067\", \"scene-1068\", \"scene-1069\", \"scene-1070\", \"scene-1071\", \n    \"scene-1072\", \"scene-1073\", \"scene-1074\", \"scene-1075\", \"scene-1076\", \n    \"scene-1077\", \"scene-1078\", \"scene-1079\", \"scene-1080\", \"scene-1081\", \n    \"scene-1082\", \"scene-1083\", \"scene-1084\", \"scene-1085\", \"scene-1086\", \n    \"scene-1087\", \"scene-1088\", \"scene-1089\", \"scene-1090\", \"scene-1091\", \n    \"scene-1092\", \"scene-1093\", \"scene-1094\", \"scene-1095\", \"scene-1096\", \n    \"scene-1097\", \"scene-1098\", \"scene-1099\", \"scene-1100\", \"scene-1101\", \n    \"scene-1102\", \"scene-1104\", \"scene-1105\", \"scene-1106\", \"scene-1107\", \n    \"scene-1108\", \"scene-1109\", \"scene-1110\"]\n\nVAL_SCENES = [\n    \"scene-0001\", \"scene-0010\", \"scene-0011\", \"scene-0020\", \"scene-0038\", \n    \"scene-0041\", \"scene-0053\", \"scene-0054\", \"scene-0121\", \"scene-0122\", \n    \"scene-0139\", \"scene-0152\", \"scene-0160\", \"scene-0184\", \"scene-0269\", \n    \"scene-0347\", \"scene-0348\", \"scene-0366\", \"scene-0368\", \"scene-0369\", \n    \"scene-0378\", \"scene-0389\", \"scene-0390\", \"scene-0391\", \"scene-0392\", \n    \"scene-0393\", \"scene-0394\", \"scene-0395\", \"scene-0396\", \"scene-0397\", \n    \"scene-0398\", \"scene-0411\", \"scene-0412\", \"scene-0413\", \"scene-0414\", \n    \"scene-0415\", \"scene-0416\", \"scene-0417\", \"scene-0418\", \"scene-0419\", \n    \"scene-0525\", \"scene-0526\", \"scene-0527\", \"scene-0528\", \"scene-0529\", \n    \"scene-0530\", \"scene-0531\", \"scene-0532\", \"scene-0533\", \"scene-0534\", \n    \"scene-0535\", \"scene-0536\", \"scene-0537\", \"scene-0538\", \"scene-0539\", \n    \"scene-0541\", \"scene-0542\", \"scene-0543\", \"scene-0544\", \"scene-0545\", \n    \"scene-0546\", \"scene-0556\", \"scene-0557\", \"scene-0558\", \"scene-0566\", \n    \"scene-0568\", \"scene-0570\", \"scene-0571\", \"scene-0572\", \"scene-0573\", \n    \"scene-0574\", \"scene-0575\", \"scene-0576\", \"scene-0577\", \"scene-0578\", \n    \"scene-0580\", \"scene-0582\", \"scene-0583\", \"scene-0642\", \"scene-0643\", \n    \"scene-0644\", \"scene-0645\", \"scene-0646\", \"scene-0647\", \"scene-0648\", \n    \"scene-0649\", \"scene-0650\", \"scene-0651\", \"scene-0739\", \"scene-0740\", \n    \"scene-0741\", \"scene-0744\", \"scene-0746\", \"scene-0747\", \"scene-0749\", \n    \"scene-0750\", \"scene-0751\", \"scene-0752\", \"scene-0757\", \"scene-0758\", \n    \"scene-0759\", \"scene-0760\", \"scene-0761\", \"scene-0762\", \"scene-0763\", \n    \"scene-0764\", \"scene-0765\", \"scene-0767\", \"scene-0768\", \"scene-0769\", \n    \"scene-0770\", \"scene-0771\", \"scene-0775\", \"scene-0777\", \"scene-0778\", \n    \"scene-0794\", \"scene-0795\", \"scene-0796\", \"scene-0797\", \"scene-0798\", \n    \"scene-0799\", \"scene-0800\", \"scene-0803\", 
\"scene-0804\", \"scene-0911\", \n    \"scene-0912\", \"scene-0913\", \"scene-0914\", \"scene-0915\", \"scene-0919\", \n    \"scene-0920\", \"scene-0924\", \"scene-0962\", \"scene-0963\", \"scene-1002\", \n    \"scene-1003\", \"scene-1016\", \"scene-1017\", \"scene-1018\", \"scene-1055\", \n    \"scene-1056\", \"scene-1057\", \"scene-1058\", \"scene-1059\", \"scene-1060\", \n    \"scene-1061\", \"scene-1062\", \"scene-1063\"]\n\n\nCALIBRATION_SCENES = [\n    \"scene-0852\", \"scene-0429\", \"scene-0956\", \"scene-0194\", \"scene-0811\", \n    \"scene-1110\", \"scene-1107\", \"scene-0294\", \"scene-0900\", \"scene-0596\", \n    \"scene-0296\", \"scene-0885\", \"scene-0866\", \"scene-0105\", \"scene-0782\", \n    \"scene-0191\", \"scene-0876\", \"scene-0133\", \"scene-0231\", \"scene-0847\", \n    \"scene-0363\", \"scene-0026\", \"scene-0791\", \"scene-0909\", \"scene-0002\", \n    \"scene-0283\", \"scene-0007\", \"scene-0251\", \"scene-1100\", \"scene-0668\", \n    \"scene-0584\", \"scene-0287\", \"scene-0260\", \"scene-0171\", \"scene-0789\", \n    \"scene-0108\", \"scene-0190\", \"scene-0206\", \"scene-0635\", \"scene-0815\", \n    \"scene-0058\", \"scene-0710\", \"scene-0302\", \"scene-0639\", \"scene-0166\", \n    \"scene-0094\", \"scene-0735\", \"scene-0321\", \"scene-1091\", \"scene-0344\"\n]"
  },
  {
    "path": "tools/data_converter/nuscenes_converter.py",
    "content": "import mmcv\nimport numpy as np\nfrom os import path as osp\nfrom pyquaternion import Quaternion\nimport argparse\nfrom nusc_split import TRAIN_SCENES, VAL_SCENES\n\nnus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',\n                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',\n                  'barrier')\n\nnus_attributes = ('cycle.with_rider', 'cycle.without_rider',\n                  'pedestrian.moving', 'pedestrian.standing',\n                  'pedestrian.sitting_lying_down', 'vehicle.moving',\n                  'vehicle.parked', 'vehicle.stopped', 'None')\n\nFAIL_SCENES = ['scene-0499', 'scene-0502', 'scene-0515', 'scene-0517']\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Data converter arg parser')\n    parser.add_argument(\n        '--data-root',\n        type=str,\n        help='specify the root path of dataset')\n    parser.add_argument(\n        '--newsplit',\n        action='store_true')\n    parser.add_argument(\n        '-v','--version',\n        choices=['v1.0-mini', 'v1.0-trainval', 'v1.0-test'],\n        default='v1.0-trainval')\n    \n    args = parser.parse_args()\n    return args\n\ndef create_nuscenes_infos_map(root_path,\n                            dest_path=None,\n                            info_prefix='nuscenes',\n                            version='v1.0-trainval',\n                            new_split=False):\n    \"\"\"Create info file for map learning task on nuscene dataset.\n\n    Given the raw data, generate its related info file in pkl format.\n\n    Args:\n        root_path (str): Path of the data root.\n        info_prefix (str): Prefix of the info file to be generated.\n        version (str): Version of the data.\n            Default: 'v1.0-trainval'\n    \"\"\"\n    from nuscenes.nuscenes import NuScenes\n    nusc = NuScenes(version=version, dataroot=root_path, verbose=True)\n    from nuscenes.utils import splits\n    assert version in ['v1.0-trainval', 'v1.0-test', 'v1.0-mini']\n    if version == 'v1.0-trainval':\n        train_scenes = splits.train\n        val_scenes = splits.val\n    elif version == 'v1.0-test':\n        train_scenes = splits.test\n        val_scenes = []\n    else:\n        train_scenes = splits.mini_train\n        val_scenes = splits.mini_val\n    \n    if new_split:\n        train_scenes = TRAIN_SCENES\n        val_scenes = VAL_SCENES\n\n    test = 'test' in version\n    if test:\n        print('test scene: {}'.format(len(train_scenes)))\n    else:\n        print('train scene: {}, val scene: {}'.format(\n            len(train_scenes), len(val_scenes)))\n    \n    train_samples, val_samples, test_samples = [], [], []\n    \n    train_sample_idx = 0\n    val_sample_idx = 0\n    for sample in mmcv.track_iter_progress(nusc.sample):\n        lidar_token = sample['data']['LIDAR_TOP']\n        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])\n        cs_record = nusc.get('calibrated_sensor',\n                             sd_rec['calibrated_sensor_token'])\n        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])\n        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)\n\n        #mmcv.check_file_exist(lidar_path)\n\n        scene_record = nusc.get('scene', sample['scene_token'])\n        log_record = nusc.get('log', scene_record['log_token'])\n        location = log_record['location']\n        scene_name = scene_record['name']\n        if scene_name in FAIL_SCENES: continue\n        info = {\n            'lidar_path': 
lidar_path,\n            'token': sample['token'],\n            'cams': {},\n            \n            'lidar2ego_translation': cs_record['translation'],\n            'lidar2ego_rotation': cs_record['rotation'],\n            'e2g_translation': pose_record['translation'],\n            'e2g_rotation': pose_record['rotation'],\n            'timestamp': sample['timestamp'],\n            'location': location,\n            'scene_name': scene_name\n        }\n\n        # obtain the 6 cameras' information per frame\n        camera_types = [\n            'CAM_FRONT',\n            'CAM_FRONT_RIGHT',\n            'CAM_FRONT_LEFT',\n            'CAM_BACK',\n            'CAM_BACK_LEFT',\n            'CAM_BACK_RIGHT',\n        ]\n        for cam in camera_types:\n            cam_token = sample['data'][cam]\n            sd_rec = nusc.get('sample_data', cam_token)\n            cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])\n\n            cam2ego_rotation = Quaternion(cs_record['rotation']).rotation_matrix\n            cam2ego_translation = np.array(cs_record['translation'])\n\n            ego2cam_rotation = cam2ego_rotation.T\n            ego2cam_translation = ego2cam_rotation.dot(-cam2ego_translation)\n\n            transform_matrix = np.eye(4)  # ego2cam\n            transform_matrix[:3, :3] = ego2cam_rotation\n            transform_matrix[:3, 3] = ego2cam_translation\n\n            cam_info = dict(\n                extrinsics=transform_matrix, # ego2cam\n                intrinsics=cs_record['camera_intrinsic'],\n                img_fpath=str(nusc.get_sample_data_path(sd_rec['token']))\n            )\n            info['cams'][cam] = cam_info\n        \n        if scene_name in train_scenes:\n            info.update({\n                'sample_idx': train_sample_idx,\n                'prev': train_sample_idx - 1,\n                'next': train_sample_idx + 1,\n            })\n            if sample['prev'] == '':\n                info['prev'] = -1\n            if sample['next'] == '':\n                info['next'] = -1\n            train_samples.append(info)\n            train_sample_idx += 1\n        elif scene_name in val_scenes:\n            info.update({\n                'sample_idx': val_sample_idx,\n                'prev': val_sample_idx - 1,\n                'next': val_sample_idx + 1,\n            })\n            if sample['prev'] == '':\n                info['prev'] = -1\n            if sample['next'] == '':\n                info['next'] = -1\n            val_sample_idx += 1\n            val_samples.append(info)\n        else:\n            test_samples.append(info)\n    \n    if dest_path is None:\n        dest_path = root_path\n    \n    if test:\n        info_path = osp.join(dest_path, f'{info_prefix}_map_infos_test.pkl')\n        print(f'saving test set to {info_path}')\n        mmcv.dump(test_samples, info_path)\n\n    else:\n        # for training set\n        if new_split:\n            info_path = osp.join(dest_path, f'{info_prefix}_map_infos_train_newsplit.pkl')\n        else:\n            info_path = osp.join(dest_path, f'{info_prefix}_map_infos_train.pkl')\n        print(f'saving training set to {info_path}')\n        mmcv.dump(train_samples, info_path)\n\n        # for val set\n        if new_split:\n            info_path = osp.join(dest_path, f'{info_prefix}_map_infos_val_newsplit.pkl')\n        else:\n            info_path = osp.join(dest_path, f'{info_prefix}_map_infos_val.pkl')\n        print(f'saving validation set to {info_path}')\n        
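# mirror of the train-set dump above\n        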
mmcv.dump(val_samples, info_path)\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    create_nuscenes_infos_map(root_path=args.data_root, version=args.version, new_split=args.newsplit)"
  },
  {
    "path": "tools/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\nPORT=${PORT:-29500}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \\\n    $(dirname \"$0\")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}\n"
  },
  {
    "path": "tools/dist_train.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nGPUS=$2\nPORT=${PORT:-29500}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \\\n    $(dirname \"$0\")/train.py $CONFIG --launcher pytorch ${@:3}\n"
  },
  {
    "path": "tools/mmdet_test.py",
    "content": "import os.path as osp\nimport pickle\nimport shutil\nimport tempfile\nimport time\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv.image import tensor2imgs\nfrom mmcv.runner import get_dist_info\n\nfrom mmdet.core import encode_mask_results\n\n\ndef single_gpu_test(model,\n                    data_loader,\n                    show=False,\n                    out_dir=None,\n                    show_score_thr=0.3):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n\n        batch_size = len(result)\n        if show or out_dir:\n            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):\n                img_tensor = data['img'][0]\n            else:\n                img_tensor = data['img'][0].data[0]\n            img_metas = data['img_metas'][0].data[0]\n            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])\n            assert len(imgs) == len(img_metas)\n\n            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):\n                h, w, _ = img_meta['img_shape']\n                img_show = img[:h, :w, :]\n\n                ori_h, ori_w = img_meta['ori_shape'][:-1]\n                img_show = mmcv.imresize(img_show, (ori_w, ori_h))\n\n                if out_dir:\n                    out_file = osp.join(out_dir, img_meta['ori_filename'])\n                else:\n                    out_file = None\n\n                model.module.show_result(\n                    img_show,\n                    result[i],\n                    show=show,\n                    out_file=out_file,\n                    score_thr=show_score_thr)\n\n        # encode mask results\n        if isinstance(result[0], tuple):\n            result = [(bbox_results, encode_mask_results(mask_results))\n                      for bbox_results, mask_results in result]\n        results.extend(result)\n\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n\n\ndef multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):\n    \"\"\"Test model with multiple gpus.\n\n    This method tests model with multiple gpus and collects the results\n    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'\n    it encodes results to gpu tensors and use gpu communication for results\n    collection. 
In cpu mode it saves the results from different gpus to 'tmpdir'\n    and the rank 0 worker collects them.\n\n    Args:\n        model (nn.Module): Model to be tested.\n        data_loader (DataLoader): Pytorch data loader.\n        tmpdir (str): Path of directory to save the temporary results from\n            different gpus in cpu mode.\n        gpu_collect (bool): Option to use either gpu or cpu to collect results.\n\n    Returns:\n        list: The prediction results.\n    \"\"\"\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    rank, world_size = get_dist_info()\n    if rank == 0:\n        prog_bar = mmcv.ProgressBar(len(dataset))\n    time.sleep(2)  # This line can prevent a deadlock problem in some cases.\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n            # encode mask results\n            # if isinstance(result[0], tuple):\n            #     result = [(bbox_results, encode_mask_results(mask_results))\n            #               for bbox_results, mask_results in result]\n        results.extend(result)\n\n        if rank == 0:\n            batch_size = len(result)\n            for _ in range(batch_size * world_size):\n                prog_bar.update()\n\n    # collect results from all ranks\n    if gpu_collect:\n        results = collect_results_gpu(results, len(dataset))\n    else:\n        results = collect_results_cpu(results, len(dataset), tmpdir)\n    return results\n\n\ndef collect_results_cpu(result_part, size, tmpdir=None):\n    rank, world_size = get_dist_info()\n    # create a tmp dir if it is not specified\n    if tmpdir is None:\n        MAX_LEN = 512\n        # 32 is the ASCII code for a whitespace character\n        dir_tensor = torch.full((MAX_LEN, ),\n                                32,\n                                dtype=torch.uint8,\n                                device='cuda')\n        if rank == 0:\n            mmcv.mkdir_or_exist('.dist_test')\n            tmpdir = tempfile.mkdtemp(dir='.dist_test')\n            tmpdir = torch.tensor(\n                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')\n            dir_tensor[:len(tmpdir)] = tmpdir\n        dist.broadcast(dir_tensor, 0)\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\n    else:\n        mmcv.mkdir_or_exist(tmpdir)\n    # dump the part result to the dir\n    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))\n    dist.barrier()\n    # collect all parts\n    if rank != 0:\n        return None\n    else:\n        # load results of all parts from tmp dir\n        part_list = []\n        for i in range(world_size):\n            part_file = osp.join(tmpdir, f'part_{i}.pkl')\n            part_list.append(mmcv.load(part_file))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        # remove tmp dir\n        shutil.rmtree(tmpdir)\n        return ordered_results\n\n\ndef collect_results_gpu(result_part, size):\n    rank, world_size = get_dist_info()\n    # dump result part to tensor with pickle\n    part_tensor = torch.tensor(\n        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')\n    # gather all result part tensor shape\n    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')\n    shape_list = [shape_tensor.clone() for _ in 
range(world_size)]\n    dist.all_gather(shape_list, shape_tensor)\n    # padding result part tensor to max length\n    shape_max = torch.tensor(shape_list).max()\n    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')\n    part_send[:shape_tensor[0]] = part_tensor\n    part_recv_list = [\n        part_tensor.new_zeros(shape_max) for _ in range(world_size)\n    ]\n    # gather all result part\n    dist.all_gather(part_recv_list, part_send)\n\n    if rank == 0:\n        part_list = []\n        for recv, shape in zip(part_recv_list, shape_list):\n            part_list.append(\n                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        return ordered_results\n"
  },
  {
    "path": "tools/mmdet_train.py",
    "content": "import random\nimport warnings\n\nimport numpy as np\nimport torch\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,\n                         Fp16OptimizerHook, OptimizerHook, build_optimizer,\n                         build_runner)\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core import DistEvalHook, EvalHook\nfrom mmdet.datasets import (build_dataloader, build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.utils import get_root_logger\n\n\ndef set_random_seed(seed, deterministic=False):\n    \"\"\"Set random seed.\n\n    Args:\n        seed (int): Seed to be used.\n        deterministic (bool): Whether to set the deterministic option for\n            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`\n            to True and `torch.backends.cudnn.benchmark` to False.\n            Default: False.\n    \"\"\"\n    random.seed(seed)\n    np.random.seed(seed)\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    if deterministic:\n        torch.backends.cudnn.deterministic = True\n        torch.backends.cudnn.benchmark = False\n\n\ndef train_detector(model,\n                   dataset,\n                   cfg,\n                   distributed=False,\n                   validate=False,\n                   timestamp=None,\n                   meta=None):\n    logger = get_root_logger(cfg.log_level)\n\n    # prepare data loaders\n    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]\n    if 'imgs_per_gpu' in cfg.data:\n        logger.warning('\"imgs_per_gpu\" is deprecated in MMDet V2.0. '\n                       'Please use \"samples_per_gpu\" instead')\n        if 'samples_per_gpu' in cfg.data:\n            logger.warning(\n                f'Got \"imgs_per_gpu\"={cfg.data.imgs_per_gpu} and '\n                f'\"samples_per_gpu\"={cfg.data.samples_per_gpu}, \"imgs_per_gpu\"'\n                f'={cfg.data.imgs_per_gpu} is used in this experiments')\n        else:\n            logger.warning(\n                'Automatically set \"samples_per_gpu\"=\"imgs_per_gpu\"='\n                f'{cfg.data.imgs_per_gpu} in this experiments')\n        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu\n\n    data_loaders = [\n        build_dataloader(\n            ds,\n            cfg.data.samples_per_gpu,\n            cfg.data.workers_per_gpu,\n            # cfg.gpus will be ignored if distributed\n            len(cfg.gpu_ids),\n            dist=distributed,\n            seed=cfg.seed) for ds in dataset\n    ]\n\n    # put model on gpus\n    if distributed:\n        find_unused_parameters = cfg.get('find_unused_parameters', False)\n        # Sets the `find_unused_parameters` parameter in\n        # torch.nn.parallel.DistributedDataParallel\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False,\n            find_unused_parameters=find_unused_parameters)\n    else:\n        model = MMDataParallel(\n            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)\n\n    # build runner\n    optimizer = build_optimizer(model, cfg.optimizer)\n\n    if 'runner' not in cfg:\n        cfg.runner = {\n            'type': 'EpochBasedRunner',\n            'max_epochs': cfg.total_epochs\n        }\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your 
config.', UserWarning)\n    else:\n        if 'total_epochs' in cfg:\n            assert cfg.total_epochs == cfg.runner.max_epochs\n\n    runner = build_runner(\n        cfg.runner,\n        default_args=dict(\n            model=model,\n            optimizer=optimizer,\n            work_dir=cfg.work_dir,\n            logger=logger,\n            meta=meta))\n\n    # an ugly workaround to make .log and .log.json filenames the same\n    runner.timestamp = timestamp\n\n    # fp16 setting\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        optimizer_config = Fp16OptimizerHook(\n            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)\n    elif distributed and 'type' not in cfg.optimizer_config:\n        optimizer_config = OptimizerHook(**cfg.optimizer_config)\n    else:\n        optimizer_config = cfg.optimizer_config\n\n    # register hooks\n    runner.register_training_hooks(cfg.lr_config, optimizer_config,\n                                   cfg.checkpoint_config, cfg.log_config,\n                                   cfg.get('momentum_config', None))\n    if distributed:\n        if isinstance(runner, EpochBasedRunner):\n            runner.register_hook(DistSamplerSeedHook())\n\n    # register eval hooks\n    if validate:\n        # Support batch_size > 1 in validation\n        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)\n        if val_samples_per_gpu > 1:\n            # Replace 'ImageToTensor' with 'DefaultFormatBundle'\n            cfg.data.val.pipeline = replace_ImageToTensor(\n                cfg.data.val.pipeline)\n        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))\n        val_dataloader = build_dataloader(\n            val_dataset,\n            samples_per_gpu=val_samples_per_gpu,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=distributed,\n            shuffle=False)\n        eval_cfg = cfg.get('evaluation', {})\n        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'\n        eval_hook = DistEvalHook if distributed else EvalHook\n        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))\n\n    # user-defined hooks\n    if cfg.get('custom_hooks', None):\n        custom_hooks = cfg.custom_hooks\n        assert isinstance(custom_hooks, list), \\\n            f'custom_hooks expects list type, but got {type(custom_hooks)}'\n        for hook_cfg in cfg.custom_hooks:\n            assert isinstance(hook_cfg, dict), \\\n                'Each item in custom_hooks expects dict type, but got ' \\\n                f'{type(hook_cfg)}'\n            hook_cfg = hook_cfg.copy()\n            priority = hook_cfg.pop('priority', 'NORMAL')\n            hook = build_from_cfg(hook_cfg, HOOKS)\n            runner.register_hook(hook, priority=priority)\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow)\n"
  },
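  {
    "path": "tools/mmdet_train_example.py",
    "content": "'''Tiny sketch of set_random_seed from tools/mmdet_train.py: it seeds python,\nnumpy and torch in one call and can switch CUDNN into deterministic mode\n(reproducible but slower). Fabricated seed value; run from the repo root\ninside the repo environment so mmdet_train and its mmdet deps import.'''\nimport random\n\nimport numpy as np\nimport torch\n\nfrom mmdet_train import set_random_seed\n\nset_random_seed(42, deterministic=True)\n# All three RNG streams now start from a reproducible state.\nprint(random.random(), np.random.rand(), torch.rand(1).item())\n"
  },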
  {
    "path": "tools/slurm_test.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nPY_ARGS=${@:5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/slurm_train.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nPY_ARGS=${@:5}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "tools/test.py",
    "content": "import argparse\nimport mmcv\nimport os\nimport os.path as osp\nimport torch\nimport warnings\nfrom mmcv import Config, DictAction\nfrom mmcv.cnn import fuse_conv_bn\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (get_dist_info, init_dist, load_checkpoint,\n                         wrap_fp16_model)\n\nfrom mmdet3d.apis import single_gpu_test\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.models import build_model\n# from mmdet_test import multi_gpu_test\nfrom mmdet_train import set_random_seed\nfrom mmdet.datasets import replace_ImageToTensor\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet test (and eval) a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', type=str, help='checkpoint file')\n    parser.add_argument('--work-dir', help='the dir to save logs and models')\n    parser.add_argument('--result-path', \n        help='submission file in pickle format to be evaluated')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without perform evaluation. It is'\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--eval',\n        action='store_true',\n        help='whether to run evaluation.')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where results will be saved')\n    parser.add_argument(\n        '--gpu-collect',\n        action='store_true',\n        help='whether to use gpu to collect results.')\n    parser.add_argument(\n        '--tmpdir',\n        help='tmp directory used for collecting results from multiple '\n        'workers, available when gpu-collect is not specified')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function (deprecate), '\n        'change to --eval-options instead.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.eval_options:\n        raise ValueError(\n            '--options and --eval-options cannot be both specified, '\n            '--options is deprecated in favor of --eval-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --eval-options')\n        args.eval_options = args.options\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    assert args.eval or args.format_only or args.show \\\n        or args.show_dir, \\\n        ('Please specify at least one operation (save/eval/format/show the '\n         'results / save the results) with the argument \"--out\", \"--eval\"'\n         ', \"--format-only\", \"--show\" or \"--show-dir\"')\n\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    # import modules from string list.\n    if cfg.get('custom_imports', None):\n        from mmcv.utils import import_modules_from_strings\n        import_modules_from_strings(**cfg['custom_imports'])\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    # import modules from plguin/xx, registry will be updated\n    import sys\n    sys.path.append(os.path.abspath('.'))  \n    if hasattr(cfg, 'plugin'):\n        if cfg.plugin:\n            import importlib\n            if hasattr(cfg, 'plugin_dir'):\n                def import_path(plugin_dir):\n                    _module_dir = os.path.dirname(plugin_dir)\n                    _module_dir = _module_dir.split('/')\n                    _module_path = _module_dir[0]\n\n                    for m in _module_dir[1:]:\n                        _module_path = _module_path + '.' 
+ m\n                    print(_module_path)\n                    plg_lib = importlib.import_module(_module_path)\n\n                plugin_dirs = cfg.plugin_dir\n                if not isinstance(plugin_dirs,list):\n                    plugin_dirs = [plugin_dirs,]\n                for plugin_dir in plugin_dirs:\n                    import_path(plugin_dir)\n                \n            else:\n                # import dir is the dirpath for the config file\n                _module_dir = os.path.dirname(args.config)\n                _module_dir = _module_dir.split('/')\n                _module_path = _module_dir[0]\n                for m in _module_dir[1:]:\n                    _module_path = _module_path + '.' + m\n                print(_module_path)\n                plg_lib = importlib.import_module(_module_path)\n\n    cfg.model.pretrained = None\n    # in case the test dataset is concatenated\n    samples_per_gpu = 1\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)\n        if samples_per_gpu > 1:\n            # Replace 'ImageToTensor' with 'DefaultFormatBundle'\n            cfg.data.test.pipeline = replace_ImageToTensor(\n                cfg.data.test.pipeline)\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n        samples_per_gpu = max(\n            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])\n        if samples_per_gpu > 1:\n            for ds_cfg in cfg.data.test:\n                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    # set random seeds\n    if args.seed is not None:\n        set_random_seed(args.seed, deterministic=args.deterministic)\n\n    # build the dataloader\n    if args.work_dir is not None:\n        # update configs according to CLI args if args.work_dir is not None\n        cfg.work_dir = args.work_dir\n    elif cfg.get('work_dir', None) is None:\n        # use config filename as default work_dir if cfg.work_dir is None\n        cfg.work_dir = osp.join('./work_dirs',\n                                osp.splitext(osp.basename(args.config))[0]) \n\n    cfg.data.test.work_dir = cfg.work_dir\n    print('work_dir: ',cfg.work_dir)\n    dataset = build_dataset(cfg.data.test)\n    \n    if args.result_path:\n        outputs = args.result_path\n        dataset._evaluate(args.result_path)\n        return\n    \n    from plugin.datasets.builder import build_dataloader\n\n    data_loader = build_dataloader(\n            dataset,\n            samples_per_gpu=1,\n            workers_per_gpu=cfg.data.workers_per_gpu,\n            dist=distributed,\n            shuffle=False,\n            shuffler_sampler=cfg.data.shuffler_sampler,  # dict(type='DistributedGroupSampler'),\n            nonshuffler_sampler=cfg.data.nonshuffler_sampler,  # dict(type='DistributedSampler'),\n        )\n    \n    from plugin.core.apis.test import custom_multi_gpu_test as multi_gpu_test\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n
    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')\n\n    if args.fuse_conv_bn:\n        model = fuse_conv_bn(model)\n    if not distributed:\n        model = MMDataParallel(model, device_ids=[0])\n        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir)\n    else:\n        model = MMDistributedDataParallel(\n            model.cuda(),\n            device_ids=[torch.cuda.current_device()],\n            broadcast_buffers=False)\n        outputs = multi_gpu_test(model, data_loader, args.tmpdir,\n                                args.gpu_collect)\n\n    rank, _ = get_dist_info()\n    if rank == 0:\n        kwargs = {} if args.eval_options is None else args.eval_options\n        if args.format_only:\n            dataset.format_results(outputs, **kwargs)\n        if args.eval:\n            eval_kwargs = cfg.get('evaluation', {}).copy()\n            if args.eval_options is not None:\n                eval_kwargs.update(args.eval_options)\n            # hard-code way to remove EvalHook args\n            for key in [\n                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                    'rule'\n            ]:\n                eval_kwargs.pop(key, None)\n            print('start evaluation!')\n            print(dataset.evaluate(outputs, **eval_kwargs))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/tracking/calculate_cmap.py",
    "content": "import argparse\nfrom mmcv import Config\nfrom mmdet3d.datasets import build_dataset\nimport cv2\nimport torch\nimport numpy as np\nimport pickle\nimport time\n\nfrom cmap_utils.utils import *\nfrom cmap_utils.match_utils import *\nfrom cmap_utils.data_utils import *\n\nfont                   = cv2.FONT_HERSHEY_SIMPLEX\nlocation               = (200,60)\nfontScale              = 2\nfontColor              = (255,0,0)\nthickness              = 2\nlineType               = 2\n\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\n\nid2cat = {\n    0:'ped_crossing',\n    1:'divider',\n    2:'boundary',\n}\n\nCOLOR_MAPS_BGR = {\n    # bgr colors\n    'divider': (0, 0, 255),\n    'boundary': (0, 255, 0),\n    'ped_crossing': (255, 0, 0),\n    'centerline': (51, 183, 255),\n    'drivable_area': (171, 255, 255)\n}\n\nCOLOR_MAPS_PLT = {\n    'divider': 'r',\n    'boundary': 'g',\n    'ped_crossing': 'b',\n    'centerline': 'orange',\n    'drivable_area': 'y',\n}\n\nINTERP_NUM = 200\nN_WORKERS = 0\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Visualize groundtruth and results')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument('--thr', \n        type=float,\n        default=0.4,\n        help='score threshold to filter predictions')\n    parser.add_argument(\n        '--result_path',\n        default=None,\n        help='directory to submission file')\n    parser.add_argument(\n        '--consist',\n        default=1,\n        type=int,\n        help='whether to use the consistent criterion'\n    )\n    parser.add_argument(\n        '--cons_frames',\n        default=5,\n        help='consective frames for cons metric'\n    )\n    args = parser.parse_args()\n    return args\n\ndef instance_match(pred_lines, scores, gt_lines, threshold, metric='chamfer'):\n    ### obtain tp,fp,score for a frame based on chamfer distance\n\n    num_preds = pred_lines.shape[0]\n    num_gts = gt_lines.shape[0]\n\n    # tp and fp\n    tp = np.zeros((num_preds), dtype=np.float32)\n    fp = np.zeros((num_preds), dtype=np.float32)\n\n    if num_gts == 0:\n        fp[...] 
= 1\n        return (tp.copy(),fp.copy())\n    \n    if num_preds == 0:\n        return (tp.copy(),fp.copy())\n\n    assert pred_lines.shape[1] == gt_lines.shape[1], \\\n        \"sample points num should be the same\"\n\n    matrix = np.zeros((num_preds, num_gts))\n    matrix = chamfer_distance_batch(pred_lines, gt_lines)\n    matrix_min = matrix.min(axis=1)\n    matrix_argmin = matrix.argmin(axis=1)\n    sort_inds = np.argsort(-scores)\n    tp = np.zeros((num_preds), dtype=np.float32)\n    fp = np.zeros((num_preds), dtype=np.float32)\n    gt_covered = np.zeros(num_gts, dtype=bool)\n    for i in sort_inds:\n        if matrix_min[i] <= threshold:\n            matched_gt = matrix_argmin[i]\n            if not gt_covered[matched_gt]:\n                gt_covered[matched_gt] = True\n                tp[i] = 1\n            else:\n                fp[i] = 1\n        else:\n            fp[i] = 1\n    return (tp.copy(),fp.copy())\n\ndef _evaluate_single(pred_vectors, scores, gt_vectors, threshold, metric='chamfer'):\n    ### collect tp-fp-score information\n\n    pred_lines = np.array(pred_vectors)\n    gt_lines = np.array(gt_vectors)\n    \n    if len(pred_lines) == 0 or len(gt_lines)==0:\n        tp_fp_score = np.zeros((0,3))\n        return tp_fp_score\n    scores = np.array(scores)\n    tp_fp_list = instance_match(pred_lines, scores, gt_lines, threshold, metric) # (M, 2)\n\n    tp, fp = tp_fp_list\n    tp_fp_score = np.hstack([tp[:, None], fp[:, None], scores[:, None]])\n    return tp_fp_score\n\ndef match_gt_w_pred(curr_data,curr_data_gt,thresh):\n    ### find local id matching between predicted vector and gt vectors\n\n    curr_vectors_np = {label: [] for label in cat2id.values()}\n    curr_scores_np = {label: [] for label in cat2id.values()}\n    for i in range(len(curr_data['labels'])):\n        score = curr_data['scores'][i]\n        label = curr_data['labels'][i]\n        v = curr_data['vectors'][i]\n        curr_vectors_np[label].append(v)\n        curr_scores_np[label].append(score)\n    curr_vectors = {}\n    for label, vecs in curr_vectors_np.items():\n        if len(vecs) > 0:\n            vecs = np.stack(vecs, 0)\n            vecs = torch.tensor(vecs)\n            curr_vectors[label] = vecs\n        else:\n            curr_vectors[label] = vecs\n    curr_vectors_gt_np = curr_data_gt\n    curr_vectors_gt = {}\n    for label, vecs in curr_vectors_gt_np.items():\n        if len(vecs) > 0:\n            vecs_np = []\n            for vec in vecs:\n                vecs_np.append(vec)\n            vecs = np.stack(vecs_np, 0)\n            vecs = torch.tensor(vecs)\n            curr_vectors_gt[label] = vecs\n        else:\n            curr_vectors_gt[label] = vecs\n    pred2gt_matchings = find_matchings_chamfer(curr_vectors,curr_vectors_gt,curr_scores_np,thresh=thresh)\n\n    return pred2gt_matchings\n\ndef get_scene_matching_result(gts,pred_results,scene_name2token,scene_name,thresh=1.5):\n    ### obtain local id matching of a scene \n\n    start_token = scene_name2token[scene_name][0]\n    vectors_seq = []\n    scores_seq = []\n    pred_matching_seq = []\n    vectors_gt_seq = []\n    pred2gt_matchings_seq = []\n\n    choose_scene = pred_results[start_token]['scene_name']\n    for local_idx,token in enumerate(scene_name2token[scene_name]):\n        prev_data = pred_results[token]\n        gt_vectors = gts[token]\n\n        assert prev_data['scene_name']  == choose_scene\n        assert prev_data['local_idx'] == local_idx\n\n        vectors_gt_seq.append(gt_vectors)\n\n        vectors = 
{label: [] for label in cat2id.values()}\n        scores = {label: [] for label in cat2id.values()}\n        pred_matching = {label: [] for label in cat2id.values()}\n        for i in range(len(prev_data['labels'])):\n            score, label, v,pred_glb_id = \\\n                prev_data['scores'][i], prev_data['labels'][i], prev_data['vectors'][i], prev_data['global_ids'][i]\n            vectors[label].append(v)\n            scores[label].append(score)\n            pred_matching[label].append(pred_glb_id)\n        pred_matching_seq.append(pred_matching)\n        vectors_seq.append(vectors)\n        scores_seq.append(scores)\n        pred2gt_matchings = match_gt_w_pred(prev_data,gt_vectors, thresh)\n        pred2gt_matchings_seq.append(pred2gt_matchings)\n\n    return vectors_seq, pred_matching_seq, pred2gt_matchings_seq\n\ndef pred2gt_global_matching(ids_info,ids_info_gt,pred2gt_seq):\n    ### obtain global id matching between predicted vectors and gt vectors of a scene\n\n    pred2gt_global_seq = []\n    for frame_idx in range(len(pred2gt_seq)):\n        f_match = pred2gt_seq[frame_idx]\n        f_ids_info = ids_info[frame_idx]\n        f_ids_info_gt = ids_info_gt[frame_idx]\n        pred2gt_match_dict = {}\n        for label in f_ids_info.keys():\n            pred2gt_match_dict[label] = {}\n            f_label_match = f_match[label][0]\n            f_ids_label_info,f_ids_label_info_gt = f_ids_info[label],f_ids_info_gt[label]\n            for pred_match_idx, gt_match_idx in enumerate(f_label_match):\n                pred_glb_match_idx = f_ids_label_info[pred_match_idx]\n\n                if gt_match_idx != -1:\n                    gt_glb_match_idx = f_ids_label_info_gt[gt_match_idx]\n                else:\n                    gt_glb_match_idx = -1\n                pred2gt_match_dict[label][pred_glb_match_idx] = gt_glb_match_idx\n        pred2gt_global_seq.append(pred2gt_match_dict)\n\n    return pred2gt_global_seq\n\ndef get_tpfp_from_scene_single(scene_name,args,scene_name2token,pred_results,gts,\n        gt_matching,threshold):\n    \n    ### generate tp-fp list in a single scene\n    tpfp_score_record = {0:[],1:[],2:[]}\n    scene_gt_matching = gt_matching[scene_name]['instance_ids']\n\n    if args.consist:\n        vectors_seq, scene_pred_matching,pred2gt_seq \\\n            = get_scene_matching_result(gts,pred_results,scene_name2token,scene_name,threshold)\n        pred2gt_global_seq = pred2gt_global_matching(scene_pred_matching,scene_gt_matching,pred2gt_seq)\n\n    vectors_seq = []\n    scores_seq = []\n    gt_flag_dict = {label:{} for label in cat2id.values()}\n    for frame_idx, token in enumerate(scene_name2token[scene_name]):\n        prev_data = pred_results[token]\n        vectors_gt = gts[token]\n\n        vectors = {label: [] for label in cat2id.values()}\n        scores = {label: [] for label in cat2id.values()}\n        for i in range(len(prev_data['labels'])):\n            score, label, v = prev_data['scores'][i], prev_data['labels'][i], prev_data['vectors'][i]\n            vectors[label].append(v)\n            scores[label].append(score)\n        \n        for label in cat2id.values():\n            tpfp_score = _evaluate_single(vectors[label], scores[label], vectors_gt[label] ,threshold)\n            if args.consist:\n                #### deal with the consistency part\n                for vec_idx,single_tpfp_score in enumerate(tpfp_score):\n                    curr_pred2gt_match = pred2gt_global_seq[frame_idx][label]  ### pred_global_id: gt_global_id\n\n               
     pred_local2global_mapping = scene_pred_matching[frame_idx][label]\n                    match_glb_pred_idx = pred_local2global_mapping[vec_idx]    ### \n                    match_glb_gt_idx = curr_pred2gt_match[match_glb_pred_idx]\n\n                    if match_glb_gt_idx not in gt_flag_dict[label].keys():\n                        gt_flag_dict[label][match_glb_gt_idx] = match_glb_pred_idx\n                    else:\n                        if match_glb_pred_idx != gt_flag_dict[label][match_glb_gt_idx]:\n                            tpfp_score[vec_idx][:2] = np.array([0,1])\n            tpfp_score_record[label].append(tpfp_score)\n\n        vectors_seq.append(vectors)\n        scores_seq.append(scores)\n\n    return tpfp_score_record\n\ndef get_mAP(tpfp_score_record,num_gts,threshold):\n\n    ### calculate mean AP given tp-fp-score record\n    result_dict = {}\n    for cat_name,label in cat2id.items():\n        sum_AP = 0\n        result_dict[cat_name] = {}\n        tp_fp_score = [np.vstack(i[label]) for i in tpfp_score_record]\n        tp_fp_score = np.vstack(tp_fp_score)\n\n        sort_inds = np.argsort(-tp_fp_score[:, -1])\n\n        tp = tp_fp_score[sort_inds, 0]\n        fp = tp_fp_score[sort_inds, 1]\n        tp = np.cumsum(tp, axis=0)\n        fp = np.cumsum(fp, axis=0)\n        eps = np.finfo(np.float32).eps\n        recalls = tp / np.maximum(num_gts[label], eps)\n        precisions = tp/np.maximum(tp+fp, eps)\n\n        AP = average_precision(recalls, precisions, 'area')\n        sum_AP += AP\n        result_dict[cat_name].update({f'AP@{threshold}': AP})\n    return result_dict\n\ndef main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n    import_plugin(cfg)\n    dataset = build_dataset(cfg.eval_config)\n\n    dataset[0]\n    scene_name2idx = {}\n    scene_name2token = {}\n    for idx, sample in enumerate(dataset.samples):\n        scene = sample['scene_name']\n        token = sample['token']\n        if scene not in scene_name2idx:\n            scene_name2idx[scene] = []\n            scene_name2token[scene] = []\n        scene_name2idx[scene].append(idx)\n        scene_name2token[scene].append(token)\n    all_scene_names = sorted(list(scene_name2idx.keys()))\n\n    gt_matching_path = cfg.eval_config.ann_file.replace('.pkl','_gt_tracks.pkl',)\n    with open(gt_matching_path,'rb') as pf:\n        gt_matching = pickle.load(pf)\n    \n\n    pred_matching_path = args.result_path\n    with open(pred_matching_path,'rb') as ppf:\n        pred_matching_result_raw = pickle.load(ppf)\n\n    roi_size = torch.tensor(cfg.roi_size).numpy()\n    origin = torch.tensor(cfg.pc_range[:2]).numpy()\n\n    if roi_size[0] == 60:\n        thresholds_list = [0.5,1.0,1.5]\n    elif roi_size[0] == 100:\n        thresholds_list = [1.0, 1.5, 2.0]\n    else:\n        raise ValueError('roi size {} not supported, check again...'.format(roi_size))\n\n    if 'newsplit' in args.result_path:\n        gts = get_gts(dataset,new_split=True)\n    else:\n        gts = get_gts(dataset)\n\n    ### interpolate vector data\n    start_time = time.time()\n    denormed_gts,pred_matching_result,num_gts,num_preds = \\\n        get_data(pred_matching_result_raw,gts,origin,roi_size,INTERP_NUM,result_path=args.result_path,denorm=False)\n    print('Preparing Data Time {}'.format(time.time()-start_time))\n\n    ### obtain mAP for each threshold\n    scene_name_list = []\n    for single_scene_name in all_scene_names:\n        scene_name_list.append( (single_scene_name,args) )\n    result_dict = {thr:{} for thr 
in thresholds_list}\n    for threshold in thresholds_list:\n        tpfp_score_list =[]\n        for (scene_name,args) in scene_name_list:\n            tpfp_score = get_tpfp_from_scene_single(scene_name,args,scene_name2token,pred_matching_result,\n                        denormed_gts,gt_matching,threshold)\n            tpfp_score_list.append(tpfp_score)\n        result_dict[threshold] = get_mAP(tpfp_score_list,num_gts,threshold)\n        print(result_dict[threshold])\n    \n    cat_mean_AP = np.array([0.,0.,0.])\n    mean_AP = 0\n    for thr in thresholds_list:\n        for cat_name in cat2id.keys():\n            mean_AP += result_dict[thr][cat_name]['AP@{}'.format(thr)]\n            cat_mean_AP[cat2id[cat_name]] += result_dict[thr][cat_name]['AP@{}'.format(thr)]\n\n    cat_map_dict = {cat:cat_mean_AP[idx]/len(thresholds_list) for cat,idx in cat2id.items() }\n    print('Category mean AP',cat_map_dict)\n    print('mean AP ',mean_AP/(len(cat2id)*len(thresholds_list)))\n    print('Overall Time',time.time()-start_time)\n\nif __name__ == '__main__':\n    main()"
  },
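  {
    "path": "tools/tracking/cmap_ap_example.py",
    "content": "'''Toy sketch of the AP computation behind get_mAP in calculate_cmap.py.\n\nFabricated tp-fp-score rows for one category: predictions are sorted by\ndescending score, hits and misses are cumulated, and the precision-recall\ncurve is integrated by cmap_utils.utils.average_precision ('area' mode, as\nin get_mAP). The chamfer matching and consistency re-labelling of the real\nscript are omitted. Run from tools/tracking so cmap_utils imports.'''\nimport numpy as np\n\nfrom cmap_utils.utils import average_precision\n\n# One row per prediction: [tp, fp, score], with tp + fp == 1.\ntp_fp_score = np.array([\n    [1., 0., 0.95],\n    [1., 0., 0.90],\n    [0., 1., 0.80],\n    [1., 0., 0.60],\n    [0., 1., 0.40],\n])\nnum_gts = 4  # fabricated count of gt instances for this category\n\n# Rank by score, cumulate, and turn counts into recall/precision curves.\norder = np.argsort(-tp_fp_score[:, -1])\ntp = np.cumsum(tp_fp_score[order, 0])\nfp = np.cumsum(tp_fp_score[order, 1])\neps = np.finfo(np.float32).eps\nrecalls = tp / np.maximum(num_gts, eps)\nprecisions = tp / np.maximum(tp + fp, eps)\n\nprint('AP:', average_precision(recalls, precisions, 'area'))\n"
  },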
  {
    "path": "tools/tracking/cmap_utils/__init__.py",
    "content": ""
  },
  {
    "path": "tools/tracking/cmap_utils/data_utils.py",
    "content": "import mmcv\nimport os\nfrom mmdet3d.datasets import build_dataloader\nimport numpy as np\nfrom copy import deepcopy\nfrom functools import partial\nfrom multiprocessing import Pool\n\nfrom .utils import *\nfrom .match_utils import *\n\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\n\ndef get_gts(dataset,new_split=False,N_WORKERS=16):\n    roi_size = dataset.roi_size\n    if 'av2' in dataset.ann_file:\n        dataset_name = 'av2'\n    else:\n        dataset_name = 'nusc'\n    if new_split:\n        tmp_file = f'./tmp_gts_{dataset_name}_{roi_size[0]}x{roi_size[1]}_newsplit.pkl'\n    else:\n        tmp_file = f'./tmp_gts_{dataset_name}_{roi_size[0]}x{roi_size[1]}.pkl'\n    if os.path.exists(tmp_file):\n        print(f'loading cached gts from {tmp_file}')\n        gts = mmcv.load(tmp_file)\n    else:\n        print('collecting gts...')\n        gts = {}\n        # pdb.set_trace()\n        dataloader = build_dataloader(\n            dataset, samples_per_gpu=1, workers_per_gpu=N_WORKERS, shuffle=False, dist=False)\n        pbar = mmcv.ProgressBar(len(dataloader))\n        for data in dataloader:\n            token = deepcopy(data['img_metas'].data[0][0]['token'])\n            gt = deepcopy(data['vectors'].data[0][0])\n            # pdb.set_trace()\n            gts[token] = gt\n            pbar.update()\n            del data # avoid dataloader memory crash\n    \n    for token, gt in gts.items():\n        for label, vectors in gt.items():\n            label_vecs = []\n            for vec in vectors:\n                label_vecs.append(interp_fixed_num(vec,20))\n            gt[label] = label_vecs\n        gts[token] = gt\n    return gts\n\ndef prepare_data_multi(token,idx,pred,gts,origin,roi_size,interp_num,dataset,denorm=False):\n    num_gts = np.array([0,0,0])\n    num_preds = np.array([0,0,0])\n    denorm_gt = {}\n\n    gt = gts[token]\n    denorm_gt = {label:[] for label in cat2id.values()}\n    scores_by_cls = {label: [] for label in cat2id.values()}\n\n    vector_list = []\n    for i in range(len(pred['labels'])):\n        score = pred['scores'][i]\n        vector = pred['vectors'][i].reshape(-1,2)\n        label = pred['labels'][i]\n        scores_by_cls[label].append(score)\n        if not denorm:\n            vector_list.append(interp_fixed_num(vector,interp_num))\n        else:\n            vector_list.append(interp_fixed_num(vector*roi_size+origin,interp_num))\n\n    for label in cat2id.values():\n        for vec in gt[label]:\n            denorm_gt[label].append(interp_fixed_num(vec,interp_num))\n\n    for label in cat2id.values():\n        num_gts[label] += len(gt[label])\n        num_preds[label] += len(scores_by_cls[label])\n    return token,idx,denorm_gt, vector_list, num_gts,num_preds\n\ndef get_data(pred_matching_result_raw,gts,origin,roi_size,num_interp,result_path,denorm=False):\n    ### collect data, interpolate with multi_processing\n    token_list = []\n    for idx,pred_res in enumerate(pred_matching_result_raw):\n        token = pred_res['meta']['token']\n        token_list.append( (token,idx,pred_matching_result_raw[idx]) )\n    dataset = 'av2' if 'av2' in result_path else 'nusc'\n    fn = partial(prepare_data_multi,gts=gts,origin=origin,roi_size=roi_size,interp_num=num_interp,dataset=dataset,denorm=denorm)\n\n    denormed_gts = {}\n    pred_matching_result = {}\n    num_gts = np.zeros(3)\n    num_preds = np.zeros(3)\n    with Pool(processes=16) as pool:\n        data_infos = pool.starmap(fn,token_list)\n    for data_info in 
data_infos:\n        token,idx, denorm_gt,pred_vector, num_gts_single,num_preds_single = data_info\n        denormed_gts[token] = denorm_gt\n        pred_matching_result_raw[idx]['vectors'] = pred_vector\n        pred_matching_result[token] = pred_matching_result_raw[idx]\n        num_gts  = num_gts + num_gts_single\n        num_preds = num_preds + num_preds_single\n\n\n    return denormed_gts,pred_matching_result,num_gts,num_preds"
  },
  {
    "path": "tools/tracking/cmap_utils/match_utils.py",
    "content": "import torch\nimport numpy as np\nfrom scipy.optimize import linear_sum_assignment\n\nfrom .utils import *\n\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\n\ndef get_prev2curr_matrix(prev_meta,curr_meta):\n    # get relative pose\n    prev_e2g_trans = torch.tensor(prev_meta['ego2global_translation'], dtype=torch.float64)\n    prev_e2g_rot = torch.tensor(prev_meta['ego2global_rotation'], dtype=torch.float64)\n    curr_e2g_trans = torch.tensor(curr_meta['ego2global_translation'], dtype=torch.float64)\n    curr_e2g_rot = torch.tensor(curr_meta['ego2global_rotation'], dtype=torch.float64)\n    \n    prev_e2g_matrix = torch.eye(4, dtype=torch.float64)\n    prev_e2g_matrix[:3, :3] = prev_e2g_rot\n    prev_e2g_matrix[:3, 3] = prev_e2g_trans\n\n    curr_g2e_matrix = torch.eye(4, dtype=torch.float64)\n    curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n    curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n    prev2curr_matrix = curr_g2e_matrix @ prev_e2g_matrix\n    return prev2curr_matrix\n\n\ndef find_matchings_iou(src_masks, tgt_masks, thresh=0.1):\n    \"\"\"Find the matching of map elements between two temporally \n    connected frame\n\n    Args:\n        src_masks (_type_): instance masks of prev frame\n        tgt_masks (_type_): instance masks of current frame\n        thresh (float, optional): IOU threshold for matching. Defaults to 0.1.\n    \"\"\"\n    def _mask_iou(mask1, mask2):\n        intersection = (mask1 * mask2).sum()\n        if intersection == 0:\n            return 0.0\n        union = np.logical_or(mask1, mask2).sum()\n        return intersection / union\n    \n    matchings = {}\n    for label, src_instances in src_masks.items():\n        tgt_instances = tgt_masks[label]\n        cost = np.zeros([len(src_instances), len(tgt_instances)])\n        for i, src_ins in enumerate(src_instances):\n            for j, tgt_ins in enumerate(tgt_instances):\n                iou = _mask_iou(src_ins, tgt_ins)\n                cost[i, j] = -iou\n        row_ind, col_ind = linear_sum_assignment(cost)\n        \n        label_matching = [-1 for _ in range(len(src_instances))]\n        label_matching_reverse = [-1 for _ in range(len(tgt_instances))]\n\n        for i, j in zip(row_ind, col_ind):\n            if -cost[i, j] > thresh:\n                label_matching[i] = j\n                label_matching_reverse[j] = i\n        \n        matchings[label] = (label_matching, label_matching_reverse)\n    return matchings\n\ndef find_matchings_chamfer(pred_vectors, gt_vectors, score_dict,thresh=0.5):\n    matchings = {}\n    for label, src_instances in pred_vectors.items():\n        tgt_instances = gt_vectors[label]\n        num_gts = len(tgt_instances)\n        num_preds = len(src_instances)\n        label_matching = [-1 for _ in range(len(src_instances))]\n        label_matching_reverse = [-1 for _ in range(len(tgt_instances))]\n        if len(src_instances) == 0 or len(tgt_instances)==0:\n            matchings[label] = (label_matching, label_matching_reverse)\n            continue\n        cdist = chamfer_distance_batch(src_instances, tgt_instances)\n        label_score = np.array(score_dict[label])\n        matrix_min = cdist.min(axis=1)\n\n        # for each det, which gt is the closest to it\n        matrix_argmin = cdist.argmin(axis=1)\n        sort_inds = np.argsort(-label_score)\n        gt_covered = np.zeros(num_gts, dtype=bool)\n\n        tp = np.zeros((num_preds), dtype=np.float32)\n        fp = np.zeros((num_preds), 
dtype=np.float32)\n        for i in sort_inds:\n            if matrix_min[i] <= thresh:\n                matched_gt = matrix_argmin[i]\n                if not gt_covered[matched_gt]:\n                    gt_covered[matched_gt] = True\n                    label_matching[i] = matched_gt\n                    label_matching_reverse[matched_gt] = i\n        matchings[label] = (label_matching, label_matching_reverse)\n    return matchings\n\ndef get_consecutive_vectors(prev_vectors,curr_vectors,prev2curr_matrix,origin,roi_size):\n    # transform prev vectors\n    prev2curr_vectors = dict()\n    for label, vecs in prev_vectors.items():\n        if len(vecs) > 0:\n            vecs = np.stack(vecs, 0)\n            vecs = torch.tensor(vecs)\n            N, num_points, _ = vecs.shape\n            denormed_vecs = vecs * roi_size + origin # (num_prop, num_pts, 2)\n            denormed_vecs = torch.cat([\n                denormed_vecs,\n                denormed_vecs.new_zeros((N, num_points, 1)), # z-axis\n                denormed_vecs.new_ones((N, num_points, 1)) # 4-th dim\n            ], dim=-1) # (num_prop, num_pts, 4)\n\n            transformed_vecs = torch.einsum('lk,ijk->ijl', prev2curr_matrix, denormed_vecs.double()).float()\n            normed_vecs = (transformed_vecs[..., :2] - origin) / roi_size # (num_prop, num_pts, 2)\n            normed_vecs = torch.clip(normed_vecs, min=0., max=1.)\n            prev2curr_vectors[label] = normed_vecs\n        else:\n            prev2curr_vectors[label] = vecs\n\n    # convert to ego space for visualization\n    for label in prev2curr_vectors:\n        if len(prev2curr_vectors[label]) > 0:\n            prev2curr_vectors[label] = prev2curr_vectors[label] * roi_size + origin\n        if len(curr_vectors[label]) > 0:\n            curr_vecs = torch.tensor(np.stack(curr_vectors[label]))\n            curr_vectors[label] = curr_vecs * roi_size + origin\n        if len(prev_vectors[label]) > 0:\n            prev_vecs = torch.tensor(np.stack(prev_vectors[label]))\n            prev_vectors[label] = prev_vecs * roi_size + origin\n    \n    return prev_vectors, curr_vectors, prev2curr_vectors\n\ndef filter_vectors(data_info, origin,roi_size,thr,num_interp=20):\n    ### filter vectors over threshold\n    filtered_vectors = {label: [] for label in cat2id.values()}\n    for i in range(len(data_info['labels'])):\n        score = data_info['scores'][i]\n        label = data_info['labels'][i]\n        v = data_info['vectors'][i]\n        if score > thr:\n            interp_v = interp_fixed_num(v,num_interp)\n            filtered_vectors[label].append( (np.array(interp_v) - origin)/roi_size )\n    return filtered_vectors\n"
  },
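  {
    "path": "tools/tracking/cmap_utils/match_utils_example.py",
    "content": "'''Toy sketch of the IoU-based instance matching in find_matchings_iou.\n\nTwo fabricated frames with one category (label 0) and tiny rasterized masks;\nlinear_sum_assignment picks the best one-to-one pairing and pairs below the\nIoU threshold stay unmatched (-1). Run from tools/tracking, e.g.\npython -m cmap_utils.match_utils_example.'''\nimport numpy as np\n\nfrom cmap_utils.match_utils import find_matchings_iou\n\n# One instance per frame; the masks overlap on 2 of 6 foreground pixels.\nprev_masks = {0: [np.array([[1, 1, 0],\n                            [1, 1, 0],\n                            [0, 0, 0]], dtype=np.uint8)]}\ncurr_masks = {0: [np.array([[0, 1, 1],\n                            [0, 1, 1],\n                            [0, 0, 0]], dtype=np.uint8)]}\n\n# IoU = 1/3 > 0.1, so the instances are matched in both directions:\n# {0: ([0], [0])} -- (forward matching, reverse matching) per label.\nprint(find_matchings_iou(prev_masks, curr_masks, thresh=0.1))\n"
  },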
  {
    "path": "tools/tracking/cmap_utils/utils.py",
    "content": "import cv2\nfrom PIL import Image, ImageDraw\nimport os\nimport torch\nimport numpy as np\nfrom shapely.geometry import LineString\n\ndef import_plugin(cfg):\n    '''\n        import modules from plguin/xx, registry will be update\n    '''\n    import sys\n    sys.path.append(os.path.abspath('.'))    \n    if hasattr(cfg, 'plugin'):\n        if cfg.plugin:\n            import importlib\n            \n            def import_path(plugin_dir):\n                _module_dir = os.path.dirname(plugin_dir)\n                _module_dir = _module_dir.split('/')\n                _module_path = _module_dir[0]\n                for m in _module_dir[1:]:\n                    _module_path = _module_path + '.' + m\n                print(_module_path)\n                plg_lib = importlib.import_module(_module_path)\n\n            plugin_dirs = cfg.plugin_dir\n            if not isinstance(plugin_dirs, list):\n                plugin_dirs = [plugin_dirs,]\n            for plugin_dir in plugin_dirs:\n                import_path(plugin_dir)\n\ndef draw_polylines(vecs, roi_size, origin, cfg):\n    results = []\n    for line_coords in vecs:\n        canvas = np.zeros((cfg.canvas_size[1], cfg.canvas_size[0]), dtype=np.uint8)\n        coords = (line_coords - origin) / roi_size * torch.tensor(cfg.canvas_size)\n        coords = coords.numpy()\n        cv2.polylines(canvas, np.int32([coords]), False, color=1, thickness=cfg.thickness)\n        result = np.flipud(canvas)\n        if result.sum() < 20:\n            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))\n            result = cv2.dilate(result, kernel, iterations=1)\n        results.append(result)\n    return results\n\ndef draw_polygons(vecs, roi_size, origin, cfg):\n    results = []\n    for poly_coords in vecs:\n        mask = Image.new(\"L\", size=(cfg.canvas_size[0], cfg.canvas_size[1]), color=0)\n        coords = (poly_coords - origin) / roi_size * torch.tensor(cfg.canvas_size)\n        coords = coords.numpy()\n        vert_list = [(x, y) for x, y in coords]\n        if not (coords[0] == coords[-1]).all():\n            vert_list.append(vert_list[0])\n        ImageDraw.Draw(mask).polygon(vert_list, outline=1, fill=1)\n        result = np.flipud(np.array(mask))\n        if result.sum() < 20:\n            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))\n            result = cv2.dilate(result, kernel, iterations=1)\n        results.append(result)\n    return results\n    \n\ndef draw_instance_masks(vectors, roi_size, origin, cfg):\n    masks = {}\n    canvas = np.zeros((cfg.canvas_size[1], cfg.canvas_size[0]))\n    for label, vecs in vectors.items():\n        if label == 0:\n            masks[label] = draw_polygons(vecs, roi_size, origin, cfg)\n        else:\n            masks[label] = draw_polylines(vecs, roi_size, origin, cfg)\n        for mask in masks[label]:\n            canvas += mask\n    return masks, canvas\n\n\ndef interp_fixed_num(vector, num_pts):\n    line = LineString(vector)\n\n    distances = np.linspace(0, line.length, num_pts)\n    sampled_points = np.array([list(line.interpolate(distance).coords) \n        for distance in distances]).squeeze()\n    \n    return sampled_points\n\ndef chamfer_distance_batch(pred_lines, gt_lines):\n\n    _, num_pts, coord_dims = pred_lines.shape\n    \n    if not isinstance(pred_lines, torch.Tensor):\n        pred_lines = torch.tensor(pred_lines)\n    if not isinstance(gt_lines, torch.Tensor):\n        gt_lines = torch.tensor(gt_lines)\n    dist_mat = 
    dist_mat = torch.cdist(pred_lines.view(-1, coord_dims), \n                    gt_lines.view(-1, coord_dims), p=2) \n    # (num_query*num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts)) \n    # (num_query, num_points, num_gt*num_points)\n    dist_mat = torch.stack(torch.split(dist_mat, num_pts, dim=-1)) \n    # (num_gt, num_q, num_pts, num_pts)\n\n    dist1 = dist_mat.min(-1)[0].sum(-1)\n    dist2 = dist_mat.min(-2)[0].sum(-1)\n\n    dist_matrix = (dist1 + dist2).transpose(0, 1) / (2 * num_pts)\n    \n    return dist_matrix.numpy()\n\ndef average_precision(recalls, precisions, mode='area'):\n\n    recalls = recalls[np.newaxis, :]\n    precisions = precisions[np.newaxis, :]\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = 0.\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        \n        ind = np.where(mrec[0, 1:] != mrec[0, :-1])[0]\n        ap = np.sum(\n            (mrec[0, ind + 1] - mrec[0, ind]) * mpre[0, ind + 1])\n    \n    elif mode == '11points':\n        for thr in np.arange(0, 1 + 1e-3, 0.1):\n            precs = precisions[0, recalls[0, :] >= thr]\n            prec = precs.max() if precs.size > 0 else 0\n            ap += prec\n        ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n\n    return ap\n"
  },
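  {
    "path": "tools/tracking/cmap_utils/utils_example.py",
    "content": "'''Toy sketch of the chamfer machinery in cmap_utils/utils.py.\n\nResamples two fabricated polylines to a shared point count with\ninterp_fixed_num (chamfer_distance_batch needs equal counts), then computes\nthe (num_pred, num_gt) symmetric chamfer matrix. Run from tools/tracking,\ne.g. python -m cmap_utils.utils_example.'''\nimport numpy as np\n\nfrom cmap_utils.utils import chamfer_distance_batch, interp_fixed_num\n\n# Two parallel 2D polylines, offset by 1 in y, with different vertex counts.\npred = np.array([[0., 0.], [5., 0.], [10., 0.]])\ngt = np.array([[0., 1.], [10., 1.]])\n\n# Resample both to 20 points and add a leading instance axis: (1, 20, 2).\npred_lines = interp_fixed_num(pred, 20)[None]\ngt_lines = interp_fixed_num(gt, 20)[None]\n\n# Each resampled point sits exactly 1 away from the other line, so the\n# symmetric chamfer distance comes out as ~1.0.\nprint(chamfer_distance_batch(pred_lines, gt_lines))\n"
  },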
  {
    "path": "tools/tracking/prepare_gt_tracks.py",
    "content": "import argparse\nimport mmcv\nfrom mmcv import Config\nimport os\nfrom mmdet3d.datasets import build_dataset, build_dataloader\nimport cv2\nimport torch\nimport numpy as np\nfrom PIL import Image, ImageDraw\nimport copy\nimport imageio\nfrom scipy.optimize import linear_sum_assignment\nimport pickle\nfrom functools import partial\nfrom multiprocessing import Pool\n\n\nfont                   = cv2.FONT_HERSHEY_SIMPLEX\nlocation               = (200,60)\nfontScale              = 2\nfontColor              = (255,0,0)\nthickness              = 2\nlineType               = 2\n\nN_WORKERS = 16\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Visualize groundtruth and results')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument('--result', \n        default=None,\n        help='prediction result to visualize'\n        'If submission file is not provided, only gt will be visualized')\n    parser.add_argument(\n        '--out-dir', \n        default='demo',\n        help='directory where visualize results will be saved')\n    parser.add_argument(\n        '--visualize', \n        action=\"store_true\",\n        default=False,\n        help='whether visualize the formed gt tracks')\n    args = parser.parse_args()\n\n    return args\n\ndef import_plugin(cfg):\n    '''\n        import modules from plguin/xx, registry will be update\n    '''\n\n    import sys\n    sys.path.append(os.path.abspath('.'))    \n    if hasattr(cfg, 'plugin'):\n        if cfg.plugin:\n            import importlib\n            \n            def import_path(plugin_dir):\n                _module_dir = os.path.dirname(plugin_dir)\n                _module_dir = _module_dir.split('/')\n                _module_path = _module_dir[0]\n\n                for m in _module_dir[1:]:\n                    _module_path = _module_path + '.' 
                    _module_path = _module_path + '.' + m\n                print(_module_path)\n                plg_lib = importlib.import_module(_module_path)\n\n            plugin_dirs = cfg.plugin_dir\n            if not isinstance(plugin_dirs, list):\n                plugin_dirs = [plugin_dirs,]\n            for plugin_dir in plugin_dirs:\n                import_path(plugin_dir)\n                \n\ndef draw_polylines(vecs, roi_size, origin, cfg):\n    results = []\n    for line_coords in vecs:\n        canvas = np.zeros((cfg.canvas_size[1], cfg.canvas_size[0]), dtype=np.uint8)\n        coords = (line_coords - origin) / roi_size * torch.tensor(cfg.canvas_size)\n        coords = coords.numpy()\n        cv2.polylines(canvas, np.int32([coords]), False, color=1, thickness=cfg.thickness)\n        result = np.flipud(canvas)\n        if result.sum() < 20:\n            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))\n            result = cv2.dilate(result, kernel, iterations=1)\n        results.append(result)\n    return results\n        \n\ndef draw_polygons(vecs, roi_size, origin, cfg):\n    results = []\n    for poly_coords in vecs:\n        mask = Image.new(\"L\", size=(cfg.canvas_size[0], cfg.canvas_size[1]), color=0)\n        coords = (poly_coords - origin) / roi_size * torch.tensor(cfg.canvas_size)\n        coords = coords.numpy()\n        vert_list = [(x, y) for x, y in coords]\n        if not (coords[0] == coords[-1]).all():\n            vert_list.append(vert_list[0])\n        ImageDraw.Draw(mask).polygon(vert_list, outline=1, fill=1)\n        result = np.flipud(np.array(mask))\n        if result.sum() < 20:\n            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))\n            result = cv2.dilate(result, kernel, iterations=1)\n        results.append(result)\n    return results\n        \n\ndef draw_instance_masks(vectors, roi_size, origin, cfg):\n    masks = {}\n    for label, vecs in vectors.items():\n        if label == 0:\n            masks[label] = draw_polygons(vecs, roi_size, origin, cfg)\n        else:\n            masks[label] = draw_polylines(vecs, roi_size, origin, cfg)\n    return masks\n\n\ndef _mask_iou(mask1, mask2):\n    intersection = (mask1 * mask2).sum()\n    if intersection == 0:\n        return 0.0\n    union = np.logical_or(mask1, mask2).sum()\n    return intersection / union\n\n\ndef find_matchings(src_masks, tgt_masks, thresh=0.1):\n    \"\"\"Find the matching of map elements between two temporally \n    connected frames\n\n    Args:\n        src_masks (dict): instance masks of prev frame\n        tgt_masks (dict): instance masks of current frame\n        thresh (float, optional): IOU threshold for matching. 
Defaults to 0.1.\n    \"\"\"\n    matchings = {}\n    for label, src_instances in src_masks.items():\n        tgt_instances = tgt_masks[label]\n        cost = np.zeros([len(src_instances), len(tgt_instances)])\n        for i, src_ins in enumerate(src_instances):\n            for j, tgt_ins in enumerate(tgt_instances):\n                iou = _mask_iou(src_ins, tgt_ins)\n                cost[i, j] = -iou\n        row_ind, col_ind = linear_sum_assignment(cost)\n        \n        label_matching = [-1 for _ in range(len(src_instances))]\n        label_matching_reverse = [-1 for _ in range(len(tgt_instances))]\n        for i, j in zip(row_ind, col_ind):\n            if -cost[i, j] > thresh:\n                label_matching[i] = j\n                label_matching_reverse[j] = i\n        \n        matchings[label] = (label_matching, label_matching_reverse)\n    return matchings\n                \n        \ndef match_two_consecutive_frames(prev_data, curr_data, roi_size, origin, cfg):\n    # get relative pose\n    prev_e2g_trans = torch.tensor(prev_data['img_metas'].data['ego2global_translation'], dtype=torch.float64)\n    prev_e2g_rot = torch.tensor(prev_data['img_metas'].data['ego2global_rotation'], dtype=torch.float64)\n    curr_e2g_trans  = torch.tensor(curr_data['img_metas'].data['ego2global_translation'], dtype=torch.float64)\n    curr_e2g_rot = torch.tensor(curr_data['img_metas'].data['ego2global_rotation'], dtype=torch.float64)\n    prev_e2g_matrix = torch.eye(4, dtype=torch.float64)\n    prev_e2g_matrix[:3, :3] = prev_e2g_rot\n    prev_e2g_matrix[:3, 3] = prev_e2g_trans\n\n    curr_g2e_matrix = torch.eye(4, dtype=torch.float64)\n    curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n    curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n    prev2curr_matrix = curr_g2e_matrix @ prev_e2g_matrix\n\n    # get vector data\n    prev_vectors = copy.deepcopy(prev_data['vectors'].data)\n    curr_vectors = copy.deepcopy(curr_data['vectors'].data)\n\n    #meta_info = curr_data['img_metas'].data\n    #imgs = [mmcv.imread(i) for i in meta_info['img_filenames']]\n    #cam_extrinsics = meta_info['cam_extrinsics']\n    #cam_intrinsics = meta_info['cam_intrinsics']\n    #ego2cams = meta_info['ego2cam']\n    \n    # transform prev vectors\n    prev2curr_vectors = dict()\n    for label, vecs in prev_vectors.items():\n        if len(vecs) > 0:\n            vecs = np.stack(vecs, 0)\n            vecs = torch.tensor(vecs)\n            N, num_points, _ = vecs.shape\n            denormed_vecs = vecs * roi_size + origin # (num_prop, num_pts, 2)\n            denormed_vecs = torch.cat([\n                denormed_vecs,\n                denormed_vecs.new_zeros((N, num_points, 1)), # z-axis\n                denormed_vecs.new_ones((N, num_points, 1)) # 4-th dim\n            ], dim=-1) # (num_prop, num_pts, 4)\n\n            transformed_vecs = torch.einsum('lk,ijk->ijl', prev2curr_matrix, denormed_vecs.double()).float()\n            normed_vecs = (transformed_vecs[..., :2] - origin) / roi_size # (num_prop, num_pts, 2)\n            normed_vecs = torch.clip(normed_vecs, min=0., max=1.)\n            prev2curr_vectors[label] = normed_vecs\n        else:\n            prev2curr_vectors[label] = vecs\n    \n    # convert to ego space for visualization\n    for label in prev2curr_vectors:\n        if len(prev2curr_vectors[label]) > 0:\n            prev2curr_vectors[label] = prev2curr_vectors[label] * roi_size + origin\n        if len(curr_vectors[label]) > 0:\n            curr_vecs = torch.tensor(np.stack(curr_vectors[label]))\n 
           curr_vectors[label] = curr_vecs * roi_size + origin\n    \n    prev2curr_masks = draw_instance_masks(prev2curr_vectors, roi_size, origin, cfg)\n    curr_masks = draw_instance_masks(curr_vectors, roi_size, origin, cfg)\n    \n    prev2curr_matchings = find_matchings(prev2curr_masks, curr_masks, thresh=0.01)\n\n    # For viz purpose, may display the maps in perspective images\n    #viz_dir = os.path.join(scene_dir, '{}_viz_perspective'.format(local_idx))\n    #if not os.path.exists(viz_dir):\n    #    os.makedirs(viz_dir)\n    #renderer.render_camera_views_from_vectors(curr_vectors, imgs, \n    #            cam_extrinsics, cam_intrinsics, ego2cams, 2, viz_dir)\n\n    #renderer.render_bev_from_vectors(curr_vectors, out_dir=None, specified_path='cur.png')\n    #renderer.render_bev_from_vectors(prev2curr_vectors, out_dir=None, specified_path='prev2cur.png')\n    #from PIL import Image \n    #background = Image.open(\"cur.png\")\n    #overlay = Image.open(\"prev2cur.png\")\n    #background = background.convert(\"RGBA\")\n    #overlay = overlay.convert(\"RGBA\")\n    #new_img = Image.blend(background, overlay, 0.5)\n    #new_img.save(\"cur_overlapped.png\",\"PNG\")\n    #import pdb; pdb.set_trace()\n    \n    return prev2curr_matchings\n\n\ndef assign_global_ids(matchings_seq, vectors_seq):\n    ids_seq = []\n    global_map_index = {\n        0: 0,\n        1: 0,\n        2: 0,\n    }\n    \n    ids_0 = dict()\n    for label, vectors in vectors_seq[0].items():\n        id_mapping = dict()\n        for i, _ in enumerate(vectors):\n            id_mapping[i] = global_map_index[label]\n            global_map_index[label] += 1\n        ids_0[label] = id_mapping\n    ids_seq.append(ids_0)\n\n    # Trace all frames following the consecutive matching\n    for t, vectors_t in enumerate(vectors_seq[1:]):\n        ids_t = dict()\n        for label, vectors in vectors_t.items():\n            reverse_matching = matchings_seq[t][label][1]\n            id_mapping = dict()\n            for i, _ in enumerate(vectors):\n                if reverse_matching[i] != -1:\n                    prev_id = reverse_matching[i]\n                    global_id = ids_seq[-1][label][prev_id]\n                else:\n                    global_id = global_map_index[label]\n                    global_map_index[label] += 1\n                id_mapping[i] = global_id\n            ids_t[label] = id_mapping\n        ids_seq.append(ids_t)\n    return ids_seq\n\n\ndef _denorm(vectors, roi_size, origin):\n    for label in vectors:\n        for i, vec in enumerate(vectors[label]):\n            vectors[label][i] = vec * roi_size + origin\n    return vectors\n\n\ndef form_gt_track_single(scene_name, scene_name2idx, dataset, out_dir, cfg, args):\n    print('Process scene {}'.format(scene_name))\n\n    renderer = dataset.renderer\n\n    roi_size = torch.tensor(cfg.roi_size)\n    origin = torch.tensor(cfg.pc_range[:2])\n\n    start_idx = scene_name2idx[scene_name][0]\n    matchings_seq = []\n    vectors_seq = []\n\n    for idx in scene_name2idx[scene_name]:\n        local_idx = idx - start_idx\n        if idx == start_idx:\n            prev_data = dataset[idx]\n        if idx == scene_name2idx[scene_name][-1]: # prev_data is the last frame\n            vectors_seq.append(prev_data['vectors'].data)\n            break\n\n        curr_data = dataset[idx+1]\n        matchings = match_two_consecutive_frames(prev_data, curr_data, roi_size, origin, cfg)\n        matchings_seq.append(matchings)\n        
vectors_seq.append(prev_data['vectors'].data)\n\n        prev_data = curr_data\n    \n    # Derive global ids...\n    # get global ids by traversing all consecutive matching results\n    ids_info = assign_global_ids(matchings_seq, vectors_seq)\n\n    matching_meta = {\n        'sample_ids':scene_name2idx[scene_name],\n        'instance_ids': ids_info,\n    }\n\n    if args.visualize:\n        print('Visualize gt tracks for scene {}'.format(scene_name))\n        scene_dir = os.path.join(out_dir, scene_name)\n        os.makedirs(scene_dir, exist_ok=True)\n        # visualize with matched track ids\n        imgs = []\n        for idx, (id_info, vectors) in enumerate(zip(ids_info, vectors_seq)):\n            vectors = _denorm(vectors, roi_size.numpy(), origin.numpy())\n            save_path = os.path.join(scene_dir, f'{idx}_with_id.png')\n            renderer.render_bev_from_vectors(vectors, out_dir=None, specified_path=save_path, id_info=id_info)\n            viz_img = np.ascontiguousarray(cv2.imread(save_path)[:, :, ::-1], dtype=np.uint8)\n            if idx == 0:\n                img_shape = (viz_img.shape[1], viz_img.shape[0])\n            else:\n                viz_img = cv2.resize(viz_img, img_shape)\n            cv2.putText(viz_img, 't={}'.format(idx), location, font, fontScale, fontColor,\n            thickness, lineType)\n            imgs.append(viz_img)\n        gif_path = os.path.join(scene_dir, 'matching.gif')\n        imageio.mimsave(gif_path, imgs, duration=500)\n    \n    return scene_name, matching_meta\n        \n        \ndef main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n    import_plugin(cfg)\n\n    for split in ['train', 'val']:\n        if split == 'train' and split not in cfg.match_config.ann_file:\n            cfg.match_config.ann_file = cfg.match_config.ann_file.replace('val', 'train')\n        if split == 'val' and split not in cfg.match_config.ann_file:\n            cfg.match_config.ann_file = cfg.match_config.ann_file.replace('train', 'val')\n\n        # build the dataset\n        dataset = build_dataset(cfg.match_config)\n\n        scene_name2idx = {}\n        for idx, sample in enumerate(dataset.samples):\n            scene = sample['scene_name']\n            if scene not in scene_name2idx:\n                scene_name2idx[scene] = []\n            scene_name2idx[scene].append(idx)\n            \n        all_scene_names = sorted(list(scene_name2idx.keys()))\n        all_scene_matching_meta = {}\n\n        out_dir = os.path.join(args.out_dir, split)\n        if not os.path.exists(out_dir):\n            os.makedirs(out_dir)\n\n        all_scene_infos = []\n        for scene_idx, scene_name in enumerate(all_scene_names):\n            all_scene_infos.append((scene_name,))\n            \n        if N_WORKERS > 0:\n            fn = partial(form_gt_track_single, scene_name2idx=scene_name2idx,\n                dataset=dataset, cfg=cfg, out_dir=out_dir, args=args)\n            pool = Pool(N_WORKERS)\n            matching_results = pool.starmap(fn, all_scene_infos)\n            pool.close()\n        else:\n            matching_results =[]\n            for scene_info in all_scene_infos:\n                scene_name = scene_info[0]\n                single_matching_result = form_gt_track_single(scene_name=scene_name, scene_name2idx=scene_name2idx,\n                        dataset=dataset, cfg=cfg, out_dir=out_dir, args=args)\n                matching_results.append(single_matching_result)\n        \n        for scene_name, matching_meta in 
matching_results:\n            all_scene_matching_meta[scene_name] = matching_meta\n        \n        # ann_file ends with '.pkl'; strip the extension and append the tracking suffix\n        track_gt_path = cfg.match_config.ann_file[:-4] + '_gt_tracks.pkl'\n        with open(track_gt_path, 'wb') as f:\n            pickle.dump(all_scene_matching_meta, f, protocol=pickle.HIGHEST_PROTOCOL)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/tracking/prepare_pred_tracks.py",
    "content": "import argparse\nimport mmcv\nfrom mmcv import Config\nimport os\nfrom mmdet3d.datasets import build_dataset\nimport cv2\nimport torch\nimport numpy as np\nimport imageio\nimport pickle\nfrom functools import partial\nfrom multiprocessing import Pool\nimport time\nfrom cmap_utils.utils import *\nfrom cmap_utils.match_utils import get_prev2curr_matrix, find_matchings_iou, get_consecutive_vectors,filter_vectors\n\nfont                   = cv2.FONT_HERSHEY_SIMPLEX\nlocation               = (200,60)\nfontScale              = 2\nfontColor              = (255,0,0)\nthickness              = 2\nlineType               = 2\n\ncat2id = {\n    'ped_crossing': 0,\n    'divider': 1,\n    'boundary': 2,\n}\n\nN_WORKERS = 10\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Visualize groundtruth and results')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument('--thr', \n        type=float,\n        default=0.4,\n        help='score threshold to filter predictions')\n    parser.add_argument(\n        '--result_path',\n        default=None,\n        help='directory to submission file')\n    parser.add_argument(\n        '--cons_frames',\n        default=5,\n        type=int,\n        help='consective frames for matchings'\n    )\n    parser.add_argument(\n        '--visual',\n        default=0,\n        type=int,\n        help='whether to visual'\n    )\n    args = parser.parse_args()\n    return args\n\ndef match_two_consecutive_frames_pred(args,prev_data,prev_meta,  curr_data, curr_meta,roi_size, origin, cfg):\n\n    prev2curr_matrix = get_prev2curr_matrix(prev_meta,curr_meta)\n\n    prev_vectors = filter_vectors(prev_data,origin,roi_size,args.thr)\n    curr_vectors = filter_vectors(curr_data,origin,roi_size,args.thr)\n\n    prev_vectors, curr_vectors, prev2curr_vectors = get_consecutive_vectors(prev_vectors,curr_vectors,\n                                    prev2curr_matrix,origin,roi_size) \n\n    prev2curr_masks, prev2curr_viz = draw_instance_masks(prev2curr_vectors, roi_size, origin, cfg)\n    curr_masks, curr_viz = draw_instance_masks(curr_vectors, roi_size, origin, cfg)\n\n    prev2curr_matchings = find_matchings_iou(prev2curr_masks, curr_masks, thresh=0.001)\n    curr2prev_matchings = {label:[match_info[1],match_info[0]]  for label,match_info in prev2curr_matchings.items()}\n    return curr2prev_matchings\n\ndef collect_pred(data,thr):\n    vectors = {label: [] for label in cat2id.values()}\n    scores = {label: [] for label in cat2id.values()}\n    for i in range(len(data['labels'])):\n        score, label, v = data['scores'][i], data['labels'][i], data['vectors'][i]\n        if score > thr:\n            vectors[label].append(np.array(v))\n            scores[label].append(score)\n    return vectors, scores\n\ndef get_scene_matching_result(args,cfg,pred_results,dataset,origin,roi_size,\n                              scene_name2idx):\n    ### obtain local id sequence matching results of predictions\n    vectors_seq = []\n    scores_seq = []\n\n    ids_seq = []\n    global_map_index = {\n        0: 0,\n        1: 0,\n        2: 0,\n    }\n    frame_token_list = []\n    pred_data_list = []\n    meta_list = []\n\n    for idx in scene_name2idx:\n        token = dataset[idx]['img_metas'].data['token']\n        pred_data = pred_results[token]\n        frame_token_list.append(token)\n        meta_list.append(dataset[idx]['img_metas'].data)\n        pred_data_list.append(pred_data)\n\n    for local_idx in 
range(len(frame_token_list)):\n        curr_pred_data = pred_data_list[local_idx]\n        vectors_info, scores = collect_pred(curr_pred_data, args.thr)\n        vectors_seq.append(vectors_info)\n        scores_seq.append(scores)\n\n        ### assign global ids for the first frame\n        if local_idx == 0:\n            ids_0 = dict()\n            for label, vectors in vectors_info.items():\n                id_mapping = dict()\n                for i, _ in enumerate(vectors):\n                    id_mapping[i] = global_map_index[label]\n                    global_map_index[label] += 1\n                ids_0[label] = id_mapping\n            ids_seq.append(ids_0)\n            continue\n\n        ### match the current frame against up to cons_frames previous frames, from the farthest to the nearest\n        history_range = range(max(local_idx-args.cons_frames, 0), local_idx)\n        tmp_ids_list = []\n        for comeback_idx, prev_idx in enumerate(history_range):\n\n            tmp_ids = {label: {} for label in cat2id.values()}\n            curr_pred_data = pred_data_list[local_idx]\n            comeback_pred_data = pred_data_list[prev_idx]\n            curr_meta = meta_list[local_idx]\n            comeback_meta = meta_list[prev_idx]\n\n            curr2prev_matching = match_two_consecutive_frames_pred(args, comeback_pred_data, comeback_meta,\n                                            curr_pred_data, curr_meta, roi_size, origin, cfg)\n            \n            for label, match_info in curr2prev_matching.items():\n                for curr_match_local_idx, prev_match_local_idx in enumerate(match_info[0]):\n                    if prev_match_local_idx == -1:\n                        tmp_ids[label][curr_match_local_idx] = -1\n                    else:\n                        prev_match_global_idx = ids_seq[prev_idx][label][prev_match_local_idx]\n                        tmp_ids[label][curr_match_local_idx] = prev_match_global_idx\n\n            tmp_ids_list.append(tmp_ids)\n\n        ids_n = {label: {} for label in cat2id.values()}\n\n        ### assign global ids based on the previous k frames' ids; iterate nearest-first\n        ### so that the most recent match takes priority\n        missing_matchings = {label: [] for label in cat2id.values()}\n        for tmp_match in tmp_ids_list[::-1]:\n            for label, matching in tmp_match.items():\n                for vec_local_idx, vec_glb_idx in matching.items():\n                    if vec_local_idx not in ids_n[label].keys():\n                        if vec_glb_idx != -1 and vec_glb_idx not in ids_n[label].values():\n                            ids_n[label][vec_local_idx] = vec_glb_idx\n                            if vec_local_idx in missing_matchings[label]:\n                                missing_matchings[label].remove(vec_local_idx)\n                        else:\n                            missing_matchings[label].append(vec_local_idx)\n\n        ### assign a new id if a vector is not matched to any previous frame\n        for label, miss_match in missing_matchings.items():\n            for miss_idx in miss_match:\n                if miss_idx not in ids_n[label].keys():\n                    ids_n[label][miss_idx] = global_map_index[label]\n                    global_map_index[label] += 1\n        ids_seq.append(ids_n)\n\n    return ids_seq, vectors_seq, scores_seq, meta_list\n\ndef generate_results(ids_info, vectors_seq, scores_seq, meta_list, scene_name):\n    ### flatten per-label instance ids into scene-level global ids\n\n    global_gt_idx = {}\n    result_list = []\n    instance_count = 0\n    for f_idx in range(len(ids_info)):\n        output_dict = {'vectors': [], 'global_ids': [], 'labels': [], 'scores': [], 'local_idx': []}\n        
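# e.g. label=2 with per-label id 7 -> key 207; the label*100 + id scheme below\n        # assumes fewer than 100 instances per label in a scene\n        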
output_dict['scene_name'] = scene_name\n        output_dict['meta'] = meta_list[f_idx]\n        for label in cat2id.values():\n            for local_idx, global_label_idx in ids_info[f_idx][label].items():\n                overall_count_idx = label*100 + global_label_idx\n                if overall_count_idx not in global_gt_idx.keys():\n                    overall_global_idx = instance_count\n                    global_gt_idx[overall_count_idx] = overall_global_idx\n                    instance_count += 1\n                else:\n                    overall_global_idx = global_gt_idx[overall_count_idx]\n                output_dict['global_ids'].append(overall_global_idx)\n                output_dict['vectors'].append(vectors_seq[f_idx][label][local_idx])\n                output_dict['scores'].append(scores_seq[f_idx][label][local_idx])\n                output_dict['labels'].append(label)\n        output_dict['local_idx'] = f_idx\n\n        result_list.append(output_dict)\n    return result_list\n\ndef get_matching_single(scene_name, args, scene_name2idx, dataset, cfg, pred_results, origin, roi_size):\n    name2idx = scene_name2idx[scene_name]\n    ids_info, vectors_seq, scores_seq, meta_list = get_scene_matching_result(args, cfg, pred_results, dataset,\n            origin, roi_size, name2idx)\n    gen_result = generate_results(ids_info, vectors_seq, scores_seq, meta_list, scene_name)\n\n    return (scene_name, ids_info, gen_result)\n\n\ndef main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n    import_plugin(cfg)\n    dataset = build_dataset(cfg.match_config)\n\n    scene_name2idx = {}\n    scene_name2token = {}\n    for idx, sample in enumerate(dataset.samples):\n        scene = sample['scene_name']\n        token = sample['token']\n        if scene not in scene_name2idx:\n            scene_name2idx[scene] = []\n            scene_name2token[scene] = []\n        scene_name2idx[scene].append(idx)\n        scene_name2token[scene].append(token)\n\n    submission = mmcv.load(args.result_path)\n    results = submission['results']\n\n    all_scene_names = sorted(list(scene_name2idx.keys()))\n    all_scene_matching_meta = {}\n\n    scene_info_list = []\n\n    for single_scene_name in all_scene_names:\n        scene_info_list.append((single_scene_name, args))\n\n    roi_size = torch.tensor(cfg.roi_size).numpy()\n    origin = torch.tensor(cfg.pc_range[:2]).numpy()\n\n    start_time = time.time()\n\n    if N_WORKERS > 0:\n        fn = partial(get_matching_single, scene_name2idx=scene_name2idx, dataset=dataset, cfg=cfg,\n                    pred_results=results, origin=origin, roi_size=roi_size)\n        pool = Pool(N_WORKERS)\n        matching_results = pool.starmap(fn, scene_info_list)\n        pool.close()\n    else:\n        matching_results = []\n        for scene_info in scene_info_list:\n            scene_name = scene_info[0]\n            single_matching_result = get_matching_single(scene_name=scene_name, scene_name2idx=scene_name2idx,\n                    args=args, dataset=dataset, cfg=cfg, pred_results=results, origin=origin, roi_size=roi_size)\n            matching_results.append(single_matching_result)\n\n    final_result = []\n    for single_matching_info in matching_results:\n        scene_name = single_matching_info[0]\n        single_matching = single_matching_info[1]\n        all_scene_matching_meta[scene_name] = single_matching\n        final_result.extend(single_matching_info[2])\n\n    meta_path = args.result_path.replace('submission_vector.json', 'pos_predictions_{}.pkl'.format(args.cons_frames))\n    with open(meta_path, 'wb') as 
f:\n        pickle.dump(final_result, f, protocol=pickle.HIGHEST_PROTOCOL)\n    print('Matching time:', time.time() - start_time)\n\n\nif __name__ == '__main__':\n    main()"
  },
  {
    "path": "tools/train.py",
    "content": "# ---------------------------------------------\n# Copyright (c) OpenMMLab. All rights reserved.\n# ---------------------------------------------\n#  Modified by Zhiqi Li\n# ---------------------------------------------\n \nfrom __future__ import division\n\nimport argparse\nimport copy\nimport mmcv\nimport os\nimport time\nimport torch\nimport warnings\nfrom mmcv import Config, DictAction\nfrom mmcv.runner import get_dist_info, init_dist, wrap_fp16_model\nfrom os import path as osp\n\nfrom mmdet import __version__ as mmdet_version\nfrom mmdet3d import __version__ as mmdet3d_version\nfrom mmdet3d.apis import train_model\nfrom mmdet3d.datasets import build_dataset\nfrom mmdet3d.models import build_model\nfrom mmdet3d.utils import collect_env, get_root_logger\nfrom mmdet.apis import set_random_seed\nfrom mmseg import __version__ as mmseg_version\nfrom mmcv.utils import TORCH_VERSION, digit_version\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument('--work-dir', help='the dir to save logs and models')\n    parser.add_argument(\n        '--resume-from', help='the checkpoint file to resume from')\n    parser.add_argument(\n        '--no-validate',\n        action='store_true',\n        help='whether not to evaluate the checkpoint during training')\n    group_gpus = parser.add_mutually_exclusive_group()\n    group_gpus.add_argument(\n        '--gpus',\n        type=int,\n        help='number of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument('--seed', type=int, default=0, help='random seed')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file (deprecate), '\n        'change to --cfg-options instead.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    parser.add_argument(\n        '--autoscale-lr',\n        action='store_true',\n        help='automatically scale lr with the number of gpus')\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.cfg_options:\n        raise ValueError(\n            '--options and --cfg-options cannot be both specified, '\n            '--options is deprecated in favor of --cfg-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --cfg-options')\n        args.cfg_options = args.options\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    # import modules from string list.\n    if cfg.get('custom_imports', None):\n        from mmcv.utils import import_modules_from_strings\n        import_modules_from_strings(**cfg['custom_imports'])\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    # import modules from plguin/xx, registry will be updated\n    import sys\n    sys.path.append(os.path.abspath('.'))    \n    if hasattr(cfg, 'plugin'):\n        if cfg.plugin:\n            import importlib\n            if hasattr(cfg, 'plugin_dir'):\n                def import_path(plugin_dir):\n                    _module_dir = os.path.dirname(plugin_dir)\n                    _module_dir = _module_dir.split('/')\n                    _module_path = _module_dir[0]\n\n                    for m in _module_dir[1:]:\n                        _module_path = _module_path + '.' + m\n                    print(_module_path)\n                    plg_lib = importlib.import_module(_module_path)\n\n                plugin_dirs = cfg.plugin_dir\n                if not isinstance(plugin_dirs,list):\n                    plugin_dirs = [plugin_dirs,]\n                for plugin_dir in plugin_dirs:\n                    import_path(plugin_dir)\n                \n            else:\n                # import dir is the dirpath for the config file\n                _module_dir = os.path.dirname(args.config)\n                _module_dir = _module_dir.split('/')\n                _module_path = _module_dir[0]\n                for m in _module_dir[1:]:\n                    _module_path = _module_path + '.' 
+ m\n                print(_module_path)\n                plg_lib = importlib.import_module(_module_path)\n\n    # work_dir is determined in this priority: CLI > segment in file > filename\n    if args.work_dir is not None:\n        # update configs according to CLI args if args.work_dir is not None\n        cfg.work_dir = args.work_dir\n    elif cfg.get('work_dir', None) is None:\n        # use config filename as default work_dir if cfg.work_dir is None\n        cfg.work_dir = osp.join('./work_dirs',\n                                osp.splitext(osp.basename(args.config))[0])\n    if args.resume_from is not None:\n        cfg.resume_from = args.resume_from\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids\n    else:\n        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)\n    if digit_version(TORCH_VERSION) == digit_version('1.8.1') and cfg.optimizer['type'] == 'AdamW':\n        cfg.optimizer['type'] = 'AdamW2' # fix a bug in AdamW\n    if args.autoscale_lr:\n        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)\n        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n        # re-set gpu_ids with distributed training mode\n        _, world_size = get_dist_info()\n        cfg.gpu_ids = range(world_size)\n\n    # create work_dir\n    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n    # dump config\n    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))\n    # init the logger before other steps\n    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())\n    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')\n    # specify logger name, if we still use 'mmdet', the output info will be\n    # filtered and won't be saved in the log_file\n    # TODO: ugly workaround to judge whether we are training det or seg model\n    if cfg.model.type in ['EncoderDecoder3D']:\n        logger_name = 'mmseg'\n    else:\n        logger_name = 'mmdet'\n    logger = get_root_logger(\n        log_file=log_file, log_level=cfg.log_level, name=logger_name)\n\n    # init the meta dict to record some important information such as\n    # environment info and seed, which will be logged\n    meta = dict()\n    # log env info\n    env_info_dict = collect_env()\n    env_info = '\\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])\n    dash_line = '-' * 60 + '\\n'\n    logger.info('Environment info:\\n' + dash_line + env_info + '\\n' +\n                dash_line)\n    meta['env_info'] = env_info\n    meta['config'] = cfg.pretty_text\n\n    # log some basic info\n    logger.info(f'Distributed training: {distributed}')\n    logger.info(f'Config:\\n{cfg.pretty_text}')\n\n    # set random seeds\n    if args.seed is not None:\n        logger.info(f'Set random seed to {args.seed}, '\n                    f'deterministic: {args.deterministic}')\n        set_random_seed(args.seed, deterministic=args.deterministic)\n    cfg.seed = args.seed\n    meta['seed'] = args.seed\n    meta['exp_name'] = osp.basename(args.config)\n\n    model = build_model(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n\n    model.init_weights()\n\n    if cfg.get('SyncBN', False):\n        import torch.nn as nn\n        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)\n       
 logger.info(\"Using SyncBN\")\n        \n    logger.info(f'Model:\\n{model}')\n    cfg.data.train.work_dir = cfg.work_dir\n    cfg.data.val.work_dir = cfg.work_dir\n    datasets = [build_dataset(cfg.data.train)]\n    if len(cfg.workflow) == 2:\n        val_dataset = copy.deepcopy(cfg.data.val)\n        # in case we use a dataset wrapper\n        if 'dataset' in cfg.data.train:\n            val_dataset.pipeline = cfg.data.train.dataset.pipeline\n        else:\n            val_dataset.pipeline = cfg.data.train.pipeline\n        # set test_mode=False here in deep copied config\n        # which do not affect AP/AR calculation later\n        # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow  # noqa\n        val_dataset.test_mode = False\n        datasets.append(build_dataset(val_dataset))\n    if cfg.checkpoint_config is not None:\n        # save mmdet version, config file content and class names in\n        # checkpoints as meta data\n        cfg.checkpoint_config.meta = dict(\n            mmdet_version=mmdet_version,\n            mmseg_version=mmseg_version,\n            mmdet3d_version=mmdet3d_version,\n            config=cfg.pretty_text,\n            CLASSES=None,\n            PALETTE=datasets[0].PALETTE  # for segmentors\n            if hasattr(datasets[0], 'PALETTE') else None)\n    # add an attribute for visualization convenience\n    # model.CLASSES = datasets[0].CLASSES\n    from plugin.core.apis import custom_train_model\n    custom_train_model(\n        model,\n        datasets,\n        cfg,\n        distributed=distributed,\n        validate=(not args.no_validate),\n        timestamp=timestamp,\n        meta=meta)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/visualization/vis_global.py",
    "content": "import sys\nimport os\nSCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.dirname(SCRIPT_DIR))\n\nimport argparse     \nimport mmcv\nfrom mmcv import Config\nimport matplotlib.transforms as transforms\nfrom mmdet3d.datasets import build_dataset\nimport cv2\nimport torch\nimport numpy as np\nfrom PIL import Image\nimport pickle\nfrom collections import defaultdict\nimport matplotlib.pyplot as plt\nfrom shapely.geometry import LineString, Point\nfrom shapely.ops import nearest_points\nfrom scipy.spatial import ConvexHull\nfrom PIL import Image\nimport cv2\nimport imageio\nimport math\nfrom tracking.cmap_utils.match_utils import *\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Visualize groundtruth and results')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--out_dir',\n        required=True,\n        default=\"\",\n        help='')\n    parser.add_argument(\n        '--data_path',\n        required=True,\n        default=\"\",\n        help='Directory to submission file')\n    parser.add_argument(\n        '--scene_id',\n        type=str, \n        nargs='+',\n        default=None,\n        help='Specify the scene_id to visulize')\n    parser.add_argument(\n        '--option',\n        required=True,\n        default=\"vis-pred\",\n        help='vis-pred, vis-gt')\n    parser.add_argument(\n        '--simplify',\n        default=0.5,\n        type=float,\n        help='Line simplification tolerance'\n    )\n    parser.add_argument(\n        '--line_opacity',\n        default=0.75,\n        type=float,\n        help='Line opacity'\n    )\n    parser.add_argument(\n        '--overwrite',\n        default=1,\n        type=int,\n        help='Whether to overwrite the existing visualization files'\n    )\n    parser.add_argument(\n        '--per_frame_result',\n        default=1,\n        type=int,\n        help='Whether to visualize per frame result'\n    )\n    parser.add_argument(\n        '--dpi',\n        default=20,\n        type=int,\n        help='DPI of the output image'\n    )\n    parser.add_argument(\n        '--transparent',\n        default=False,\n        action='store_true',\n        help='Whether to use transparent background'\n    )\n    \n    args = parser.parse_args()\n\n    return args\n\ndef combine_images_with_labels(image_paths, labels, output_path, font_scale=0.5, font_color=(0, 0, 0)):\n    # Load images\n    images = [cv2.imread(path) for path in image_paths]\n    \n    # Determine the maximum dimensions\n    max_height = max(image.shape[0] for image in images)\n    max_width = max(image.shape[1] for image in images)\n    \n    # Create a blank white canvas to hold the 2x2 grid of images\n    final_image = np.ones((max_height * 1, max_width * 2, 3), dtype=np.uint8) * 255\n    \n    # Font settings\n    font = cv2.FONT_HERSHEY_SIMPLEX\n    \n    for i, img in enumerate(images):\n        # Resize image if necessary\n        img = cv2.resize(img, (max_width, max_height))\n        \n        # Calculate position for each image\n        x_offset = (i % 2) * max_width\n        y_offset = (i // 2) * max_height\n        \n        # Place image in the canvas\n        final_image[y_offset:y_offset+max_height, x_offset:x_offset+max_width] = img\n        \n        # Add label\n        cv2.putText(final_image, labels[i], (x_offset + 5, y_offset + 15), font, font_scale, font_color, 1, cv2.LINE_AA)\n    \n    # Save the final image\n    
cv2.imwrite(output_path, final_image)\n\n\ndef merge_corssing(polylines):\n    convex_hull_polygon = find_largest_convex_hull(polylines)\n    return convex_hull_polygon\n\n\ndef find_largest_convex_hull(polylines):\n    # Merge all points from the polylines into a single collection\n    all_points = []\n    for polyline in polylines:\n        all_points.extend(list(polyline.coords))\n    \n    # Convert the points to a NumPy array for processing with scipy\n    points_array = np.array(all_points)\n    \n    # Compute the convex hull using scipy\n    hull = ConvexHull(points_array)\n    \n    # Extract the vertices of the convex hull\n    hull_points = points_array[hull.vertices]\n    \n    # Create a shapely Polygon object representing the convex hull\n    convex_hull_polygon = LineString(hull_points).convex_hull\n    \n    return convex_hull_polygon\n\n\ndef project_point_onto_line(point, line):\n    \"\"\"Project a point onto a line segment and return the projected point.\"\"\"\n    line_start, line_end = np.array(line.coords[0]), np.array(line.coords[1])\n    line_vec = line_end - line_start\n    point_vec = np.array(point.coords[0]) - line_start\n    line_len = np.linalg.norm(line_vec)\n    line_unitvec = line_vec / line_len\n    point_vec_scaled = point_vec / line_len\n    t = np.dot(line_unitvec, point_vec_scaled)\n    t = np.clip(t, 0.0, 1.0)\n    nearest = line_start + t * line_vec\n    return Point(nearest)\n\n\ndef find_nearest_projection_on_polyline(point, polyline):\n    \"\"\"Find the nearest projected point of a point onto a polyline.\"\"\"\n    min_dist = float('inf')\n    nearest_point = None\n    for i in range(len(polyline.coords) - 1):\n        segment = LineString(polyline.coords[i:i+2])\n        proj_point = project_point_onto_line(point, segment)\n        dist = point.distance(proj_point)\n        if dist < min_dist:\n            min_dist = dist\n            nearest_point = proj_point\n    return np.array(nearest_point.coords)\n\n\ndef find_and_sort_intersections(segment1, segment2):\n    # Find the intersection between the two LineStrings\n    intersection = segment1.intersection(segment2)\n\n    # Prepare a list to store intersection points\n    intersections = []\n\n    # Check the type of intersection\n    if \"Point\" in intersection.geom_type:\n        # Single point or multiple points\n        if intersection.geom_type == \"MultiPoint\":\n            intersections.extend(list(intersection))\n        else:\n            intersections.append(intersection)\n    elif \"LineString\" in intersection.geom_type:\n        # In case of lines or multiline, get boundary points (start and end points of line segments)\n        if intersection.geom_type == \"MultiLineString\":\n            for line in intersection:\n                intersections.extend(list(line.boundary))\n        else:\n            intersections.extend(list(intersection.boundary))\n\n    # Remove duplicates and ensure they are Point objects\n    unique_intersections = [Point(coords) for coords in set(pt.coords[0] for pt in intersections)]\n\n    # Sort the intersection points by their distance along the first polyline\n    sorted_intersections = sorted(unique_intersections, key=lambda pt: segment1.project(pt))\n\n    return sorted_intersections\n\n\ndef get_intersection_point_on_line(line, intersection):\n    intersection_points = find_and_sort_intersections(LineString(line), intersection)\n    if len(intersection_points) >= 2:\n        
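# typical case: the intersection region crosses the line at two or more points;\n        # take the first and the last along the line\n        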
line_intersect_start = intersection_points[0]\n        line_intersect_end = intersection_points[-1]\n    elif len(intersection_points) == 1:\n        if intersection.contains(Point(line[0])):\n            line_intersect_start = Point(line[0])\n            line_intersect_end = intersection_points[0]\n        elif intersection.contains(Point(line[-1])):\n            line_intersect_start = Point(line[-1])\n            line_intersect_end = intersection_points[0]\n        else:\n            return None, None\n    else:\n        return None, None\n    return line_intersect_start, line_intersect_end\n\ndef merge_l2_points_to_l1(line1, line2, line2_intersect_start, line2_intersect_end):\n    # collect the points on line2 that fall strictly between the intersection start and end\n    line2_point_to_merge = []\n    line2_intersect_start_dis = line2.project(line2_intersect_start)\n    line2_intersect_end_dis = line2.project(line2_intersect_end)\n    for point in np.array(line2.coords):\n        point_geom = Point(point)\n        dis = line2.project(point_geom)\n        if dis > line2_intersect_start_dis and dis < line2_intersect_end_dis:\n            line2_point_to_merge.append(point)\n            \n    # merge each collected point with its nearest projection on line1\n    merged_line2_points = []\n    for point in line2_point_to_merge:\n        point_geom = Point(point)\n        # find the nearest projection of the point onto line1\n        closest_point_on_line = find_nearest_projection_on_polyline(point_geom, line1)\n        if len(closest_point_on_line) == 0:\n            merged_line2_points.append(point)\n        else:\n            # average the point with its projection\n            merged_line2_points.append(((closest_point_on_line + point) / 2)[0])\n\n    if len(merged_line2_points) == 0:\n        merged_line2_points = np.array([]).reshape(0, 2)\n    else:\n        merged_line2_points = np.array(merged_line2_points)\n        \n    return merged_line2_points\n\ndef segment_line_based_on_merged_area(line, merged_points):\n    \n    if len(merged_points) == 0:\n        return np.array(line.coords), np.array([]).reshape(0, 2)\n    \n    first_merged_point = merged_points[0]\n    last_merged_point = merged_points[-1]\n    \n    start_dis = line.project(Point(first_merged_point))\n    end_dis = line.project(Point(last_merged_point))\n    \n    start_segment = []\n    for point in np.array(line.coords):\n        point_geom = Point(point)\n        if line.project(point_geom) < start_dis:\n            start_segment.append(point)\n    \n    end_segment = []\n    for point in np.array(line.coords):\n        point_geom = Point(point)\n        if line.project(point_geom) > end_dis:\n            end_segment.append(point)\n            \n    if len(start_segment) == 0:\n        start_segment = np.array([]).reshape(0, 2)\n    else:\n        start_segment = np.array(start_segment)\n        \n    if len(end_segment) == 0:\n        end_segment = np.array([]).reshape(0, 2)\n    else:\n        end_segment = np.array(end_segment)\n    \n    return start_segment, end_segment\n    \ndef get_bbox_size_for_points(points):\n    if len(points) == 0:\n        return 0, 0\n    \n    # Initialize min and max coordinates with the first point\n    min_x, min_y = points[0]\n    max_x, max_y = points[0]\n\n    # Iterate through each point to update min and max coordinates\n    for x, y in points[1:]:\n        min_x = min(min_x, x)\n        min_y = min(min_y, y)\n        max_x = max(max_x, x)\n      
  max_y = max(max_y, y)\n    return max_x - min_x, max_y - min_y\n\ndef get_longer_segmenent_to_merged_points(l1_segment, l2_segment, merged_line2_points, segment_type=\"start\"):\n    # remove points from the segments that are too close to merged_line2_points\n    l1_segment_temp = []\n    if len(merged_line2_points) > 1:\n        merged_polyline = LineString(merged_line2_points)\n        for point in l1_segment:\n            if merged_polyline.distance(Point(point)) > 0.1:\n                l1_segment_temp.append(point)\n    elif len(merged_line2_points) == 1:\n        for point in l1_segment:\n            if Point(point).distance(Point(merged_line2_points[0])) > 0.1:\n                l1_segment_temp.append(point)\n    elif len(merged_line2_points) == 0:\n        l1_segment_temp = l1_segment\n\n    l1_segment = np.array(l1_segment_temp)\n    \n    l2_segment_temp = []\n    if len(merged_line2_points) > 1:\n        merged_polyline = LineString(merged_line2_points)\n        for point in l2_segment:\n            if merged_polyline.distance(Point(point)) > 0.1:\n                l2_segment_temp.append(point)\n    elif len(merged_line2_points) == 1:\n        for point in l2_segment:\n            if Point(point).distance(Point(merged_line2_points[0])) > 0.1:\n                l2_segment_temp.append(point)\n    elif len(merged_line2_points) == 0:\n        l2_segment_temp = l2_segment\n                \n    l2_segment = np.array(l2_segment_temp)\n    \n    if segment_type == \"start\":\n        \n        temp = l1_segment.tolist()\n        if len(merged_line2_points) > 0:\n            temp.append(merged_line2_points[0])\n        \n        l1_start_box_size = get_bbox_size_for_points(temp)\n        \n        temp = l2_segment.tolist()\n        if len(merged_line2_points) > 0:\n            temp.append(merged_line2_points[0])\n        l2_start_box_size = get_bbox_size_for_points(temp)\n    \n        if l2_start_box_size[0]*l2_start_box_size[1] >= l1_start_box_size[0]*l1_start_box_size[1]:\n            longer_segment = l2_segment\n        else:\n            longer_segment = l1_segment\n    else:\n        temp = l1_segment.tolist()\n        if len(merged_line2_points) > 0:\n            temp.append(merged_line2_points[-1])\n        l1_end_box_size = get_bbox_size_for_points(temp)\n        \n        temp = l2_segment.tolist()\n        if len(merged_line2_points) > 0:\n            temp.append(merged_line2_points[-1])\n        l2_end_box_size = get_bbox_size_for_points(temp)\n    \n        if l2_end_box_size[0]*l2_end_box_size[1] >= l1_end_box_size[0]*l1_end_box_size[1]:\n            longer_segment = l2_segment\n        else:\n            longer_segment = l1_segment\n    \n    if len(longer_segment) == 0:\n        longer_segment = np.array([]).reshape(0, 2)\n    else:\n        longer_segment = np.array(longer_segment)\n        \n    return longer_segment\n    \ndef get_line_lineList_max_intersection(merged_lines, line, thickness=4):\n    pre_line = merged_lines[-1]\n    max_iou = 0\n    merged_line_index = 0\n    max_intersection = None\n    for line_index, one_merged_line in enumerate(merged_lines):\n        line1 = LineString(one_merged_line)\n        line2 = LineString(line)\n        thick_line1 = line1.buffer(thickness)\n        thick_line2 = line2.buffer(thickness)\n        intersection = thick_line1.intersection(thick_line2)\n        if intersection.area / thick_line2.area > max_iou:\n            max_iou = intersection.area / thick_line2.area\n            # remember the intersection of the best-matching line\n            max_intersection = intersection\n            pre_line = np.array(line1.coords)\n            merged_line_index = line_index\n    return max_intersection, pre_line, merged_line_index\n    \ndef algin_l2_with_l1(line1, line2):\n    \n    if len(line1) > len(line2):\n        l2_len = len(line2)\n        line1_geom = LineString(line1)\n        interval_length = line1_geom.length / (l2_len - 1)\n        line1 = [np.array(line1_geom.interpolate(interval_length * i)) for i in range(l2_len)]\n        \n    elif len(line1) < len(line2):\n        l1_len = len(line1)\n        line2_geom = LineString(line2)\n        interval_length = line2_geom.length / (l1_len - 1)\n        line2 = [np.array(line2_geom.interpolate(interval_length * i)) for i in range(l1_len)]\n    \n    # make line1 and line2 run in the same direction: line1.coords[0] should be closer to line2.coords[0]\n    line1_geom = LineString(line1)\n    line2_flip = np.flip(line2, axis=0)\n    \n    line2_traj_len = 0\n    for point_idx, point in enumerate(line2):\n        line2_traj_len += np.linalg.norm(point - line1[point_idx])\n    \n    flip_line2_traj_len = 0\n    for point_idx, point in enumerate(line2_flip):\n        flip_line2_traj_len += np.linalg.norm(point - line1[point_idx])\n    \n    if abs(flip_line2_traj_len - line2_traj_len) < 3:\n        # the two orientations are nearly tied; disambiguate by the projected walk length along line1\n        line2_walk_len = 0\n        for point in line2:\n            point_geom = Point(point)\n            proj_point = find_nearest_projection_on_polyline(point_geom, line1_geom)\n            if len(proj_point) != 0:\n                line2_walk_len += line1_geom.project(Point(proj_point[0]))\n        \n        flip_line2_walk_len = 0\n        for point in line2_flip:\n            point_geom = Point(point)\n            proj_point = find_nearest_projection_on_polyline(point_geom, line1_geom)\n            if len(proj_point) != 0:\n                flip_line2_walk_len += line1_geom.project(Point(proj_point[0]))\n        \n        if flip_line2_walk_len < line2_walk_len:\n            return line2_flip\n        else:\n            return line2\n    \n    if flip_line2_traj_len < line2_traj_len:\n        return line2_flip\n    else:\n        return line2\n\ndef _is_u_shape(line, direction):\n    assert direction in ['left', 'right'], 'Wrong direction argument {}'.format(direction)\n    line_geom = LineString(line)\n    length = line_geom.length\n    mid_point = np.array(line_geom.interpolate(length / 2).coords)[0]\n    start = line[0]\n    end = line[-1]\n\n    if direction == 'left':\n        cond1 = mid_point[0] < start[0] and mid_point[0] < end[0]\n    else:\n        cond1 = mid_point[0] > start[0] and mid_point[0] > end[0]\n    \n    dist_start_end = np.sqrt((start[0] - end[0])**2 + (start[1]-end[1])**2)\n    cond2 = length >= math.pi / 2 * dist_start_end\n\n    return cond1 and cond2\n\ndef check_circle(pre_line, vec):\n\n    # if the matched line in merged_lines is a circle\n    if np.linalg.norm(pre_line[0] - pre_line[-1]) == 0:\n        return True\n    \n    # if the matched line is almost a circle and the new line is close to it\n    if np.linalg.norm(pre_line[0] - pre_line[-1]) < 0.1:\n        vec_2_circle_distance = 0\n        for point in vec:\n            vec_2_circle_distance += LineString(pre_line).distance(Point(point))\n        if vec_2_circle_distance < 3:\n            return True\n    return False\n        \ndef connect_polygon(merged_polyline, merged_lines):\n    start_end_connect = [merged_polyline[0], merged_polyline[-1]]\n    iou = []\n    length_ratio = []\n    for one_merged_line in merged_lines:\n        line1 = 
LineString(one_merged_line)\n        line2 = LineString(start_end_connect)\n        thickness = 1\n        thick_line1 = line1.buffer(thickness)\n        thick_line2 = line2.buffer(thickness)\n        intersection = thick_line1.intersection(thick_line2)\n        iou.append(intersection.area / thick_line2.area)\n        length_ratio.append(line1.length / line2.length)\n\n    if max(iou) > 0.95 and max(length_ratio) > 3.0:\n        merged_polyline = np.concatenate((merged_polyline, [merged_polyline[0]]), axis=0)\n    return merged_polyline\n    \ndef iou_merge_boundry(merged_lines, vec, thickness=1):\n\n    # intersection: the (polygon) overlap area between the new line and the best-matching line in merged_lines\n    intersection, pre_line, merged_line_index = get_line_lineList_max_intersection(merged_lines, vec, thickness)\n\n    # corner case: the matched line is (almost) a circle, so there is nothing to merge\n    if check_circle(pre_line, vec):\n        return merged_lines\n\n    # Handle U-shape, the main corner case\n    if _is_u_shape(pre_line, 'left'):\n        if _is_u_shape(vec, 'right'):\n            # Two u shapes with opposite directions, directly generate a polygon exterior\n            polygon = find_largest_convex_hull([LineString(pre_line), LineString(vec)])\n            merged_lines[-1] = np.array(polygon.exterior.coords)\n            return merged_lines\n        elif not _is_u_shape(vec, 'left'):\n            line_geom1 = LineString(pre_line)\n            line1_dists = np.array([line_geom1.project(Point(x)) for x in pre_line])\n            split_mask = line1_dists > line_geom1.length / 2\n            split_1 = LineString(pre_line[~split_mask])\n            split_2 = LineString(pre_line[split_mask])\n\n            # get the projected distance\n            np1 = np.array(nearest_points(split_1, Point(pre_line[-1]))[0].coords)[0]\n            np2 = np.array(nearest_points(split_2, Point(pre_line[0]))[0].coords)[0]\n            dist1 = np.linalg.norm(np1-pre_line[-1])\n            dist2 = np.linalg.norm(np2-pre_line[0])\n            dist = min(dist1, dist2)\n\n            if dist < thickness:\n                line_geom2 = LineString(vec)\n                dist1 = line_geom2.distance(Point(pre_line[0]))\n                dist2 = line_geom2.distance(Point(pre_line[-1]))\n                pt = pre_line[0] if dist1 <= dist2 else pre_line[-1]\n                if vec[0][0] > vec[1][0]:\n                    vec = np.array(vec[::-1])\n                    line_geom2 = LineString(vec)\n                proj_length = line_geom2.project(Point(pt))\n                l2_select_mask = np.array([line_geom2.project(Point(x)) > proj_length for x in vec])\n                selected_l2 = vec[l2_select_mask]\n                merged_result = np.concatenate([pre_line[:-1, :], pt[None, ...], selected_l2], axis=0)\n                merged_lines[-1] = merged_result\n                return merged_lines\n    \n    # align the new line with the line in the merged_lines so that points on two lines are traversed in the same direction\n    vec = algin_l2_with_l1(pre_line, vec)\n    line1 = LineString(pre_line)\n    line2 = LineString(vec)\n    \n    # get the intersection points between the IoU area and the two lines\n    line1_intersect_start, line1_intersect_end = get_intersection_point_on_line(pre_line, intersection)\n    line2_intersect_start, line2_intersect_end = get_intersection_point_on_line(vec, intersection)\n    \n    # If no intersection points are found (a corner case), fall back to the last point of line1\n    # and the first point of line2, which connects the two lines head to tail\n    if line1_intersect_start is None or line1_intersect_end is None or line2_intersect_start is None or line2_intersect_end is None:\n        line1_intersect_start = Point(pre_line[-1])\n        line1_intersect_end = Point(pre_line[-1])\n        line2_intersect_start = Point(vec[0])\n        line2_intersect_end = Point(vec[0])\n    \n    # merge the points on line2's intersection area towards line1\n    merged_line2_points = merge_l2_points_to_l1(line1, line2, line2_intersect_start, line2_intersect_end)\n    # merge the points on line1's intersection area towards line2\n    merged_line1_points = merge_l2_points_to_l1(line2, line1, line1_intersect_start, line1_intersect_end)\n    \n    # segment the lines based on the merged points (intersection area); split each line into a start segment, a merged segment, and an end segment\n    l2_start_segment, l2_end_segment = segment_line_based_on_merged_area(line2, merged_line2_points)\n    l1_start_segment, l1_end_segment = segment_line_based_on_merged_area(line1, merged_line1_points)\n    \n    # choose the longer of line1's and line2's segments as the final start and end segments\n    start_segment = get_longer_segmenent_to_merged_points(l1_start_segment, l2_start_segment, merged_line2_points, segment_type=\"start\")\n    end_segment = get_longer_segmenent_to_merged_points(l1_end_segment, l2_end_segment, merged_line2_points, segment_type=\"end\")\n    merged_polyline = np.concatenate((start_segment, merged_line2_points, end_segment), axis=0)\n    \n    # corner case: check whether the polyline should be closed into a circle\n    merged_polyline = connect_polygon(merged_polyline, merged_lines)\n    \n    merged_lines[merged_line_index] = merged_polyline\n\n    return merged_lines\n\ndef iou_merge_divider(merged_lines, vec, thickness=1):\n    # intersection: the (polygon) overlap area between the new line and the line in merged_lines\n    # pre_line: the line in merged_lines that has max IoU with the new line\n    intersection, pre_line, merged_line_index = get_line_lineList_max_intersection(merged_lines, vec, thickness)\n    # align the new line with the line in the merged_lines so that points on two lines are traversed in the same direction\n    vec = algin_l2_with_l1(pre_line, vec)\n    \n    line1 = LineString(pre_line)\n    line2 = LineString(vec)\n    \n    # get the intersection points between the IoU area and the two lines\n    line1_intersect_start, line1_intersect_end = get_intersection_point_on_line(pre_line, intersection)\n    line2_intersect_start, line2_intersect_end = get_intersection_point_on_line(vec, intersection)\n    \n    # If no intersection points are found (a corner case), fall back to the last point of line1\n    # and the first point of line2, which connects the two lines head to tail\n    if line1_intersect_start is None or line1_intersect_end is None or line2_intersect_start is None or line2_intersect_end is None:\n        line1_intersect_start = Point(pre_line[-1])\n        line1_intersect_end = Point(pre_line[-1])\n        line2_intersect_start = Point(vec[0])\n        line2_intersect_end = Point(vec[0])\n    \n    # merge the points on line2's intersection area towards line1\n    merged_line2_points = merge_l2_points_to_l1(line1, line2, line2_intersect_start, line2_intersect_end)\n    # merge the points on line1's intersection area towards line2\n    
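# (same helper with the argument roles swapped, so line1's points are pulled toward line2)\n    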
merged_line1_points = merge_l2_points_to_l1(line2, line1, line1_intersect_start, line1_intersect_end)\n    \n    # segment the lines based on the merged points (intersection area); split each line into a start segment, a merged segment, and an end segment\n    l2_start_segment, l2_end_segment = segment_line_based_on_merged_area(line2, merged_line2_points)\n    l1_start_segment, l1_end_segment = segment_line_based_on_merged_area(line1, merged_line1_points)\n    \n    # choose the longer of line1's and line2's segments as the final start and end segments\n    start_segment = get_longer_segmenent_to_merged_points(l1_start_segment, l2_start_segment, merged_line2_points, segment_type=\"start\")\n    end_segment = get_longer_segmenent_to_merged_points(l1_end_segment, l2_end_segment, merged_line2_points, segment_type=\"end\")\n    merged_polyline = np.concatenate((start_segment, merged_line2_points, end_segment), axis=0)\n    \n    # update the merged_lines\n    merged_lines[merged_line_index] = merged_polyline\n    \n    return merged_lines\n\ndef merge_divider(vecs=None, thickness=1):\n    merged_lines = []\n    for vec in vecs:\n        \n        # if the merged_lines is empty, add the first line\n        if len(merged_lines) == 0:\n            merged_lines.append(vec)\n            continue\n        \n        # thicken the vec (the new line) and each merged line, then compute the max IoU between them\n        iou = []\n        for one_merged_line in merged_lines:\n            line1 = LineString(one_merged_line)\n            line2 = LineString(vec)\n            thick_line1 = line1.buffer(thickness)\n            thick_line2 = line2.buffer(thickness)\n            intersection = thick_line1.intersection(thick_line2)\n            iou.append(intersection.area / thick_line2.area)\n        \n        # If the max IoU is 0, add the new line to the merged_lines\n        if max(iou) == 0:\n            merged_lines.append(vec)\n        # If the IoU is not 0, merge the new line with the matched line in the merged_lines\n        else:\n            merged_lines = iou_merge_divider(merged_lines, vec, thickness=thickness)\n\n    return merged_lines\n\ndef merge_boundary(vecs=None, thickness=1, iou_threshold=0.95):\n    merged_lines = []\n    for vec in vecs:\n\n        # if the merged_lines is empty, add the first line\n        if len(merged_lines) == 0:\n            merged_lines.append(vec)\n            continue\n        \n        # thicken the vec (the new line) and each merged line, then compute the max IoU between them\n        iou = []\n        for one_merged_line in merged_lines:\n            line1 = LineString(one_merged_line)\n            line2 = LineString(vec)\n            thick_line1 = line1.buffer(thickness)\n            thick_line2 = line2.buffer(thickness)\n            intersection = thick_line1.intersection(thick_line2)\n            iou.append(intersection.area / thick_line2.area)\n        \n        # If the max IoU is larger than the threshold, the new line is redundant; skip it\n        if max(iou) > iou_threshold:\n            continue\n        \n        # If the IoU is not 0, merge the new line with the matched line in the merged_lines\n        if max(iou) > 0:\n            merged_lines = iou_merge_boundry(merged_lines, vec, thickness=thickness)\n        else:\n            merged_lines.append(vec)\n\n    return merged_lines\n\ndef get_consecutive_vectors_with_opt(prev_vectors=None, prev2curr_matrix=None, origin=None, roi_size=None, denormalize=False, 
clip=False):\n    # transform prev vectors into the current frame\n    prev2curr_vectors = dict()\n    for label, vecs in prev_vectors.items():\n        if len(vecs) > 0:\n            vecs = np.stack(vecs, 0)\n            vecs = torch.tensor(vecs)\n            N, num_points, _ = vecs.shape\n            if denormalize:\n                denormed_vecs = vecs * roi_size + origin # (num_prop, num_pts, 2)\n            else:\n                denormed_vecs = vecs\n            denormed_vecs = torch.cat([\n                denormed_vecs,\n                denormed_vecs.new_zeros((N, num_points, 1)), # z-axis\n                denormed_vecs.new_ones((N, num_points, 1)) # homogeneous 4th dim\n            ], dim=-1) # (num_prop, num_pts, 4)\n\n            transformed_vecs = torch.einsum('lk,ijk->ijl', prev2curr_matrix, denormed_vecs.double()).float()\n            normed_vecs = (transformed_vecs[..., :2] - origin) / roi_size # (num_prop, num_pts, 2)\n            if clip:\n                normed_vecs = torch.clip(normed_vecs, min=0., max=1.)\n            prev2curr_vectors[label] = normed_vecs\n        else:\n            prev2curr_vectors[label] = vecs\n\n    # convert to ego space for visualization\n    for label in prev2curr_vectors:\n        if len(prev2curr_vectors[label]) > 0:\n            prev2curr_vectors[label] = prev2curr_vectors[label] * roi_size + origin\n    return prev2curr_vectors\n\ndef get_prev2curr_vectors(vecs=None, prev2curr_matrix=None, origin=None, roi_size=None, denormalize=False, clip=False):\n    # transform prev vectors into the current frame\n    if len(vecs) > 0:\n        vecs = np.stack(vecs, 0)\n        vecs = torch.tensor(vecs)\n        N, num_points, _ = vecs.shape\n        if denormalize:\n            denormed_vecs = vecs * roi_size + origin # (num_prop, num_pts, 2)\n        else:\n            denormed_vecs = vecs\n        denormed_vecs = torch.cat([\n            denormed_vecs,\n            denormed_vecs.new_zeros((N, num_points, 1)), # z-axis\n            denormed_vecs.new_ones((N, num_points, 1)) # homogeneous 4th dim\n        ], dim=-1) # (num_prop, num_pts, 4)\n\n        transformed_vecs = torch.einsum('lk,ijk->ijl', prev2curr_matrix, denormed_vecs.double()).float()\n        vecs = (transformed_vecs[..., :2] - origin) / roi_size # (num_prop, num_pts, 2)\n        if clip:\n            vecs = torch.clip(vecs, min=0., max=1.)\n        # vecs = vecs * roi_size + origin\n    \n    return vecs\n\ndef plot_fig_merged_per_frame(num_frames, car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args):\n    os.makedirs(pred_save_folder, exist_ok=True)\n  \n    # keep the current status of each instance; an instance is added to the dict when it first appears\n    instance_bank = dict()\n\n    # trace the path in reverse to get a sub-sampled trajectory for visualizing the car\n    pre_center = car_trajectory[-1][0]\n    selected_traj_timesteps = []\n    for timestep, (car_center, rotation_degrees) in enumerate(car_trajectory[::-1]):\n        if np.linalg.norm(car_center - pre_center) < 5 and timestep > 0 and timestep < len(car_trajectory)-1:\n            continue\n        selected_traj_timesteps.append(len(car_trajectory)-1-timestep)\n        pre_center = car_center\n    selected_traj_timesteps = selected_traj_timesteps[::-1]\n\n    image_list = [pred_save_folder + f'/{frame_timestep}.png' for frame_timestep in range(num_frames)]\n    #save_t(len(image_list), pred_save_folder) # save the timestep text mp4 file\n\n    # plot the figure at each frame\n    for frame_timestep in range(num_frames):\n        
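# draw one global-map snapshot per timestep: the car trajectory so far plus every\n        # instance observed up to this frame, merged incrementally per category\n        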
fig = plt.figure(figsize=(int(abs(x_min) + abs(x_max)) + 10, int(abs(y_min) + abs(y_max)) + 10))\n        ax = fig.add_subplot(1, 1, 1)\n        ax.set_xlim(x_min, x_max)\n        ax.set_ylim(y_min, y_max)\n        \n        # setup the figure with car\n        car_img = Image.open('resources/car-orange.png')\n        faded_rate = np.linspace(0.2, 1, num=len(car_trajectory))\n        pre_center = car_trajectory[0][0]\n\n        for t in selected_traj_timesteps: # only plot the car at the selected timesteps\n            if t > frame_timestep: # if the car has not appeared at this frame\n                break\n            car_center, rotation_degrees = car_trajectory[t]\n            translation = transforms.Affine2D().translate(car_center[0], car_center[1])\n            rotation = transforms.Affine2D().rotate_deg(rotation_degrees)\n            rotation_translation = rotation + translation\n            ax.imshow(car_img, extent=[-2.2, 2.2, -2, 2], transform=rotation_translation + ax.transData, alpha=faded_rate[t])\n        \n        for vec_tag, vec_all_frames in id_prev2curr_pred_vectors.items():\n            vec_frame_info = id_prev2curr_pred_frame[vec_tag]\n            first_appear_frame = sorted(list(vec_frame_info.keys()))[0]\n\n            need_merge = False\n            if frame_timestep < first_appear_frame: # the instance has not appeared\n                continue\n            elif frame_timestep in vec_frame_info:\n                need_merge = True\n                vec_index_in_instance = vec_frame_info[frame_timestep]\n\n            label, vec_glb_idx = vec_tag.split('_')\n            label = int(label)\n            vec_glb_idx = int(vec_glb_idx)\n\n            if need_merge:\n                curr_vec = vec_all_frames[vec_index_in_instance]\n                curr_vec_polyline = LineString(curr_vec)\n                if vec_tag not in instance_bank: # if the instance first appears\n                    polylines = [curr_vec_polyline,]\n                else: # if the instance has appeared before, polylines = previous merged polyline + current polyline\n                    polylines = instance_bank[vec_tag] + [curr_vec_polyline,]\n            else: # no new detection in this frame; reuse the previously merged polylines\n                polylines = instance_bank[vec_tag]\n\n            if label == 0: # ped_crossing\n                color = 'b'\n            elif label == 1: # divider\n                color = 'r'\n            elif label == 2: # boundary\n                color = 'g'\n            \n            if label == 0: # crossing, merged by convex hull\n                if need_merge:\n                    polygon = merge_corssing(polylines)\n                    polygon = polygon.simplify(args.simplify)\n                    vector = np.array(polygon.exterior.coords)\n                else: # if no new instance, use the previous merged polyline to plot\n                    vector = np.array(polylines[0].coords)\n\n                pts = vector[:, :2]\n                x = np.array([pt[0] for pt in pts])\n                y = np.array([pt[1] for pt in pts])\n                ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n                ax.plot(x, y, \"o\", color=color, markersize=50)\n\n                # update instance bank for ped\n                updated_polyline = LineString(vector)\n                instance_bank[vec_tag] = [updated_polyline, ]\n\n            elif label == 1: # divider, merged by fitting a polyline\n              
                if need_merge:\n                    polylines_vecs = [np.array(one_line.coords) for one_line in polylines]\n                    polylines_vecs = merge_divider(polylines_vecs)\n                else: # no new observation, use the previously merged polylines\n                    polylines_vecs = [np.array(line.coords) for line in polylines]\n\n                for one_line in polylines_vecs:\n                    one_line = np.array(LineString(one_line).simplify(args.simplify*2).coords)\n                    pts = one_line[:, :2]\n                    x = np.array([pt[0] for pt in pts])\n                    y = np.array([pt[1] for pt in pts])\n                    ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n                    ax.plot(x, y, \"o\", color=color, markersize=50)\n\n                # update instance bank for divider\n                updated_polylines = [LineString(vec) for vec in polylines_vecs]\n                instance_bank[vec_tag] = updated_polylines\n\n            elif label == 2: # boundary, merged with merge_boundary\n                if need_merge:\n                    polylines_vecs = [np.array(one_line.coords) for one_line in polylines]\n                    polylines_vecs = merge_boundary(polylines_vecs)\n                else: # no new observation, use the previously merged polylines\n                    polylines_vecs = [np.array(line.coords) for line in polylines]\n\n                for one_line in polylines_vecs:\n                    one_line = np.array(LineString(one_line).simplify(args.simplify).coords)\n                    pts = one_line[:, :2]\n                    x = np.array([pt[0] for pt in pts])\n                    y = np.array([pt[1] for pt in pts])\n                    ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n                    ax.plot(x, y, \"o\", color=color, markersize=50)\n\n                # update instance bank for boundary\n                updated_polylines = [LineString(vec) for vec in polylines_vecs]\n                instance_bank[vec_tag] = updated_polylines\n\n        pred_save_path = pred_save_folder + f'/{frame_timestep}.png'\n        plt.grid(False)\n        plt.savefig(pred_save_path, bbox_inches='tight', transparent=args.transparent, dpi=args.dpi)\n        plt.clf()\n        plt.close(fig)\n        print(\"image saved to: \", pred_save_path)\n\n    image_list = [pred_save_folder + f'/{frame_timestep}.png' for frame_timestep in range(num_frames)]\n    gif_output_path = pred_save_folder + '/vis.gif'\n    save_as_video(image_list, gif_output_path)\n\n# merge the vectors across all frames and plot the merged vectors\ndef plot_fig_merged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args):\n\n    # setup the figure with car\n    fig = plt.figure(figsize=(int(abs(x_min) + abs(x_max)) + 10, int(abs(y_min) + abs(y_max)) + 10))\n    ax = fig.add_subplot(1, 1, 1)\n    ax.set_xlim(x_min, x_max)\n    ax.set_ylim(y_min, y_max)\n    car_img = Image.open('resources/car-orange.png')\n\n    faded_rate = np.linspace(0.2, 1, num=len(car_trajectory))\n\n    # trace the path in reverse to get a sub-sampled trajectory for visualizing the car\n    pre_center = car_trajectory[-1][0]\n    selected_traj = []\n    selected_timesteps = []\n    for timestep, (car_center, rotation_degrees) in enumerate(car_trajectory[::-1]):\n        if np.linalg.norm(car_center - pre_center) < 5 and timestep > 0 and timestep < len(car_trajectory)-1:\n
            continue\n        selected_traj.append([car_center, rotation_degrees])\n        selected_timesteps.append(len(car_trajectory)-1-timestep)\n        pre_center = car_center\n    selected_traj = selected_traj[::-1]\n    selected_timesteps = selected_timesteps[::-1]\n\n    for selected_t, (car_center, rotation_degrees) in zip(selected_timesteps, selected_traj):\n        translation = transforms.Affine2D().translate(car_center[0], car_center[1])\n        rotation = transforms.Affine2D().rotate_deg(rotation_degrees)\n        rotation_translation = rotation + translation\n        ax.imshow(car_img, extent=[-2.2, 2.2, -2, 2], transform=rotation_translation + ax.transData,\n                alpha=faded_rate[selected_t])\n\n    # merge the vectors across all frames\n    for tag, vecs in id_prev2curr_pred_vectors.items():\n        label, vec_glb_idx = tag.split('_')\n        label = int(label)\n        vec_glb_idx = int(vec_glb_idx)\n\n        if label == 0: # ped_crossing\n            color = 'b'\n        elif label == 1: # divider\n            color = 'r'\n        elif label == 2: # boundary\n            color = 'g'\n\n        # gather the vectors belonging to the same instance\n        polylines = []\n        for vec in vecs:\n            polylines.append(LineString(vec))\n        if len(polylines) == 0:\n            continue\n\n        if label == 0: # crossing, merged by convex hull\n            polygon = merge_corssing(polylines)\n            if polygon.area < 2:\n                continue\n            polygon = polygon.simplify(args.simplify)\n            vector = np.array(polygon.exterior.coords)\n            pts = vector[:, :2]\n            x = np.array([pt[0] for pt in pts])\n            y = np.array([pt[1] for pt in pts])\n            ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n            ax.plot(x, y, \"o\", color=color, markersize=50)\n        elif label == 1: # divider, merged by fitting a polyline\n            polylines_vecs = [np.array(one_line.coords) for one_line in polylines]\n            polylines_vecs = merge_divider(polylines_vecs)\n            for one_line in polylines_vecs:\n                one_line = np.array(LineString(one_line).simplify(args.simplify).coords)\n                pts = one_line[:, :2]\n                x = np.array([pt[0] for pt in pts])\n                y = np.array([pt[1] for pt in pts])\n                ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n                ax.plot(x, y, \"o\", color=color, markersize=50)\n        elif label == 2: # boundary, merged with merge_boundary\n            polylines_vecs = [np.array(one_line.coords) for one_line in polylines]\n            polylines_vecs = merge_boundary(polylines_vecs)\n            for one_line in polylines_vecs:\n                one_line = np.array(LineString(one_line).simplify(args.simplify).coords)\n                pts = one_line[:, :2]\n                x = np.array([pt[0] for pt in pts])\n                y = np.array([pt[1] for pt in pts])\n                ax.plot(x, y, '-', color=color, linewidth=20, markersize=50, alpha=args.line_opacity)\n                ax.plot(x, y, \"o\", color=color, markersize=50)\n\n    plt.grid(False)\n    plt.savefig(pred_save_path, bbox_inches='tight', transparent=args.transparent, dpi=args.dpi)\n    plt.clf()\n    plt.close(fig)\n    print(\"image saved to: \", pred_save_path)\n
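\n# plot the raw (unmerged) vectors frame by frame, accumulating them on a single canvas and saving one image per timestep\n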
def plot_fig_unmerged_per_frame(num_frames, car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args):\n\n    os.makedirs(pred_save_folder, exist_ok=True)\n\n    # trace the path in reverse to get the sub-sampled timesteps for visualizing the car\n    pre_center = car_trajectory[-1][0]\n    selected_traj_timesteps = []\n    for timestep, (car_center, rotation_degrees) in enumerate(car_trajectory[::-1]):\n        if np.linalg.norm(car_center - pre_center) < 5 and timestep > 0 and timestep < len(car_trajectory)-1:\n            continue\n        selected_traj_timesteps.append(len(car_trajectory)-1-timestep)\n        pre_center = car_center\n    selected_traj_timesteps = selected_traj_timesteps[::-1]\n\n    # setup the figure with car\n    fig = plt.figure(figsize=(int(abs(x_min) + abs(x_max)) + 10, int(abs(y_min) + abs(y_max)) + 10))\n    ax = fig.add_subplot(1, 1, 1)\n    ax.set_xlim(x_min, x_max)\n    ax.set_ylim(y_min, y_max)\n    car_img = Image.open('resources/car-orange.png')\n    faded_rate = np.linspace(0.2, 1, num=len(car_trajectory))\n\n    for frame_timestep in range(num_frames):\n\n        if frame_timestep in selected_traj_timesteps:\n            car_center, rotation_degrees = car_trajectory[frame_timestep]\n            translation = transforms.Affine2D().translate(car_center[0], car_center[1])\n            rotation = transforms.Affine2D().rotate_deg(rotation_degrees)\n            rotation_translation = rotation + translation\n            ax.imshow(car_img, extent=[-2.2, 2.2, -2, 2], transform=rotation_translation + ax.transData, alpha=faded_rate[frame_timestep])\n\n        # plot the vectors of this frame\n        for vec_tag, vec_all_frames in id_prev2curr_pred_vectors.items():\n            vec_frame_info = id_prev2curr_pred_frame[vec_tag]\n            if frame_timestep not in vec_frame_info: # the instance does not appear in this frame\n                continue\n            else:\n                vec_index_in_instance = vec_frame_info[frame_timestep]\n\n            curr_vec = vec_all_frames[vec_index_in_instance]\n            label, vec_glb_idx = vec_tag.split('_')\n            label = int(label)\n            vec_glb_idx = int(vec_glb_idx)\n\n            if label == 0: # ped_crossing\n                color = 'b'\n            elif label == 1: # divider\n                color = 'r'\n            elif label == 2: # boundary\n                color = 'g'\n\n            polyline = LineString(curr_vec)\n            vector = np.array(polyline.coords)\n            pts = vector[:, :2]\n            x = np.array([pt[0] for pt in pts])\n            y = np.array([pt[1] for pt in pts])\n            ax.plot(x, y, 'o-', color=color, linewidth=20, markersize=50)\n\n        pred_save_path = pred_save_folder + f'/{frame_timestep}.png'\n        plt.savefig(pred_save_path, bbox_inches='tight', transparent=args.transparent, dpi=args.dpi)\n        print(\"image saved to: \", pred_save_path)\n\n    plt.grid(False)\n    plt.clf()\n    plt.close(fig)\n    image_list = [pred_save_folder + f'/{frame_timestep}.png' for frame_timestep in range(num_frames)]\n    gif_output_path = pred_save_folder + '/vis.gif'\n    save_as_video(image_list, gif_output_path)\n
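\n# plot all raw vectors of the whole scene (no merging) on a single image\n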
def plot_fig_unmerged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args):\n\n    # setup the figure with car\n    fig = plt.figure(figsize=(int(abs(x_min) + abs(x_max)) + 10, int(abs(y_min) + abs(y_max)) + 10))\n    ax = fig.add_subplot(1, 1, 1)\n    ax.set_xlim(x_min, x_max)\n    ax.set_ylim(y_min, y_max)\n    car_img = Image.open('resources/car-orange.png')\n\n    # trace the path in reverse to get a sub-sampled trajectory for visualizing the car\n    pre_center = car_trajectory[-1][0]\n    selected_traj = []\n    selected_timesteps = []\n    for timestep, (car_center, rotation_degrees) in enumerate(car_trajectory[::-1]):\n        if np.linalg.norm(car_center - pre_center) < 5 and timestep > 0 and timestep < len(car_trajectory)-1:\n            continue\n        selected_traj.append([car_center, rotation_degrees])\n        selected_timesteps.append(len(car_trajectory)-1-timestep)\n        pre_center = car_center\n    selected_traj = selected_traj[::-1]\n    selected_timesteps = selected_timesteps[::-1]\n\n    # plot the car trajectory, fading older poses\n    faded_rate = np.linspace(0.2, 1, num=len(car_trajectory))\n    for selected_t, (car_center, rotation_degrees) in zip(selected_timesteps, selected_traj):\n        translation = transforms.Affine2D().translate(car_center[0], car_center[1])\n        rotation = transforms.Affine2D().rotate_deg(rotation_degrees)\n        rotation_translation = rotation + translation\n        ax.imshow(car_img, extent=[-2.2, 2.2, -2, 2], transform=rotation_translation + ax.transData,\n                alpha=faded_rate[selected_t])\n\n    # plot the unmerged vectors (all the predicted / GT vectors)\n    for tag, vecs in id_prev2curr_pred_vectors.items():\n        label, vec_glb_idx = tag.split('_')\n        label = int(label)\n        vec_glb_idx = int(vec_glb_idx)\n\n        if label == 0: # ped_crossing\n            color = 'b'\n        elif label == 1: # divider\n            color = 'r'\n        elif label == 2: # boundary\n            color = 'g'\n\n        polylines = []\n        for vec in vecs:\n            polylines.append(LineString(vec))\n\n        if len(polylines) == 0:\n            continue\n\n        for one_line in polylines:\n            vector = np.array(one_line.coords)\n            pts = vector[:, :2]\n            x = np.array([pt[0] for pt in pts])\n            y = np.array([pt[1] for pt in pts])\n            ax.plot(x, y, 'o-', color=color, linewidth=20, markersize=50)\n\n    plt.savefig(pred_save_path, bbox_inches='tight', transparent=args.transparent, dpi=args.dpi)\n    plt.clf()\n    plt.close(fig)\n    print(\"image saved to: \", pred_save_path)\n\n# render the 't = i' timestep text frames and compile them into a video\ndef save_t(t_max, main_save_folder):\n    txt_save_folder = os.path.join(main_save_folder, 'txt')\n    os.makedirs(txt_save_folder, exist_ok=True)\n    t = range(t_max)\n\n    for i in t:\n        fig, ax = plt.subplots(figsize=(2, 1), dpi=300)  # increase DPI for higher resolution\n        ax.text(0.1, 0.5, f't = {i}', fontsize=40, ha='left', va='center')\n        ax.axis('off')\n        ax.set_xlim(0, 1)\n        ax.set_ylim(0, 1)\n        fig.subplots_adjust(left=0, right=1, top=1, bottom=0)  # remove margins around the text\n        plt.savefig(f'{txt_save_folder}/text_{i}.png', pad_inches=0)\n        plt.close(fig)\n\n    text_images = [f'{txt_save_folder}/text_{i}.png' for i in t]\n    frames = [imageio.imread(img_path) for img_path in text_images]\n    mp4_output_path = os.path.join(main_save_folder, 'text.mp4')\n    imageio.mimsave(mp4_output_path, frames, fps=10)  # fps controls the speed of the video\n    print(\"mp4 saved to: \", mp4_output_path)\n
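\n# compile the images in image_list into an mp4; callers pass a '.gif' path and the suffix is swapped here\n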
def save_as_video(image_list, mp4_output_path, scale=None):\n    mp4_output_path = mp4_output_path.replace('.gif', '.mp4')\n    images = [Image.fromarray(imageio.imread(img_path)).convert(\"RGBA\") for img_path in image_list]\n\n    if scale is not None:\n        w, h = images[0].size\n        images = [img.resize((int(w*scale), int(h*scale)), Image.Resampling.LANCZOS) for img in images]\n\n    try:\n        imageio.mimsave(mp4_output_path, images, format='MP4', fps=10)\n    except ValueError: # in case the frame shapes are not all the same, manually resize them\n        resized_images = [img.resize(images[0].size, Image.Resampling.LANCZOS) for img in images]\n        print('Sizes are not all the same, resizing...')\n        imageio.mimsave(mp4_output_path, resized_images, format='MP4', fps=10)\n    print(\"mp4 saved to: \", mp4_output_path)\n\n\ndef vis_pred_data(scene_name=\"\", pred_results=None, origin=None, roi_size=None, args=None):\n\n    # get the item indices of the scene\n    index_list = []\n    for index in range(len(pred_results)):\n        if pred_results[index][\"scene_name\"] == scene_name:\n            index_list.append(index)\n\n    car_trajectory = []\n    # key: 'label_vec_glb_idx'; value: list of vectors in the last frame's coordinates\n    id_prev2curr_pred_vectors = defaultdict(list)\n    # key: 'label_vec_glb_idx'; value: list of [frame_time, local index within the instance]\n    id_prev2curr_pred_frame_info = defaultdict(list)\n    # key: 'label_vec_glb_idx'; value: {frame_time: local index within the instance}\n    id_prev2curr_pred_frame = defaultdict(dict)\n\n    # iterate through each frame\n    last_index = index_list[-1]\n    for index in index_list:\n\n        vectors = np.array(pred_results[index][\"vectors\"]).reshape((-1, 20, 2))\n        # some result files store normalized coordinates, others do not\n        if np.abs(vectors).max() <= 1:\n            curr_vectors = vectors * roi_size + origin\n        else:\n            curr_vectors = vectors\n\n        # build the transformation from this frame's ego coordinates to the last frame's ego coordinates\n        prev_e2g_trans = torch.tensor(pred_results[index]['meta']['ego2global_translation'], dtype=torch.float64)\n        prev_e2g_rot = torch.tensor(pred_results[index]['meta']['ego2global_rotation'], dtype=torch.float64)\n        curr_e2g_trans = torch.tensor(pred_results[last_index]['meta']['ego2global_translation'], dtype=torch.float64)\n        curr_e2g_rot = torch.tensor(pred_results[last_index]['meta']['ego2global_rotation'], dtype=torch.float64)\n        prev_e2g_matrix = torch.eye(4, dtype=torch.float64)\n        prev_e2g_matrix[:3, :3] = prev_e2g_rot\n        prev_e2g_matrix[:3, 3] = prev_e2g_trans\n\n        curr_g2e_matrix = torch.eye(4, dtype=torch.float64)\n        curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n        curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n        # prev ego -> global -> last-frame ego\n        prev2curr_matrix = curr_g2e_matrix @ prev_e2g_matrix\n        prev2curr_pred_vectors = get_prev2curr_vectors(curr_vectors, prev2curr_matrix, origin, roi_size, False, False)\n        prev2curr_pred_vectors = prev2curr_pred_vectors * roi_size + origin\n\n        rotation_degrees = np.degrees(np.arctan2(prev2curr_matrix[:3, :3][1, 0], prev2curr_matrix[:3, :3][0, 0]))\n        car_center = get_prev2curr_vectors(np.array((0, 0)).reshape(1, 1, 2), prev2curr_matrix, origin, roi_size, False, False) * roi_size + origin\n        car_trajectory.append([car_center.squeeze(), rotation_degrees])\n\n        for i, (label, vec_glb_idx) in enumerate(zip(pred_results[index]['labels'], pred_results[index]['global_ids'])):\n            dict_key = \"{}_{}\".format(label, vec_glb_idx)\n            id_prev2curr_pred_vectors[dict_key].append(prev2curr_pred_vectors[i])\n            # record [frame_time, local index within this instance]\n            id_prev2curr_pred_frame_info[dict_key].append([pred_results[index][\"local_idx\"], len(id_prev2curr_pred_frame[dict_key])])\n\n
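        # rebuild the per-instance lookup {frame_time: local index} from the info collected so far\n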
        for key, frame_info in id_prev2curr_pred_frame_info.items():\n            frame_localIdx = dict()\n            for frame_time, local_index in frame_info:\n                frame_localIdx[frame_time] = local_index\n            id_prev2curr_pred_frame[key] = frame_localIdx\n\n    # sort the instances by label and global id\n    id_prev2curr_pred_vectors = {key: id_prev2curr_pred_vectors[key] for key in sorted(id_prev2curr_pred_vectors)}\n\n    # set the size of the image\n    x_min = -roi_size[0] / 2\n    x_max = roi_size[0] / 2\n    y_min = -roi_size[1] / 2\n    y_max = roi_size[1] / 2\n\n    all_points = []\n    for vecs in id_prev2curr_pred_vectors.values():\n        points = np.concatenate(vecs, axis=0)\n        all_points.append(points)\n    all_points = np.concatenate(all_points, axis=0)\n\n    x_min = min(x_min, all_points[:, 0].min())\n    x_max = max(x_max, all_points[:, 0].max())\n    y_min = min(y_min, all_points[:, 1].min())\n    y_max = max(y_max, all_points[:, 1].max())\n\n    scene_dir = os.path.join(args.out_dir, scene_name)\n    os.makedirs(scene_dir, exist_ok=True)\n\n    if args.per_frame_result:\n        num_frames = len(index_list)\n        pred_save_folder = os.path.join(scene_dir, 'pred_merged_per_frame')\n        plot_fig_merged_per_frame(num_frames, car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args)\n        pred_save_folder = os.path.join(scene_dir, 'pred_unmerged_per_frame')\n        plot_fig_unmerged_per_frame(num_frames, car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args)\n    pred_save_path = os.path.join(scene_dir, 'pred_unmerged.png')\n    plot_fig_unmerged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args)\n    pred_save_path = os.path.join(scene_dir, 'pred_merged.png')\n    plot_fig_merged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args)\n\n    # combine the merged and unmerged images into one plot for comparison\n    comb_save_path = os.path.join(scene_dir, 'pred_comb.png')\n    image_paths = [os.path.join(scene_dir, 'pred_merged.png'), os.path.join(scene_dir, 'pred_unmerged.png')]\n    labels = ['Merged', 'Unmerged']\n    combine_images_with_labels(image_paths, labels, comb_save_path)\n    print(\"image saved to: \", comb_save_path)\n
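\n# visualize the GT annotations of one scene, transformed into the last frame's coordinates\n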
def vis_gt_data(scene_name, args, dataset, gt_data, origin, roi_size):\n\n    gt_info = gt_data[scene_name]\n    gt_info_list = []\n    ids_info = []\n\n    # get the per-frame GT items of the scene\n    for index, one_idx in enumerate(gt_info[\"sample_ids\"]):\n        gt_info_list.append(dataset[one_idx])\n        ids_info.append(gt_info[\"instance_ids\"][index])\n\n    car_trajectory = []\n    scene_dir = os.path.join(args.out_dir, scene_name)\n    os.makedirs(scene_dir, exist_ok=True)\n\n    # key: 'label_vec_glb_idx'; value: list of vectors in the last frame's coordinates\n    id_prev2curr_pred_vectors = defaultdict(list)\n    # key: 'label_vec_glb_idx'; value: list of [frame_time, local index within the instance]\n    id_prev2curr_pred_frame_info = defaultdict(list)\n    # key: 'label_vec_glb_idx'; value: {frame_time: local index within the instance}\n    id_prev2curr_pred_frame = defaultdict(dict)\n\n    scene_len = len(gt_info_list)\n    for idx in range(scene_len):\n        curr_vectors = dict()\n        # denormalize the vectors\n        for label, vecs in gt_info_list[idx]['vectors'].data.items():\n            if len(vecs) > 0:\n                curr_vectors[label] = vecs * roi_size + origin\n            else:\n                curr_vectors[label] = vecs\n\n        # build the transformation from this frame's ego coordinates to the last frame's ego coordinates\n        prev_e2g_trans = torch.tensor(gt_info_list[idx]['img_metas'].data['ego2global_translation'], dtype=torch.float64)\n        prev_e2g_rot = torch.tensor(gt_info_list[idx]['img_metas'].data['ego2global_rotation'], dtype=torch.float64)\n        curr_e2g_trans = torch.tensor(gt_info_list[scene_len-1]['img_metas'].data['ego2global_translation'], dtype=torch.float64)\n        curr_e2g_rot = torch.tensor(gt_info_list[scene_len-1]['img_metas'].data['ego2global_rotation'], dtype=torch.float64)\n        prev_e2g_matrix = torch.eye(4, dtype=torch.float64)\n        prev_e2g_matrix[:3, :3] = prev_e2g_rot\n        prev_e2g_matrix[:3, 3] = prev_e2g_trans\n\n        curr_g2e_matrix = torch.eye(4, dtype=torch.float64)\n        curr_g2e_matrix[:3, :3] = curr_e2g_rot.T\n        curr_g2e_matrix[:3, 3] = -(curr_e2g_rot.T @ curr_e2g_trans)\n\n        # transform the vectors of this frame into the last frame's coordinates\n        prev2curr_matrix = curr_g2e_matrix @ prev_e2g_matrix\n        prev2curr_pred_vectors = get_consecutive_vectors_with_opt(curr_vectors, prev2curr_matrix, origin, roi_size, False, False)\n        for label, id_info in ids_info[idx].items():\n            for vec_local_idx, vec_glb_idx in id_info.items():\n                dict_key = \"{}_{}\".format(label, vec_glb_idx)\n                id_prev2curr_pred_vectors[dict_key].append(prev2curr_pred_vectors[label][vec_local_idx])\n                # gt_info_list[idx][\"seq_info\"].data[1] stores the frame time at which the vector appears;\n                # len(id_prev2curr_pred_frame[dict_key]) is the vector's index within its instance\n                id_prev2curr_pred_frame_info[dict_key].append([gt_info_list[idx][\"seq_info\"].data[1], len(id_prev2curr_pred_frame[dict_key])])\n        # rebuild the per-instance lookup {frame_time: local index} from the info collected so far\n        for key, frame_info in id_prev2curr_pred_frame_info.items():\n            frame_localIdx = dict()\n            for frame_time, local_index in frame_info:\n                frame_localIdx[frame_time] = local_index\n            id_prev2curr_pred_frame[key] = frame_localIdx\n\n        rotation_degrees = np.degrees(np.arctan2(prev2curr_matrix[:3, :3][1, 0], prev2curr_matrix[:3, :3][0, 0]))\n        # get the center of the car in the last frame's coordinates\n        car_center = get_prev2curr_vectors(np.array((0, 0)).reshape(1, 1, 2), prev2curr_matrix, origin, roi_size, False, False) * roi_size + origin\n        car_trajectory.append([car_center.squeeze(), rotation_degrees])\n\n    # sort the instances by label and global id\n    id_prev2curr_pred_vectors = {key: id_prev2curr_pred_vectors[key] for key in sorted(id_prev2curr_pred_vectors)}\n\n    # get the x_min, x_max, y_min, y_max for the figure size\n    x_min = -roi_size[0] / 2\n    x_max = roi_size[0] / 2\n    y_min = -roi_size[1] / 2\n    y_max = roi_size[1] / 2\n\n    all_points = []\n    for vecs in id_prev2curr_pred_vectors.values():\n        points = np.concatenate(vecs, axis=0)\n        all_points.append(points)\n    all_points = np.concatenate(all_points, axis=0)\n\n    x_min = min(x_min, all_points[:, 0].min())\n    x_max = max(x_max, all_points[:, 0].max())\n    y_min = min(y_min, all_points[:, 1].min())\n    y_max = max(y_max, all_points[:, 1].max())\n\n
    # visualize the per-frame results\n    if args.per_frame_result:\n        pred_save_folder = os.path.join(scene_dir, 'gt_merged_per_frame')\n        plot_fig_merged_per_frame(len(gt_info_list), car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args)\n        pred_save_folder = os.path.join(scene_dir, 'gt_unmerged_per_frame')\n        plot_fig_unmerged_per_frame(len(gt_info_list), car_trajectory, x_min, x_max, y_min, y_max, pred_save_folder, id_prev2curr_pred_vectors, id_prev2curr_pred_frame, args)\n    # plot the results accumulated across all frames\n    pred_save_path = os.path.join(scene_dir, 'gt_unmerged.png')\n    plot_fig_unmerged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args)\n    pred_save_path = os.path.join(scene_dir, 'gt_merged.png')\n    plot_fig_merged(car_trajectory, x_min, x_max, y_min, y_max, pred_save_path, id_prev2curr_pred_vectors, args)\n\n    # combine the merged and unmerged images into one plot for comparison\n    comb_save_path = os.path.join(scene_dir, 'gt_comb.png')\n    image_paths = [os.path.join(scene_dir, 'gt_merged.png'), os.path.join(scene_dir, 'gt_unmerged.png')]\n    labels = ['Merged', 'Unmerged']\n    combine_images_with_labels(image_paths, labels, comb_save_path)\n    print(\"image saved to: \", comb_save_path)\n\n\ndef main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n    import_plugin(cfg)\n    dataset = build_dataset(cfg.match_config)\n\n    # group the dataset sample indices by scene\n    scene_name2idx = {}\n    for idx, sample in enumerate(dataset.samples):\n        scene = sample['scene_name']\n        if scene not in scene_name2idx:\n            scene_name2idx[scene] = []\n        scene_name2idx[scene].append(idx)\n\n    # load the GT data\n    if args.option == \"vis-gt\":\n        data = mmcv.load(args.data_path)\n    # load the prediction data\n    elif args.option == \"vis-pred\":\n        with open(args.data_path, 'rb') as fp:\n            data = pickle.load(fp)\n\n    all_scene_names = sorted(list(scene_name2idx.keys()))\n\n    roi_size = torch.tensor(cfg.roi_size).numpy()\n    origin = torch.tensor(cfg.pc_range[:2]).numpy()\n\n    for scene_name in all_scene_names:\n        if args.scene_id is not None and scene_name not in args.scene_id:\n            continue\n        scene_dir = os.path.join(args.out_dir, scene_name)\n        if os.path.exists(scene_dir) and len(os.listdir(scene_dir)) > 0 and not args.overwrite:\n            print(f\"Scene {scene_name} already generated, skipping...\")\n            continue\n        os.makedirs(scene_dir, exist_ok=True)\n\n        if args.option == \"vis-gt\":\n            # visualize the GT data\n            vis_gt_data(scene_name=scene_name, args=args, dataset=dataset, gt_data=data, origin=origin, roi_size=roi_size)\n        elif args.option == \"vis-pred\":\n            # visualize the prediction results\n            vis_pred_data(scene_name=scene_name, pred_results=data, origin=origin, roi_size=roi_size, args=args)\n        else:\n            raise ValueError('Invalid visualization option {}'.format(args.option))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "tools/visualization/vis_per_frame.py",
    "content": "import sys\nimport os\nSCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))\nsys.path.append(os.path.dirname(SCRIPT_DIR))\n\nimport argparse     \nimport mmcv\nfrom mmcv import Config\nimport os\nfrom mmdet3d.datasets import build_dataset\nimport torch\nimport numpy as np\nfrom PIL import Image\nimport pickle\nfrom collections import defaultdict\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom PIL import Image\nimport imageio\nfrom tracking.cmap_utils.match_utils import *\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Visualize groundtruth and results')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--out_dir', \n        required=True,\n        default='demo',\n        help='directory where visualize results will be saved')\n    parser.add_argument(\n        '--data_path',\n        required=True,\n        default=\"\",\n        help='directory to submission file')\n    parser.add_argument(\n        '--scene_id',\n        type=str, \n        nargs='+',\n        default=None,\n        help='scene_id to visulize')\n    parser.add_argument(\n        '--option',\n        default=\"vis-gt\",\n        help='vis-gt or vis-pred')\n    parser.add_argument(\n        '--line_opacity',\n        default=0.75,\n        type=float,\n        help='Line simplification tolerance'\n    )\n    parser.add_argument(\n        '--overwrite',\n        default=1,\n        type=int,\n        help='whether to overwrite the existing images'\n    )\n    parser.add_argument(\n        '--dpi',\n        default=20,\n        type=int,\n        help='whether to merge boundary lines'\n    )\n    \n    args = parser.parse_args()\n\n    return args\n\ndef save_as_video(image_list, mp4_output_path, scale=None):\n    mp4_output_path = mp4_output_path.replace('.gif','.mp4')\n    images = [Image.fromarray(img).convert(\"RGBA\") for img in image_list]\n    if scale is not None:\n        w, h = images[0].size\n        images = [img.resize((int(w*scale), int(h*scale)), Image.Resampling.LANCZOS) for img in images]\n    images = [Image.new('RGBA', images[0].size, (255, 255, 255, 255))] + images\n    try:\n        imageio.mimsave(mp4_output_path, images,  format='MP4',fps=10)\n    except ValueError: # in case the shapes are not the same, have to manually adjust\n        resized_images = [img.resize(images[0].size, Image.Resampling.LANCZOS) for img in images]\n        print('Size not all the same, manually adjust...')\n        imageio.mimsave(mp4_output_path, resized_images,  format='MP4',fps=10)\n    print(\"mp4 saved to : \", mp4_output_path)\n\ndef plot_one_frame_results(vectors, id_info, roi_size, scene_dir, args):                \n    # setup the figure with car\n    plt.figure(figsize=(roi_size[0], roi_size[1]))\n    plt.xlim(-roi_size[0] / 2, roi_size[0] / 2)\n    plt.ylim(-roi_size[1] / 2, roi_size[1] / 2)\n    plt.axis('off')\n    plt.autoscale(False)\n    car_img = Image.open('resources/car-orange.png')\n    plt.imshow(car_img, extent=[-2.2, 2.2, -2, 2])\n    \n    for label, vecs in vectors.items():\n        if label == 0: # ped_crossing\n            color = 'b'\n            label_text = 'P'\n        elif label == 1: # divider\n            color = 'r'\n            label_text = 'D'\n        elif label == 2: # boundary\n            color = 'g'\n            label_text = 'B'\n        \n        if len(vecs) == 0:\n            continue\n\n        for vec_idx, vec in enumerate(vecs):\n            pts = vec[:, :2]\n        
def plot_one_frame_results(vectors, id_info, roi_size, scene_dir, args):\n    # setup the figure with car\n    plt.figure(figsize=(roi_size[0], roi_size[1]))\n    plt.xlim(-roi_size[0] / 2, roi_size[0] / 2)\n    plt.ylim(-roi_size[1] / 2, roi_size[1] / 2)\n    plt.axis('off')\n    plt.autoscale(False)\n    car_img = Image.open('resources/car-orange.png')\n    plt.imshow(car_img, extent=[-2.2, 2.2, -2, 2])\n\n    for label, vecs in vectors.items():\n        if label == 0: # ped_crossing\n            color = 'b'\n            label_text = 'P'\n        elif label == 1: # divider\n            color = 'r'\n            label_text = 'D'\n        elif label == 2: # boundary\n            color = 'g'\n            label_text = 'B'\n\n        if len(vecs) == 0:\n            continue\n\n        for vec_idx, vec in enumerate(vecs):\n            pts = vec[:, :2]\n            x = np.array([pt[0] for pt in pts])\n            y = np.array([pt[1] for pt in pts])\n            plt.plot(x, y, 'o-', color=color, linewidth=25, markersize=20, alpha=args.line_opacity)\n            vec_id = id_info[label][vec_idx]\n            mid_idx = len(x) // 2\n\n            # place the instance id text, clamping it to the ROI so it does not change the figure size\n            if -roi_size[1]/2 <= y[mid_idx] < -roi_size[1]/2 + 2:\n                text_y = y[mid_idx] + 2\n            elif roi_size[1]/2 - 2 < y[mid_idx] <= roi_size[1]/2:\n                text_y = y[mid_idx] - 2\n            else:\n                text_y = y[mid_idx]\n\n            if -roi_size[0]/2 <= x[mid_idx] < -roi_size[0]/2 + 4:\n                text_x = x[mid_idx] + 4\n            elif roi_size[0]/2 - 4 < x[mid_idx] <= roi_size[0]/2:\n                text_x = x[mid_idx] - 4\n            else:\n                text_x = x[mid_idx]\n\n            plt.text(text_x, text_y, f'{label_text}{vec_id}', fontsize=80, color=color)\n\n    save_path = os.path.join(scene_dir, 'temp.png')\n    plt.savefig(save_path, bbox_inches='tight', transparent=False, dpi=args.dpi)\n    plt.clf()\n    plt.close()\n\n    viz_image = imageio.imread(save_path)\n    return viz_image\n\ndef vis_pred_data(scene_name, args, pred_results, origin, roi_size):\n\n    # get the item indices of the scene\n    scene_idx = defaultdict(list)\n    for index in range(len(pred_results)):\n        scene_idx[pred_results[index][\"scene_name\"]].append(index)\n    index_list = scene_idx[scene_name]\n\n    scene_dir = os.path.join(args.out_dir, scene_name)\n    os.makedirs(scene_dir, exist_ok=True)\n\n    # map each global instance id to (label, per-label local id)\n    g2l_id_mapping = dict()\n    label_ins_counter = {0:0, 1:0, 2:0}\n\n    all_viz_images = []\n\n    # iterate through each frame of the pred sequence\n    for index in index_list:\n        vectors = np.array(pred_results[index][\"vectors\"]).reshape((-1, 20, 2))\n        # some result files store normalized coordinates, others do not\n        if np.abs(vectors).max() <= 1:\n            vectors = vectors * roi_size + origin\n        labels = np.array(pred_results[index][\"labels\"])\n        global_ids = np.array(pred_results[index][\"global_ids\"])\n\n        per_label_results = defaultdict(list)\n\n        for ins_idx in range(len(vectors)):\n            label = int(labels[ins_idx])\n            global_id = int(global_ids[ins_idx])\n            if global_id not in g2l_id_mapping:\n                local_idx = label_ins_counter[label]\n                g2l_id_mapping[global_id] = (label, local_idx)\n                label_ins_counter[label] += 1\n            else:\n                if label == g2l_id_mapping[global_id][0]:\n                    local_idx = g2l_id_mapping[global_id][1]\n                else:\n                    # the label of a tracked instance changed (can happen in our method),\n                    # so re-register the global id under the new label\n                    local_idx = label_ins_counter[label]\n                    g2l_id_mapping[global_id] = (label, local_idx)\n                    label_ins_counter[label] += 1\n\n            per_label_results[label].append([vectors[ins_idx], global_id, local_idx])\n\n
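        # re-pack the per-label results into stacked arrays and a local-id lookup for plotting\n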
        curr_vectors = defaultdict(list)\n        id_info = dict()\n        for label, results in per_label_results.items():\n            vec_results = [item[0] for item in results]\n            global_ids = [item[1] for item in results]\n            local_ids = [item[2] for item in results]\n\n            curr_vectors[label] = np.stack(vec_results, axis=0)\n            id_info[label] = {idx: ins_id for idx, ins_id in enumerate(local_ids)}\n\n        viz_image = plot_one_frame_results(curr_vectors, id_info, roi_size, scene_dir, args)\n        all_viz_images.append(viz_image)\n\n    gif_path = os.path.join(scene_dir, 'per_frame_pred.gif')\n    save_as_video(all_viz_images, gif_path)\n\ndef vis_gt_data(scene_name, args, dataset, scene_name2idx, gt_data, origin, roi_size):\n    gt_info = gt_data[scene_name]\n    gt_info_list = []\n    ids_info = []\n\n    scene_dir = os.path.join(args.out_dir, scene_name)\n    os.makedirs(scene_dir, exist_ok=True)\n\n    for index, one_idx in enumerate(gt_info[\"sample_ids\"]):\n        gt_info_list.append(dataset[one_idx])\n        ids_info.append(gt_info[\"instance_ids\"][index])\n    scene_len = len(gt_info_list)\n\n    all_viz_images = []\n    all_cam_images = {cam_name: [] for cam_name in dataset.samples[0]['cams'].keys()}\n\n    for frame_idx in range(scene_len):\n        global_idx = scene_name2idx[scene_name][frame_idx]\n        # collect images for each camera\n        cams = dataset.samples[global_idx]['cams']\n        for cam, info in cams.items():\n            img = imageio.imread(info['img_fpath'])\n            all_cam_images[cam].append(img)\n        # collect vectors for each frame\n        curr_vectors = dict()\n        for label, vecs in gt_info_list[frame_idx]['vectors'].data.items():\n            if len(vecs) > 0:\n                curr_vectors[label] = vecs * roi_size + origin\n            else:\n                curr_vectors[label] = vecs\n\n        id_info = ids_info[frame_idx]\n\n        viz_image = plot_one_frame_results(curr_vectors, id_info, roi_size, scene_dir, args)\n        all_viz_images.append(viz_image)\n\n    gif_path = os.path.join(scene_dir, 'per_frame_gt.gif')\n    save_as_video(all_viz_images, gif_path)\n\n    # also export a video per camera\n    for cam_name, image_list in all_cam_images.items():\n        gif_path = os.path.join(scene_dir, f'{cam_name}.gif')\n        save_as_video(image_list, gif_path, scale=0.3)\n
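\n# example invocation (config/result paths are illustrative):\n#   python tools/visualization/vis_per_frame.py <config.py> --out_dir demo --data_path <results.pkl> --option vis-pred\n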
def main():\n    args = parse_args()\n    cfg = Config.fromfile(args.config)\n    import_plugin(cfg)\n    dataset = build_dataset(cfg.match_config)\n\n    # group the dataset sample indices by scene\n    scene_name2idx = {}\n    for idx, sample in enumerate(dataset.samples):\n        scene = sample['scene_name']\n        if scene not in scene_name2idx:\n            scene_name2idx[scene] = []\n        scene_name2idx[scene].append(idx)\n\n    if args.data_path == \"\":\n        data = {}\n    elif args.option == \"vis-gt\": # visualize the GT\n        data = mmcv.load(args.data_path)\n    elif args.option == \"vis-pred\":\n        with open(args.data_path, 'rb') as fp:\n            data = pickle.load(fp)\n\n    all_scene_names = sorted(list(scene_name2idx.keys()))\n\n    roi_size = torch.tensor(cfg.roi_size).numpy()\n    origin = torch.tensor(cfg.pc_range[:2]).numpy()\n\n    for scene_name in all_scene_names:\n\n        if args.scene_id is not None and scene_name not in args.scene_id:\n            continue\n        scene_dir = os.path.join(args.out_dir, scene_name)\n        if os.path.exists(scene_dir) and len(os.listdir(scene_dir)) > 0 and not args.overwrite:\n            print(f\"Scene {scene_name} already generated, skipping...\")\n            continue\n        os.makedirs(scene_dir, exist_ok=True)\n        if args.option == \"vis-gt\":\n            vis_gt_data(scene_name=scene_name, args=args, dataset=dataset,\n                scene_name2idx=scene_name2idx, gt_data=data, origin=origin, roi_size=roi_size)\n        elif args.option == \"vis-pred\":\n            vis_pred_data(scene_name=scene_name, args=args, pred_results=data, origin=origin, roi_size=roi_size)\n\nif __name__ == '__main__':\n    main()\n"
  }
]