Repository: WongKinYiu/yolov9 Branch: main Commit: 5b1ea9a8b3f0 Files: 116 Total size: 1.2 MB Directory structure: gitextract_ys0vmsdq/ ├── LICENSE.md ├── README.md ├── benchmarks.py ├── classify/ │ ├── predict.py │ ├── train.py │ └── val.py ├── data/ │ ├── coco.yaml │ └── hyps/ │ └── hyp.scratch-high.yaml ├── detect.py ├── detect_dual.py ├── export.py ├── hubconf.py ├── models/ │ ├── __init__.py │ ├── common.py │ ├── detect/ │ │ ├── gelan-c.yaml │ │ ├── gelan-e.yaml │ │ ├── gelan-m.yaml │ │ ├── gelan-s.yaml │ │ ├── gelan-t.yaml │ │ ├── gelan.yaml │ │ ├── yolov7-af.yaml │ │ ├── yolov9-c.yaml │ │ ├── yolov9-cf.yaml │ │ ├── yolov9-e.yaml │ │ ├── yolov9-m.yaml │ │ ├── yolov9-s.yaml │ │ ├── yolov9-t.yaml │ │ └── yolov9.yaml │ ├── experimental.py │ ├── hub/ │ │ ├── anchors.yaml │ │ ├── yolov3-spp.yaml │ │ ├── yolov3-tiny.yaml │ │ └── yolov3.yaml │ ├── panoptic/ │ │ ├── gelan-c-pan.yaml │ │ └── yolov7-af-pan.yaml │ ├── segment/ │ │ ├── gelan-c-dseg.yaml │ │ ├── gelan-c-seg.yaml │ │ ├── yolov7-af-seg.yaml │ │ └── yolov9-c-dseg.yaml │ ├── tf.py │ └── yolo.py ├── panoptic/ │ ├── predict.py │ ├── train.py │ └── val.py ├── requirements.txt ├── scripts/ │ └── get_coco.sh ├── segment/ │ ├── predict.py │ ├── train.py │ ├── train_dual.py │ ├── val.py │ └── val_dual.py ├── tools/ │ └── reparameterization.ipynb ├── train.py ├── train_dual.py ├── train_triple.py ├── utils/ │ ├── __init__.py │ ├── activations.py │ ├── augmentations.py │ ├── autoanchor.py │ ├── autobatch.py │ ├── callbacks.py │ ├── coco_utils.py │ ├── dataloaders.py │ ├── downloads.py │ ├── general.py │ ├── lion.py │ ├── loggers/ │ │ ├── __init__.py │ │ ├── clearml/ │ │ │ ├── __init__.py │ │ │ ├── clearml_utils.py │ │ │ └── hpo.py │ │ ├── comet/ │ │ │ ├── __init__.py │ │ │ ├── comet_utils.py │ │ │ ├── hpo.py │ │ │ └── optimizer_config.json │ │ └── wandb/ │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ ├── sweep.py │ │ ├── sweep.yaml │ │ └── wandb_utils.py │ ├── loss.py │ ├── loss_tal.py │ ├── loss_tal_dual.py │ ├── 
loss_tal_triple.py │ ├── metrics.py │ ├── panoptic/ │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── dataloaders.py │ │ ├── general.py │ │ ├── loss.py │ │ ├── loss_tal.py │ │ ├── metrics.py │ │ ├── plots.py │ │ └── tal/ │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── assigner.py │ ├── plots.py │ ├── segment/ │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── dataloaders.py │ │ ├── general.py │ │ ├── loss.py │ │ ├── loss_tal.py │ │ ├── loss_tal_dual.py │ │ ├── metrics.py │ │ ├── plots.py │ │ └── tal/ │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── assigner.py │ ├── tal/ │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ └── assigner.py │ ├── torch_utils.py │ └── triton.py ├── val.py ├── val_dual.py └── val_triple.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE.md ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. ================================================ FILE: README.md ================================================ # YOLOv9 Implementation of paper - [YOLOv9: Learning What You Want to Learn Using Programmable Gradient Information](https://arxiv.org/abs/2402.13616) [![arxiv.org](http://img.shields.io/badge/cs.CV-arXiv%3A2402.13616-B31B1B.svg)](https://arxiv.org/abs/2402.13616) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/kadirnar/Yolov9) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/merve/yolov9) [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/train-yolov9-object-detection-on-custom-dataset.ipynb) [![OpenCV](https://img.shields.io/badge/OpenCV-BlogPost-black?logo=opencv&labelColor=blue&color=black)](https://learnopencv.com/yolov9-advancing-the-yolo-legacy/) ## Performance MS COCO | Model | Test Size | AP<sup>val</sup> | AP<sub>50</sub><sup>val</sup> | AP<sub>75</sub><sup>val</sup> | Param. 
| FLOPs | | :-- | :-: | :-: | :-: | :-: | :-: | :-: | | [**YOLOv9-T**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-t-converted.pt) | 640 | **38.3%** | **53.1%** | **41.3%** | **2.0M** | **7.7G** | | [**YOLOv9-S**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s-converted.pt) | 640 | **46.8%** | **63.4%** | **50.7%** | **7.1M** | **26.4G** | | [**YOLOv9-M**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-m-converted.pt) | 640 | **51.4%** | **68.1%** | **56.1%** | **20.0M** | **76.3G** | | [**YOLOv9-C**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c-converted.pt) | 640 | **53.0%** | **70.2%** | **57.8%** | **25.3M** | **102.1G** | | [**YOLOv9-E**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e-converted.pt) | 640 | **55.6%** | **72.8%** | **60.6%** | **57.3M** | **189.0G** | ## Useful Links
Expand Custom training: https://github.com/WongKinYiu/yolov9/issues/30#issuecomment-1960955297 ONNX export: https://github.com/WongKinYiu/yolov9/issues/2#issuecomment-1960519506 https://github.com/WongKinYiu/yolov9/issues/40#issue-2150697688 https://github.com/WongKinYiu/yolov9/issues/130#issue-2162045461 ONNX export for segmentation: https://github.com/WongKinYiu/yolov9/issues/260#issue-2191162150 TensorRT inference: https://github.com/WongKinYiu/yolov9/issues/143#issuecomment-1975049660 https://github.com/WongKinYiu/yolov9/issues/34#issue-2150393690 https://github.com/WongKinYiu/yolov9/issues/79#issue-2153547004 https://github.com/WongKinYiu/yolov9/issues/143#issue-2164002309 QAT TensorRT: https://github.com/WongKinYiu/yolov9/issues/327#issue-2229284136 https://github.com/WongKinYiu/yolov9/issues/253#issue-2189520073 TensorRT inference for segmentation: https://github.com/WongKinYiu/yolov9/issues/446 TFLite: https://github.com/WongKinYiu/yolov9/issues/374#issuecomment-2065751706 OpenVINO: https://github.com/WongKinYiu/yolov9/issues/164#issue-2168540003 C# ONNX inference: https://github.com/WongKinYiu/yolov9/issues/95#issue-2155974619 C# OpenVINO inference: https://github.com/WongKinYiu/yolov9/issues/95#issuecomment-1968131244 OpenCV: https://github.com/WongKinYiu/yolov9/issues/113#issuecomment-1971327672 Hugging Face demo: https://github.com/WongKinYiu/yolov9/issues/45#issuecomment-1961496943 CoLab demo: https://github.com/WongKinYiu/yolov9/pull/18 ONNXSlim export: https://github.com/WongKinYiu/yolov9/pull/37 YOLOv9 ROS: https://github.com/WongKinYiu/yolov9/issues/144#issue-2164210644 YOLOv9 ROS TensorRT: https://github.com/WongKinYiu/yolov9/issues/145#issue-2164218595 YOLOv9 Julia: https://github.com/WongKinYiu/yolov9/issues/141#issuecomment-1973710107 YOLOv9 MLX: https://github.com/WongKinYiu/yolov9/issues/258#issue-2190586540 YOLOv9 StrongSORT with OSNet: https://github.com/WongKinYiu/yolov9/issues/299#issue-2212093340 YOLOv9 ByteTrack: 
https://github.com/WongKinYiu/yolov9/issues/78#issue-2153512879 YOLOv9 DeepSORT: https://github.com/WongKinYiu/yolov9/issues/98#issue-2156172319 YOLOv9 counting: https://github.com/WongKinYiu/yolov9/issues/84#issue-2153904804 YOLOv9 speed estimation: https://github.com/WongKinYiu/yolov9/issues/456 YOLOv9 face detection: https://github.com/WongKinYiu/yolov9/issues/121#issue-2160218766 YOLOv9 segmentation onnxruntime: https://github.com/WongKinYiu/yolov9/issues/151#issue-2165667350 Comet logging: https://github.com/WongKinYiu/yolov9/pull/110 MLflow logging: https://github.com/WongKinYiu/yolov9/pull/87 AnyLabeling tool: https://github.com/WongKinYiu/yolov9/issues/48#issue-2152139662 AX650N deploy: https://github.com/WongKinYiu/yolov9/issues/96#issue-2156115760 Conda environment: https://github.com/WongKinYiu/yolov9/pull/93 AutoDL docker environment: https://github.com/WongKinYiu/yolov9/issues/112#issue-2158203480
## Installation Docker environment (recommended)
Expand ``` shell # create the docker container, you can change the share memory size if you have more. nvidia-docker run --name yolov9 -it -v your_coco_path/:/coco/ -v your_code_path/:/yolov9 --shm-size=64g nvcr.io/nvidia/pytorch:21.11-py3 # apt install required packages apt update apt install -y zip htop screen libgl1-mesa-glx # pip install required packages pip install seaborn thop # go to code folder cd /yolov9 ```
## Evaluation [`yolov9-s-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s-converted.pt) [`yolov9-m-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-m-converted.pt) [`yolov9-c-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c-converted.pt) [`yolov9-e-converted.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e-converted.pt) [`yolov9-s.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-s.pt) [`yolov9-m.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-m.pt) [`yolov9-c.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-c.pt) [`yolov9-e.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/yolov9-e.pt) [`gelan-s.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-s.pt) [`gelan-m.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-m.pt) [`gelan-c.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c.pt) [`gelan-e.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-e.pt) ``` shell # evaluate converted yolov9 models python val.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './yolov9-c-converted.pt' --save-json --name yolov9_c_c_640_val # evaluate yolov9 models # python val_dual.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './yolov9-c.pt' --save-json --name yolov9_c_640_val # evaluate gelan models # python val.py --data data/coco.yaml --img 640 --batch 32 --conf 0.001 --iou 0.7 --device 0 --weights './gelan-c.pt' --save-json --name gelan_c_640_val ``` You will get the results: ``` Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.530 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.702 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.578 Average Precision 
(AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.362 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.585 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.693 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.392 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.652 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.702 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.541 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.760 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.844 ``` ## Training Data preparation ``` shell bash scripts/get_coco.sh ``` * Download MS COCO dataset images ([train](http://images.cocodataset.org/zips/train2017.zip), [val](http://images.cocodataset.org/zips/val2017.zip), [test](http://images.cocodataset.org/zips/test2017.zip)) and [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip). 
If you have previously used a different version of YOLO, we strongly recommend that you delete `train2017.cache` and `val2017.cache` files, and redownload [labels](https://github.com/WongKinYiu/yolov7/releases/download/v0.1/coco2017labels-segments.zip) Single GPU training ``` shell # train yolov9 models python train_dual.py --workers 8 --device 0 --batch 16 --data data/coco.yaml --img 640 --cfg models/detect/yolov9-c.yaml --weights '' --name yolov9-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15 # train gelan models # python train.py --workers 8 --device 0 --batch 32 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15 ``` Multiple GPU training ``` shell # train yolov9 models python -m torch.distributed.launch --nproc_per_node 8 --master_port 9527 train_dual.py --workers 8 --device 0,1,2,3,4,5,6,7 --sync-bn --batch 128 --data data/coco.yaml --img 640 --cfg models/detect/yolov9-c.yaml --weights '' --name yolov9-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15 # train gelan models # python -m torch.distributed.launch --nproc_per_node 4 --master_port 9527 train.py --workers 8 --device 0,1,2,3 --sync-bn --batch 128 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c --hyp hyp.scratch-high.yaml --min-items 0 --epochs 500 --close-mosaic 15 ``` ## Re-parameterization See [reparameterization.ipynb](https://github.com/WongKinYiu/yolov9/blob/main/tools/reparameterization.ipynb). 
## Inference ``` shell # inference converted yolov9 models python detect.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './yolov9-c-converted.pt' --name yolov9_c_c_640_detect # inference yolov9 models # python detect_dual.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './yolov9-c.pt' --name yolov9_c_640_detect # inference gelan models # python detect.py --source './data/images/horses.jpg' --img 640 --device 0 --weights './gelan-c.pt' --name gelan_c_c_640_detect ``` ## Citation ``` @article{wang2024yolov9, title={{YOLOv9}: Learning What You Want to Learn Using Programmable Gradient Information}, author={Wang, Chien-Yao and Liao, Hong-Yuan Mark}, journal={arXiv preprint arXiv:2402.13616}, year={2024} } ``` ``` @article{chang2023yolor, title={{YOLOR}-Based Multi-Task Learning}, author={Chang, Hung-Shuo and Wang, Chien-Yao and Wang, Richard Robert and Chou, Gene and Liao, Hong-Yuan Mark}, journal={arXiv preprint arXiv:2309.16921}, year={2023} } ``` ## Teaser Parts of code of [YOLOR-Based Multi-Task Learning](https://arxiv.org/abs/2309.16921) are released in the repository. #### Object Detection [`gelan-c-det.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-det.pt) `object detection` ``` shell # coco/labels/{split}/*.txt # bbox or polygon (1 instance 1 line) python train.py --workers 8 --device 0 --batch 32 --data data/coco.yaml --img 640 --cfg models/detect/gelan-c.yaml --weights '' --name gelan-c-det --hyp hyp.scratch-high.yaml --min-items 0 --epochs 300 --close-mosaic 10 ``` | Model | Test Size | Param. 
| FLOPs | APbox | | :-- | :-: | :-: | :-: | :-: | | [**GELAN-C-DET**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-det.pt) | 640 | 25.3M | 102.1G |**52.3%** | | [**YOLOv9-C-DET**]() | 640 | 25.3M | 102.1G | **53.0%** | #### Instance Segmentation [`gelan-c-seg.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-seg.pt) `object detection` `instance segmentation` ``` shell # coco/labels/{split}/*.txt # polygon (1 instance 1 line) python segment/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/segment/gelan-c-seg.yaml --weights '' --name gelan-c-seg --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10 ``` | Model | Test Size | Param. | FLOPs | APbox | APmask | | :-- | :-: | :-: | :-: | :-: | :-: | | [**GELAN-C-SEG**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-seg.pt) | 640 | 27.4M | 144.6G | **52.3%** | **42.4%** | | [**YOLOv9-C-SEG**]() | 640 | 27.4M | 145.5G | **53.3%** | **43.5%** | #### Panoptic Segmentation [`gelan-c-pan.pt`](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-pan.pt) `object detection` `instance segmentation` `semantic segmentation` `stuff segmentation` `panoptic segmentation` ``` shell # coco/labels/{split}/*.txt # polygon (1 instance 1 line) # coco/stuff/{split}/*.txt # polygon (1 semantic 1 line) python panoptic/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/panoptic/gelan-c-pan.yaml --weights '' --name gelan-c-pan --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10 ``` | Model | Test Size | Param. 
| FLOPs | APbox | APmask | mIoU164k/10ksemantic | mIoUstuff | PQpanoptic | | :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | [**GELAN-C-PAN**](https://github.com/WongKinYiu/yolov9/releases/download/v0.1/gelan-c-pan.pt) | 640 | 27.6M | 146.7G | **52.6%** | **42.5%** | **39.0%/48.3%** | **52.7%** | **39.4%** | | [**YOLOv9-C-PAN**]() | 640 | 28.8M | 187.0G | **52.7%** | **43.0%** | **39.8%/-** | **52.2%** | **40.5%** | #### Image Captioning (not yet released) `object detection` `instance segmentation` `semantic segmentation` `stuff segmentation` `panoptic segmentation` `image captioning` ``` shell # coco/labels/{split}/*.txt # polygon (1 instance 1 line) # coco/stuff/{split}/*.txt # polygon (1 semantic 1 line) # coco/annotations/*.json # json (1 split 1 file) python caption/train.py --workers 8 --device 0 --batch 32 --data coco.yaml --img 640 --cfg models/caption/gelan-c-cap.yaml --weights '' --name gelan-c-cap --hyp hyp.scratch-high.yaml --no-overlap --epochs 300 --close-mosaic 10 ``` | Model | Test Size | Param. | FLOPs | APbox | APmask | mIoU164k/10ksemantic | mIoUstuff | PQpanoptic | BLEU@4caption | CIDErcaption | | :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | [**GELAN-C-CAP**]() | 640 | 47.5M | - | **51.9%** | **42.6%** | **42.5%/-** | **56.5%** | **41.7%** | **38.8** | **122.3** | | [**YOLOv9-C-CAP**]() | 640 | 47.5M | - | **52.1%** | **42.6%** | **43.0%/-** | **56.4%** | **42.1%** | **39.1** | **122.0** | ## Acknowledgements
Expand * [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) * [https://github.com/WongKinYiu/yolor](https://github.com/WongKinYiu/yolor) * [https://github.com/WongKinYiu/yolov7](https://github.com/WongKinYiu/yolov7) * [https://github.com/VDIGPKU/DynamicDet](https://github.com/VDIGPKU/DynamicDet) * [https://github.com/DingXiaoH/RepVGG](https://github.com/DingXiaoH/RepVGG) * [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5) * [https://github.com/meituan/YOLOv6](https://github.com/meituan/YOLOv6)
================================================ FILE: benchmarks.py ================================================ import argparse import platform import sys import time from pathlib import Path import pandas as pd FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH # ROOT = ROOT.relative_to(Path.cwd()) # relative import export from models.experimental import attempt_load from models.yolo import SegmentationModel from segment.val import run as val_seg from utils import notebook_init from utils.general import LOGGER, check_yaml, file_size, print_args from utils.torch_utils import select_device from val import run as val_det def run( weights=ROOT / 'yolo.pt', # weights path imgsz=640, # inference size (pixels) batch_size=1, # batch size data=ROOT / 'data/coco.yaml', # dataset.yaml path device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu half=False, # use FP16 half-precision inference test=False, # test exports only pt_only=False, # test PyTorch only hard_fail=False, # throw error on benchmark failure ): y, t = [], time.time() device = select_device(device) model_type = type(attempt_load(weights, fuse=False)) # DetectionModel, SegmentationModel, etc. 
for i, (name, f, suffix, cpu, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, CPU, GPU) try: assert i not in (9, 10), 'inference not supported' # Edge TPU and TF.js are unsupported assert i != 5 or platform.system() == 'Darwin', 'inference only supported on macOS>=10.13' # CoreML if 'cpu' in device.type: assert cpu, 'inference not supported on CPU' if 'cuda' in device.type: assert gpu, 'inference not supported on GPU' # Export if f == '-': w = weights # PyTorch format else: w = export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # all others assert suffix in str(w), 'export failed' # Validate if model_type == SegmentationModel: result = val_seg(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half) metric = result[0][7] # (box(p, r, map50, map), mask(p, r, map50, map), *loss(box, obj, cls)) else: # DetectionModel: result = val_det(data, w, batch_size, imgsz, plots=False, device=device, task='speed', half=half) metric = result[0][3] # (p, r, map50, map, *loss(box, obj, cls)) speed = result[2][1] # times (preprocess, inference, postprocess) y.append([name, round(file_size(w), 1), round(metric, 4), round(speed, 2)]) # MB, mAP, t_inference except Exception as e: if hard_fail: assert type(e) is AssertionError, f'Benchmark --hard-fail for {name}: {e}' LOGGER.warning(f'WARNING ⚠️ Benchmark failure for {name}: {e}') y.append([name, None, None, None]) # mAP, t_inference if pt_only and i == 0: break # break after PyTorch # Print results LOGGER.info('\n') parse_opt() notebook_init() # print system info c = ['Format', 'Size (MB)', 'mAP50-95', 'Inference time (ms)'] if map else ['Format', 'Export', '', ''] py = pd.DataFrame(y, columns=c) LOGGER.info(f'\nBenchmarks complete ({time.time() - t:.2f}s)') LOGGER.info(str(py if map else py.iloc[:, :2])) if hard_fail and isinstance(hard_fail, str): metrics = py['mAP50-95'].array # values to compare to floor floor = eval(hard_fail) # minimum metric 
floor to pass assert all(x > floor for x in metrics if pd.notna(x)), f'HARD FAIL: mAP50-95 < floor {floor}' return py def test( weights=ROOT / 'yolo.pt', # weights path imgsz=640, # inference size (pixels) batch_size=1, # batch size data=ROOT / 'data/coco128.yaml', # dataset.yaml path device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu half=False, # use FP16 half-precision inference test=False, # test exports only pt_only=False, # test PyTorch only hard_fail=False, # throw error on benchmark failure ): y, t = [], time.time() device = select_device(device) for i, (name, f, suffix, gpu) in export.export_formats().iterrows(): # index, (name, file, suffix, gpu-capable) try: w = weights if f == '-' else \ export.run(weights=weights, imgsz=[imgsz], include=[f], device=device, half=half)[-1] # weights assert suffix in str(w), 'export failed' y.append([name, True]) except Exception: y.append([name, False]) # mAP, t_inference # Print results LOGGER.info('\n') parse_opt() notebook_init() # print system info py = pd.DataFrame(y, columns=['Format', 'Export']) LOGGER.info(f'\nExports complete ({time.time() - t:.2f}s)') LOGGER.info(str(py)) return py def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--test', action='store_true', help='test exports only') parser.add_argument('--pt-only', action='store_true', help='test PyTorch only') parser.add_argument('--hard-fail', nargs='?', const=True, default=False, help='Exception on error or < min metric') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML print_args(vars(opt)) return opt def main(opt): test(**vars(opt)) if opt.test else run(**vars(opt)) if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: classify/predict.py ================================================ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license """ Run YOLOv5 classification inference on images, videos, directories, globs, YouTube, webcam, streams, etc. Usage - sources: $ python classify/predict.py --weights yolov5s-cls.pt --source 0 # webcam img.jpg # image vid.mp4 # video screen # screenshot path/ # directory 'path/*.jpg' # glob 'https://youtu.be/Zgi9g1ksQHc' # YouTube 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: $ python classify/predict.py --weights yolov5s-cls.pt # PyTorch yolov5s-cls.torchscript # TorchScript yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov5s-cls_openvino_model # OpenVINO yolov5s-cls.engine # TensorRT yolov5s-cls.mlmodel # CoreML (macOS-only) yolov5s-cls_saved_model # TensorFlow SavedModel yolov5s-cls.pb # TensorFlow GraphDef yolov5s-cls.tflite # TensorFlow Lite yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU yolov5s-cls_paddle_model # PaddlePaddle """ import argparse import os import platform import sys from pathlib import Path import torch import torch.nn.functional as F FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # 
# relative

from models.common import DetectMultiBackend
from utils.augmentations import classify_transforms
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                           increment_path, print_args, strip_optimizer)
from utils.plots import Annotator
from utils.torch_utils import select_device, smart_inference_mode


@smart_inference_mode()
def run(
        weights=ROOT / 'yolov5s-cls.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(224, 224),  # inference size (height, width)
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        nosave=False,  # do not save images/videos
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/predict-cls',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run classification inference on ``source`` and write/show the top-5 class probabilities.

    The source is classified as file, URL, webcam/stream list or screenshot, a matching loader is
    built, and every frame is passed through the model followed by a softmax. For each image the
    five highest-probability classes are logged, drawn on the image when saving or viewing, and
    appended to a per-image text file when ``save_txt`` is set. Images are written with
    ``cv2.imwrite``; videos and streams go through one ``cv2.VideoWriter`` per batch slot.

    ``augment`` and ``visualize`` are not referenced in this function body.
    Returns nothing; results are logged and written under the incremented ``project/name`` directory.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    # Numeric sources, *.txt lists and non-file URLs are all handled by the stream loader
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs  # one output path / writer slot per batch element

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())  # dt: (pre-process, inference, post-process) timers
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.Tensor(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            results = model(im)

        # Post-process
        with dt[2]:
            pred = F.softmax(results, dim=1)  # probabilities

        # Process predictions
        for i, prob in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt

            s += '%gx%g ' % im.shape[2:]  # print string
            annotator = Annotator(im0, example=str(names), pil=True)

            # Print results
            top5i = prob.argsort(0, descending=True)[:5].tolist()  # top 5 indices
            s += f"{', '.join(f'{names[j]} {prob[j]:.2f}' for j in top5i)}, "

            # Write results
            text = '\n'.join(f'{prob[j]:.2f} {names[j]}' for j in top5i)
            if save_img or view_img:  # Add bbox to image
                annotator.text((32, 32), text, txt_color=(255, 255, 255))
            if save_txt:  # Write to file
                with open(f'{txt_path}.txt', 'a') as f:
                    f.write(text + '\n')

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{dt[1].dt * 1E3:.1f}ms")

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # NOTE(review): indexes weights, so this presumably expects the list form produced by the
        # CLI (nargs='+'); the Path default in the signature is not subscriptable - confirm.
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)


def parse_opt():
    """Parse the classification-predict command-line options and print them.

    A single ``--imgsz`` value is duplicated so ``opt.imgsz`` always holds two entries (h, w).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-cls.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[224], help='inference size h,w')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/predict-cls', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    """Check installed requirements (skipping tensorboard and thop), then run inference with ``opt``."""
    check_requirements(exclude=('tensorboard', 'thop'))
    run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)


================================================
FILE: classify/train.py
================================================
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 classifier model on a classification dataset

Usage - Single-GPU training:
    $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224

Usage - Multi-GPU DDP training:
    $ python -m torch.distributed.run
def train(opt, device):
    """Train a classification model as described by ``opt`` and save checkpoints under ``opt.save_dir``.

    Steps, in order: resolve the dataset directory (downloading it when missing), build the train and
    test dataloaders, load the model named by ``opt.model``, then run ``opt.epochs`` epochs with a
    linearly decaying learning rate. On ranks -1/0 the EMA weights are validated on the last batch of
    every epoch, metrics are logged, and ``last.pt`` / ``best.pt`` are written to ``save_dir / 'weights'``.

    Args:
        opt: options namespace as produced by ``parse_opt``; ``opt.save_dir`` must already be set
            (``main`` assigns it before calling this function).
        device: ``torch.device`` that the model and every batch are moved to.

    Raises:
        ModuleNotFoundError: if ``opt.model`` is neither a ``*.pt`` path nor a torchvision model name.
        subprocess.CalledProcessError: if the ImageNet download script exits with a non-zero status.
    """
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    # os.cpu_count() returns None when the CPU count cannot be determined; treat that as one CPU
    # instead of failing on ``None - 1``.
    nw = min((os.cpu_count() or 1) - 1, opt.workers)
    save_dir, data, bs, epochs, imgsz, pretrained = \
        opt.save_dir, Path(opt.data), opt.batch_size, opt.epochs, opt.imgsz, str(opt.pretrained).lower() == 'true'
    cuda = device.type != 'cpu'

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / 'last.pt', wdir / 'best.pt'

    # Save run settings
    yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Logger (only created on ranks -1/0; every later use is guarded by the same rank check)
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...')
            t = time.time()
            if str(data) == 'imagenet':
                # Argument list instead of a shell string: no shell parsing, so paths containing
                # spaces or shell metacharacters are passed to bash unchanged.
                subprocess.run(['bash', str(ROOT / 'data/scripts/get_imagenet.sh')], check=True)
            else:
                url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{data}.zip'
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(path=data_dir / 'train',
                                                   imgsz=imgsz,
                                                   batch_size=bs // WORLD_SIZE,
                                                   augment=True,
                                                   cache=opt.cache,
                                                   rank=LOCAL_RANK,
                                                   workers=nw)

    test_dir = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val'  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=bs // WORLD_SIZE * 2,
                                                      augment=False,
                                                      cache=opt.cache,
                                                      rank=-1,
                                                      workers=nw)

    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith('.pt'):
            model = attempt_load(opt.model, device='cpu', fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](weights='IMAGENET1K_V1' if pretrained else None)
        else:
            m = hub.list('ultralytics/yolov5')  # + hub.list('pytorch/vision')  # models
            raise ModuleNotFoundError(f'--model {opt.model} not found. Available models are: \n' + '\n'.join(m))
        if isinstance(model, DetectionModel):
            LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
            model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10)  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, 'reset_parameters'):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / 'train_images.jpg')
        logger.log_images(file, name='Train Examples')
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler
    lrf = 0.01  # final lr (fraction of lr0)
    # lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine
    lf = lambda x: (1 - x / epochs) * (1 - lrf) + lrf  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
    #                                    final_div_factor=1 / 25 / lrf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    final_epoch = False  # stays False when epochs == 0, so the completion block below is skipped cleanly
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} test\n'
                f'Using {nw * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n'
                f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}")
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)

            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + ' ' * 36

                # Test
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(model=ema.ema,
                                                     dataloader=testloader,
                                                     criterion=criterion,
                                                     pbar=pbar)  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                "train/loss": tloss,
                f"{val}/loss": vloss,
                "metrics/accuracy_top1": top1,
                "metrics/accuracy_top5": top5,
                "lr/0": optimizer.param_groups[0]['lr']}  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(ema.ema).half(),  # deepcopy(de_parallel(model)).half(),
                    'ema': None,  # deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': None,  # optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt

    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(f'\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)'
                    f"\nResults saved to {colorstr('bold', save_dir)}"
                    f"\nPredict: python classify/predict.py --weights {best} --source im.jpg"
                    f"\nValidate: python classify/val.py --weights {best} --data {data_dir}"
                    f"\nExport: python export.py --weights {best} --include onnx"
                    f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
                    f"\nVisualize: https://netron.app\n")

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(images, labels, pred, model.names, verbose=False, f=save_dir / 'test_images.jpg')

        # Log results
        meta = {"epochs": epochs, "top1_acc": best_fitness, "date": datetime.now().isoformat()}
        logger.log_images(file, name='Test Examples (true-predicted)', epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)


def parse_opt(known=False):
    """Define and parse the classifier-training command line.

    Args:
        known: when True, unrecognised arguments are ignored (``parse_known_args``) instead of
            causing an error; ``run`` uses this to obtain defaults from inside another program.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='yolov5s-cls.pt', help='initial weights path')
    parser.add_argument('--data', type=str, default='imagenette160', help='cifar10, cifar100, mnist, imagenet, ...')
    parser.add_argument('--epochs', type=int, default=10, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=64, help='total batch size for all GPUs')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='train, val image size (pixels)')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train-cls', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    # The value may arrive as a bool or a string; train() compares str(value).lower() with 'true'.
    parser.add_argument('--pretrained', nargs='?', const=True, default=True, help='start from i.e. --pretrained False')
    parser.add_argument('--optimizer', choices=['SGD', 'Adam', 'AdamW', 'RMSProp'], default='Adam', help='optimizer')
    parser.add_argument('--lr0', type=float, default=0.001, help='initial learning rate')
    parser.add_argument('--decay', type=float, default=5e-5, help='weight decay')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
    parser.add_argument('--cutoff', type=int, default=None, help='Model layer cutoff index for Classify() head')
    parser.add_argument('--dropout', type=float, default=None, help='Dropout (fraction)')
    parser.add_argument('--verbose', action='store_true', help='Verbose mode')
    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    return parser.parse_known_args()[0] if known else parser.parse_args()


def main(opt):
    """Run pre-flight checks, select the device (initialising DDP when launched distributed), and train.

    Args:
        opt: options namespace from ``parse_opt``; ``opt.save_dir`` is assigned here before ``train`` runs.

    Raises:
        AssertionError: in DDP mode, when the batch size is -1, is not a multiple of ``WORLD_SIZE``,
            or there are fewer CUDA devices than ``LOCAL_RANK`` requires.
    """
    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))
        check_git_status()
        check_requirements()

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, 'AutoBatch is coming soon for classification, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Parameters
    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)


def run(**kwargs):
    """Programmatic entry point: start from the parsed defaults, override them with ``kwargs``, and train.

    Usage: from yolov5 import classify; classify.train.run(data=mnist, imgsz=320, model='yolov5m')

    Args:
        **kwargs: option names and values that replace the corresponding attributes on the namespace.

    Returns:
        argparse.Namespace: the options that were used for the run.
    """
    opt = parse_opt(True)  # known=True so unrelated arguments of the host program are tolerated
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
@smart_inference_mode()
def run(
    data=ROOT / '../datasets/mnist',  # dataset dir
    weights=ROOT / 'yolov5s-cls.pt',  # model.pt path(s)
    batch_size=128,  # batch size
    imgsz=224,  # inference size (pixels)
    device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    workers=8,  # max dataloader workers (per RANK in DDP mode)
    verbose=False,  # verbose output
    project=ROOT / 'runs/val-cls',  # save to project/name
    name='exp',  # save to project/name
    exist_ok=False,  # existing project/name ok, do not increment
    half=False,  # use FP16 half-precision inference
    dnn=False,  # use OpenCV DNN for ONNX inference
    model=None,
    dataloader=None,
    criterion=None,
    pbar=None,
):
    """Evaluate a classification model and return its top-1 accuracy, top-5 accuracy and mean loss.

    Two modes are supported. When ``model`` is given (as classify/train.py does), that model and the
    supplied ``dataloader`` are used and no run directory is created. Otherwise the weights are loaded
    through ``DetectMultiBackend``, a run directory is created under ``project/name``, and a dataloader
    is built from ``data/test`` if it exists, else ``data/val``.

    Args:
        model: already-loaded model; switches the function into the "called by train.py" mode.
        dataloader: loader to evaluate on; required together with ``model``.
        criterion: optional loss callable; when omitted the returned loss stays 0.
        pbar: optional progress bar whose description is updated with loss and accuracies.
        Remaining parameters are described by the inline comments in the signature.

    Returns:
        tuple: ``(top1, top5, loss)``, where ``loss`` is the accumulated criterion value divided by
        the number of batches.
    """
    # Initialize/load model and set device
    training = model is not None
    save_dir = None  # only the standalone path creates a run directory
    if training:  # called by train.py
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
        half &= device.type != 'cpu'  # half precision only supported on CUDA
        model.half() if half else model.float()
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        save_dir.mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # check image size
        half = model.fp16  # FP16 supported on limited backends with CUDA
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1  # export.py models default to batch-size 1
                LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')

        # Dataloader
        data = Path(data)
        test_dir = data / 'test' if (data / 'test').exists() else data / 'val'  # data/test or data/val
        dataloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=batch_size,
                                                      augment=False,
                                                      rank=-1,
                                                      workers=workers)

    model.eval()
    pred, targets, loss, dt = [], [], 0, (Profile(), Profile(), Profile())
    n = len(dataloader)  # number of batches
    action = 'validating' if dataloader.dataset.root.stem == 'val' else 'testing'
    # When driven by train.py the last 36 characters of the bar description are padding that
    # gets replaced, first by the action name and later by loss/top1/top5.
    desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}"
    bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0)
    with torch.cuda.amp.autocast(enabled=device.type != 'cpu'):
        for images, labels in bar:
            with dt[0]:
                images, labels = images.to(device, non_blocking=True), labels.to(device)

            with dt[1]:
                y = model(images)

            with dt[2]:
                pred.append(y.argsort(1, descending=True)[:, :5])  # indices of the 5 highest scores per image
                targets.append(labels)
                if criterion:
                    loss += criterion(y, labels)

    loss /= n
    pred, targets = torch.cat(pred), torch.cat(targets)
    correct = (targets[:, None] == pred).float()
    acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1)  # (top1, top5) accuracy
    top1, top5 = acc.mean(0).tolist()

    if pbar:
        pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}"
    if verbose:  # all classes
        LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}")
        LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}")
        # ``.items()`` exists only on mappings; models handed over by train.py carry
        # ``trainloader.dataset.classes`` as names, so fall back to enumerate for sequences.
        names = model.names
        indexed_names = names.items() if isinstance(names, dict) else enumerate(names)
        for i, c in indexed_names:
            aci = acc[targets == i]
            top1i, top5i = aci.mean(0).tolist()
            LOGGER.info(f"{c:>24}{aci.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}")

        # Print results
        t = tuple(x.t / len(dataloader.dataset.samples) * 1E3 for x in dt)  # speeds per image
        shape = (1, 3, imgsz, imgsz)
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}' % t)
        if save_dir is not None:  # no run directory exists when called with a ready-made model
            LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}")

    return top1, top5, loss


def parse_opt():
    """Define and parse the classifier-validation command line.

    Returns:
        argparse.Namespace: the parsed options, which are also printed via ``print_args``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / '../datasets/mnist', help='dataset path')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s-cls.pt', help='model.pt path(s)')
    parser.add_argument('--batch-size', type=int, default=128, help='batch size')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    # NOTE(review): no ``type`` is given, so an explicit value such as ``--verbose False`` is stored
    # as a non-empty string, which is truthy - confirm whether disabling verbose output is intended
    # to be possible from the command line.
    parser.add_argument('--verbose', nargs='?', const=True, default=True, help='verbose output')
    parser.add_argument('--project', default=ROOT / 'runs/val-cls', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    print_args(vars(opt))
    return opt


def main(opt):
    """Call ``check_requirements`` (skipping tensorboard and thop), then ``run`` with the parsed options.

    Args:
        opt: namespace returned by ``parse_opt``; its attributes are passed to ``run`` as keyword arguments.
    """
    check_requirements(exclude=('tensorboard', 'thop'))
    run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
dining table 61: toilet 62: tv 63: laptop 64: mouse 65: remote 66: keyboard 67: cell phone 68: microwave 69: oven 70: toaster 71: sink 72: refrigerator 73: book 74: clock 75: vase 76: scissors 77: teddy bear 78: hair drier 79: toothbrush # stuff names stuff_names: [ 'banner', 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', 'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', 'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain', 'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble', 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', 'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel', 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', 'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', 'paper', 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', 'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof', 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', 'snow', 'solid-other', 'stairs', 'stone', 'straw', 'structural-other', 'table', 'tent', 'textile-other', 'towel', 'tree', 'vegetable', 'wall-brick', 'wall-concrete', 'wall-other', 'wall-panel', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'waterdrops', 'window-blind', 'window-other', 'wood', # other 'other', # unlabeled 'unlabeled' ] # Download script/URL (optional) download: | from utils.general import download, Path # Download labels #segments = True # segment or box labels #dir = Path(yaml['path']) # dataset root dir #url = 'https://github.com/WongKinYiu/yolov7/releases/download/v0.1/' #urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels #download(urls, dir=dir.parent) # Download data #urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images # 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images # 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) #download(urls, 
dir=dir / 'images', threads=3) ================================================ FILE: data/hyps/hyp.scratch-high.yaml ================================================ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 warmup_epochs: 3.0 # warmup epochs (fractions ok) warmup_momentum: 0.8 # warmup initial momentum warmup_bias_lr: 0.1 # warmup initial bias lr box: 7.5 # box loss gain cls: 0.5 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight obj: 0.7 # obj loss gain (scale with pixels) obj_pw: 1.0 # obj BCELoss positive_weight dfl: 1.5 # dfl loss gain iou_t: 0.20 # IoU training threshold anchor_t: 5.0 # anchor-multiple threshold # anchors: 3 # anchors per output layer (0 to ignore) fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) hsv_h: 0.015 # image HSV-Hue augmentation (fraction) hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) hsv_v: 0.4 # image HSV-Value augmentation (fraction) degrees: 0.0 # image rotation (+/- deg) translate: 0.1 # image translation (+/- fraction) scale: 0.9 # image scale (+/- gain) shear: 0.0 # image shear (+/- deg) perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 flipud: 0.0 # image flip up-down (probability) fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.15 # image mixup (probability) copy_paste: 0.3 # segment copy-paste (probability) ================================================ FILE: detect.py ================================================ import argparse import os import platform import sys from pathlib import Path import torch FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend 
@smart_inference_mode()
def run(
        weights=ROOT / 'yolo.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run detection on `source` and optionally show and/or save the annotated results.

    The function loads the model through `DetectMultiBackend`, picks a loader (streams, screenshots or
    image/video files) from the form of `source`, and for every batch performs pre-processing, inference
    and NMS. For each image it can write normalized xywh labels (`save_txt`), draw boxes, save crops
    (`save_crop`), display the frame (`view_img`) and write the annotated image or video (unless `nosave`).
    Per-stage timings are logged at the end. Each parameter is described by the comment next to it.

    `weights` may be a single path or a list/tuple of paths (the CLI passes a list because of `nargs='+'`).

    Returns:
        None. Results are written below `project/name` and reported through `LOGGER`.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs  # one output path / writer slot per batch entry

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())  # dt: pre-process, inference, NMS timers
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Release writers that are still open so the last output video is flushed and closed
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    t = tuple(x.t / max(seen, 1) * 1E3 for x in dt)  # speeds per image (guard against an empty run)
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # `weights` is a list only when given on the command line (nargs='+'); the defaults and direct
        # callers pass a single path, which must not be indexed
        strip_optimizer(weights[0] if isinstance(weights, (list, tuple)) else weights)  # update model (to fix SourceChangeWarning)


def parse_opt():
    """Parse the command-line options of this script.

    A single `--imgsz` value is duplicated so that `opt.imgsz` always holds two entries (h, w).
    The parsed options are printed with `print_args` before being returned.

    Returns:
        argparse.Namespace whose attribute names match the keyword arguments of `run()`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path or triton URL')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    # default kept identical to run(): data/coco.yaml is the dataset file shipped in data/
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    """Call `run()` with every parsed option passed as a keyword argument."""
    # check_requirements(exclude=('tensorboard', 'thop'))
    run(**vars(opt))
@smart_inference_mode()
def run(
        weights=ROOT / 'yolo.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run detection on `source` with a dual-output model and optionally show/save annotated results.

    Same pipeline as a plain detection script (model load, loader selection, pre-process, inference, NMS,
    per-image annotation and saving), except that only element `[0][1]` of the raw model output is passed
    on to NMS. Each parameter is described by the comment next to it.

    `weights` may be a single path or a list/tuple of paths (the CLI passes a list because of `nargs='+'`).

    Returns:
        None. Results are written below `project/name` and reported through `LOGGER`.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs  # one output path / writer slot per batch entry

    # Run inference
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())  # dt: pre-process, inference, NMS timers
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)
            # NOTE(review): presumably selects the predictions of one of the two heads — confirm against
            # the dual model's output structure
            pred = pred[0][1]

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Release writers that are still open so the last output video is flushed and closed
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    t = tuple(x.t / max(seen, 1) * 1E3 for x in dt)  # speeds per image (guard against an empty run)
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # `weights` is a list only when given on the command line (nargs='+'); the defaults and direct
        # callers pass a single path, which must not be indexed
        strip_optimizer(weights[0] if isinstance(weights, (list, tuple)) else weights)  # update model (to fix SourceChangeWarning)


def parse_opt():
    """Parse the command-line options of this script.

    A single `--imgsz` value is duplicated so that `opt.imgsz` always holds two entries (h, w).
    The parsed options are printed with `print_args` before being returned.

    Returns:
        argparse.Namespace whose attribute names match the keyword arguments of `run()`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path or triton URL')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    # default kept identical to run(): data/coco.yaml is the dataset file shipped in data/
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    """Call `run()` with every parsed option passed as a keyword argument."""
    # check_requirements(exclude=('tensorboard', 'thop'))
    run(**vars(opt))
def export_formats():
    """Return the table of export formats as a DataFrame.

    Columns are 'Format', 'Argument', 'Suffix', 'CPU' and 'GPU'; every row describes one format.
    """
    # YOLO export formats
    x = [
        ['PyTorch', '-', '.pt', True, True],
        ['TorchScript', 'torchscript', '.torchscript', True, True],
        ['ONNX', 'onnx', '.onnx', True, True],
        ['ONNX END2END', 'onnx_end2end', '_end2end.onnx', True, True],
        ['OpenVINO', 'openvino', '_openvino_model', True, False],
        ['TensorRT', 'engine', '.engine', False, True],
        ['CoreML', 'coreml', '.mlmodel', True, False],
        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],]
    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])


def try_export(inner_func):
    """Decorate an exporter so that it is timed, logged, and never propagates `Exception`.

    The wrapped function must return `(f, model)`. On success the elapsed time, the output path and its
    size are logged and `(f, model)` is returned; on any `Exception` the failure is logged and
    `(None, None)` is returned instead. The log prefix is read from the decorated function's default
    arguments (key 'prefix') via `get_default_args`.
    """
    # YOLO export decorator, i..e @try_export
    import functools  # local import, in line with the function-scope imports used throughout this file

    inner_args = get_default_args(inner_func)

    @functools.wraps(inner_func)  # keep the exporter's __name__/__doc__/signature on the wrapper
    def outer_func(*args, **kwargs):
        prefix = inner_args['prefix']
        try:
            with Profile() as dt:
                f, model = inner_func(*args, **kwargs)
            LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
            return f, model
        except Exception as e:
            LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
            return None, None

    return outer_func


@try_export
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    """Trace `model` with the example input `im` and save it next to `file` with a '.torchscript' suffix.

    A 'config.txt' extra file holding the input shape, the maximum stride and the class names (JSON) is
    stored inside the archive. With `optimize` the traced module is passed through `optimize_for_mobile`
    and saved for the lite interpreter.

    Returns:
        (f, None) where `f` is the output path.
    """
    # YOLO TorchScript model export
    LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
    f = file.with_suffix('.torchscript')

    ts = torch.jit.trace(model, im, strict=False)
    d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
    extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
    if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
        optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
    else:
        ts.save(str(f), _extra_files=extra_files)
    return f, None


@try_export
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
    """Export `model` to ONNX next to `file` ('.onnx' suffix), attach metadata and optionally simplify.

    Args:
        model: model to export; segmentation models get two outputs ('output0', 'output1').
        im: example input used for the export.
        file: path whose suffix is replaced by '.onnx'.
        opset: ONNX opset version.
        dynamic: if truthy, export with dynamic axes (model and input are moved to CPU first).
        simplify: if truthy, run onnx-simplifier on the result; a simplifier failure is only logged.

    Returns:
        (f, model_onnx): output path and the loaded ONNX model.
    """
    # YOLO ONNX export
    check_requirements('onnx')
    import onnx

    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    f = file.with_suffix('.onnx')

    output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
    if dynamic:
        # from here on `dynamic` holds the dynamic-axes mapping instead of the original flag
        dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
        if isinstance(model, SegmentationModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
            dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'}  # shape(1,32,160,160)
        elif isinstance(model, DetectionModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)

    torch.onnx.export(
        model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
        im.cpu() if dynamic else im,
        f,
        verbose=False,
        opset_version=opset,
        do_constant_folding=True,
        input_names=['images'],
        output_names=output_names,
        dynamic_axes=dynamic or None)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model

    # Metadata
    d = {'stride': int(max(model.stride)), 'names': model.names}
    for k, v in d.items():
        meta = model_onnx.metadata_props.add()
        meta.key, meta.value = k, str(v)
    onnx.save(model_onnx, f)

    # Simplify
    if simplify:
        try:
            cuda = torch.cuda.is_available()
            check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
            import onnxsim

            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
            model_onnx, check = onnxsim.simplify(model_onnx)
            assert check, 'assert check failed'
            onnx.save(model_onnx, f)
        except Exception as e:
            LOGGER.info(f'{prefix} simplifier failure: {e}')
    return f, model_onnx
@try_export
def export_onnx_end2end(model, im, file, simplify, topk_all, iou_thres, conf_thres, device, labels,
                        prefix=colorstr('ONNX END2END:')):
    """Export `model` wrapped in `End2End` to ONNX with outputs num_dets/det_boxes/det_scores/det_classes.

    Args:
        model: detection model to wrap.
        im: example input used for the export.
        file: source path; the output is written to '<file without extension>-end2end.onnx'.
        simplify: if truthy, run onnx-simplifier; a simplifier failure is only logged.
        topk_all, iou_thres, conf_thres, device, labels: forwarded to `End2End`.

    Returns:
        (f, model_onnx): output path (str) and the ONNX model that was saved.
    """
    # YOLO ONNX export
    check_requirements('onnx')
    import onnx

    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    # NOTE(review): export_formats() lists the suffix '_end2end.onnx' for this format while the file is
    # written with '-end2end.onnx' — confirm which spelling is intended before relying on the table
    f = os.path.splitext(file)[0] + "-end2end.onnx"
    batch_size = 'batch'

    dynamic_axes = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # variable length axes
    output_axes = {
        'num_dets': {0: 'batch'},
        'det_boxes': {0: 'batch'},
        'det_scores': {0: 'batch'},
        'det_classes': {0: 'batch'},
    }
    dynamic_axes.update(output_axes)
    model = End2End(model, topk_all, iou_thres, conf_thres, None, device, labels)

    output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
    # dimension labels, consumed in order below: one entry per dimension of each graph output
    shapes = [batch_size, 1,
              batch_size, topk_all, 4,
              batch_size, topk_all,
              batch_size, topk_all]

    torch.onnx.export(model,
                      im,
                      f,
                      verbose=False,
                      export_params=True,  # store the trained parameter weights inside the model file
                      opset_version=12,
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['images'],
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model
    for i in model_onnx.graph.output:
        for j in i.type.tensor_type.shape.dim:
            j.dim_param = str(shapes.pop(0))

    if simplify:
        try:
            import onnxsim

            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
            model_onnx, check = onnxsim.simplify(model_onnx)
            assert check, 'assert check failed'
        except Exception as e:
            LOGGER.info(f'{prefix} simplifier failure: {e}')

    # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
    onnx.save(model_onnx, f)  # success is reported by the @try_export wrapper
    return f, model_onnx


@try_export
def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
    """Convert the ONNX file that sits next to `file` into an OpenVINO model directory using `mo`.

    Args:
        file: path of the source weights; '<file>.onnx' is the converter input and every '.pt' in the
            path string is replaced by '_openvino_model<os.sep>' to form the output directory.
        metadata: written as '<file stem>.yaml' into the output directory via `yaml_save`.
        half: if truthy, pass '--compress_to_fp16' to the converter.

    Returns:
        (f, None) where `f` is the output directory (str).

    Raises:
        subprocess.CalledProcessError: if `mo` exits with a non-zero status (caught by @try_export).
    """
    # YOLO OpenVINO export
    check_requirements('openvino-dev')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
    import openvino.inference_engine as ie

    LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
    f = str(file).replace('.pt', f'_openvino_model{os.sep}')

    # Build the argument vector directly instead of splitting a formatted string, so that paths
    # containing whitespace reach `mo` as single arguments
    cmd = ['mo', '--input_model', str(file.with_suffix('.onnx')), '--output_dir', f]
    if half:
        cmd.append('--compress_to_fp16')
    subprocess.run(cmd, check=True, env=os.environ)  # export
    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
    return f, None


@try_export
def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
    """Convert `model` to a PaddlePaddle model directory with X2Paddle (trace mode, `im` as example input).

    Every '.pt' in the path string of `file` is replaced by '_paddle_model<os.sep>' to form the output
    directory; `metadata` is written there as '<file stem>.yaml' via `yaml_save`.

    Returns:
        (f, None) where `f` is the output directory (str).
    """
    # YOLO Paddle export
    check_requirements(('paddlepaddle', 'x2paddle'))
    import x2paddle
    from x2paddle.convert import pytorch2paddle

    LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...')
    f = str(file).replace('.pt', f'_paddle_model{os.sep}')

    pytorch2paddle(module=model, save_dir=f, jit_type='trace', input_examples=[im])  # export
    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
    return f, None
@try_export
def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
    """Trace `model` and convert it to a CoreML model saved next to `file` with a '.mlmodel' suffix.

    Weights are quantized to 8 bit ('kmeans_lut') when `int8` is set, else to 16 bit ('linear') when
    `half` is set; quantization is only attempted when the module-level `MACOS` flag is true and is
    skipped with a log message otherwise.

    Returns:
        (f, ct_model): output path and the converted CoreML model.
    """
    # YOLO CoreML export
    check_requirements('coremltools')
    import coremltools as ct

    LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
    f = file.with_suffix('.mlmodel')

    ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
    ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
    bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
    if bits < 32:
        if MACOS:  # quantization only supported on macOS
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress numpy==1.20 float warning
                ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
        else:
            LOGGER.info(f'{prefix} quantization only supported on macOS, skipping...')
    ct_model.save(f)
    return f, ct_model


@try_export
def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    """Build a TensorRT engine from `model` via an intermediate ONNX export (opset 12).

    Args:
        model: model to export.
        im: example input; must live on a non-CPU device.
        file: path whose suffix is replaced by '.onnx' (intermediate) and '.engine' (output).
        half: request an FP16 engine when the platform reports fast FP16 support.
        dynamic: add an optimization profile with batch sizes from 1 up to `im.shape[0]`.
        simplify: forwarded to `export_onnx`.
        workspace: builder workspace limit; the value is shifted left by 30 bits before being applied.
        verbose: raise the TensorRT logger severity to VERBOSE.

    Returns:
        (f, None) where `f` is the engine path.

    Raises:
        AssertionError: if `im` is on CPU or the ONNX file was not produced.
        RuntimeError: if the ONNX file cannot be parsed.
        (Both are caught and logged by @try_export.)
    """
    # YOLO TensorRT export https://developer.nvidia.com/tensorrt
    assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
    try:
        import tensorrt as trt
    except Exception:
        if platform.system() == 'Linux':
            check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
        import tensorrt as trt

    if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
        grid = model.model[-1].anchor_grid
        model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
        try:
            export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
        finally:
            model.model[-1].anchor_grid = grid  # always restore the model's original anchor grid
    else:  # TensorRT >= 8
        check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
    onnx = file.with_suffix('.onnx')

    LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
    assert onnx.exists(), f'failed to export ONNX file: {onnx}'
    f = file.with_suffix('.engine')  # TensorRT engine file
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE

    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    workspace_bytes = workspace * 1 << 30
    if hasattr(config, 'set_memory_pool_limit'):
        # newer TensorRT releases deprecate `max_workspace_size` in favour of the memory-pool API
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
    else:
        config.max_workspace_size = workspace_bytes

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx)):
        raise RuntimeError(f'failed to load ONNX file: {onnx}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    for inp in inputs:
        LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
    for out in outputs:
        LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')

    if dynamic:
        if im.shape[0] <= 1:
            LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument")
        profile = builder.create_optimization_profile()
        for inp in inputs:
            # (min, opt, max) shapes: batch 1, half the example batch (at least 1), the example batch
            profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
        config.add_optimization_profile(profile)

    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}')
    if builder.platform_has_fast_fp16 and half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    return f, None


@try_export
def export_saved_model(model,
                       im,
                       file,
                       dynamic,
                       tf_nms=False,
                       agnostic_nms=False,
                       topk_per_class=100,
                       topk_all=100,
                       iou_thres=0.45,
                       conf_thres=0.25,
                       keras=False,
                       prefix=colorstr('TensorFlow SavedModel:')):
    """Rebuild `model` as a `TFModel`, wrap it in a Keras model and save it as a TensorFlow SavedModel.

    Args:
        model: source model; its `yaml` and `nc` attributes are passed to `TFModel`.
        im: example input, unpacked as (batch, channels, *image size); replaced by a zero tensor in
            batch/height/width/channel order for TensorFlow.
        file: source path; every '.pt' in its string form is replaced by '_saved_model'.
        dynamic: if truthy, the Keras input is created without a fixed batch size.
        tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres: forwarded to
            `TFModel.predict`.
        keras: if truthy, save with `keras_model.save`; otherwise freeze the graph and save a `tf.Module`.

    Returns:
        (f, keras_model): output path (str) and the Keras model.
    """
    # YOLO TensorFlow SavedModel export
    try:
        import tensorflow as tf
    except Exception:
        check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}")
        import tensorflow as tf
    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

    from models.tf import TFModel

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    f = str(file).replace('.pt', '_saved_model')
    batch_size, ch, *imgsz = list(im.shape)  # BCHW

    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    im = tf.zeros((batch_size, *imgsz, ch))  # BHWC order for TensorFlow
    _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
    outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    keras_model.trainable = False
    keras_model.summary()
    if keras:
        keras_model.save(f, save_format='tf')
    else:
        spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
        m = tf.function(lambda x: keras_model(x))  # full model
        m = m.get_concrete_function(spec)
        frozen_func = convert_variables_to_constants_v2(m)
        tfm = tf.Module()
        # with tf_nms only the first four outputs of the frozen function are exposed
        tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec])
        tfm.__call__(im)
        tf.saved_model.save(tfm,
                            f,
                            options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(
                                tf.__version__, '2.6') else tf.saved_model.SaveOptions())
    return f, keras_model
tfm = tf.Module() tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec]) tfm.__call__(im) tf.saved_model.save(tfm, f, options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version( tf.__version__, '2.6') else tf.saved_model.SaveOptions()) return f, keras_model @try_export def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')): # YOLO TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow import tensorflow as tf from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = file.with_suffix('.pb') m = tf.function(lambda x: keras_model(x)) # full model m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) frozen_func = convert_variables_to_constants_v2(m) frozen_func.graph.as_graph_def() tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False) return f, None @try_export def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')): # YOLOv5 TensorFlow Lite export import tensorflow as tf LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') batch_size, ch, *imgsz = list(im.shape) # BCHW f = str(file).replace('.pt', '-fp16.tflite') converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] converter.target_spec.supported_types = [tf.float16] converter.optimizations = [tf.lite.Optimize.DEFAULT] if int8: from models.tf import representative_dataset_gen dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False) converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100) converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] 
@try_export
def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
    """Compile the INT8 TFLite model next to ``file`` for Edge TPU.

    See https://coral.ai/docs/edgetpu/models-intro/. If ``edgetpu_compiler`` is not
    available, an apt-based install is attempted first.

    Args:
        file: weights path (``pathlib.Path``); '<stem>-int8.tflite' is the compiler input and
            ``file.parent`` is the compiler output directory.
        prefix: log prefix.

    Returns:
        Tuple ``(path of the '-int8_edgetpu.tflite' model as str, None)``.

    Raises:
        AssertionError: when not running on Linux.
        subprocess.CalledProcessError: when an install step or the compiler fails.
    """
    cmd = 'edgetpu_compiler --version'
    help_url = 'https://coral.ai/docs/edgetpu/compiler/'
    assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
    if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0:
        LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
        sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0  # sudo installed on system
        # These fixed command strings use pipes, hence shell=True; no user input is interpolated.
        for c in (
                'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
                'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
            subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
    ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]

    LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
    f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
    f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model

    # Pass an argv list rather than splitting a formatted string, so that paths containing
    # whitespace reach the compiler as single arguments.
    subprocess.run(['edgetpu_compiler', '-s', '-d', '-k', '10', '--out_dir', str(file.parent), f_tfl], check=True)
    return f, None
def add_tflite_metadata(file, metadata, num_outputs):
    """Embed ``metadata`` into a *.tflite model as an associated 'meta.txt' file.

    Follows https://www.tensorflow.org/lite/models/convert/metadata. The optional
    ``tflite_support`` package is required; when it cannot be imported the function
    silently does nothing (it is not auto-installed here).

    Args:
        file: path of the *.tflite model to update in place.
        metadata: object whose ``str()`` is written as the associated file content.
        num_outputs: number of output tensor metadata entries to declare.
    """
    import tempfile

    with contextlib.suppress(ImportError):
        from tflite_support import flatbuffers
        from tflite_support import metadata as _metadata
        from tflite_support import metadata_schema_py_generated as _metadata_fb

        # A private temporary directory keeps the associated file name ('meta.txt') unchanged
        # while avoiding a hard-coded /tmp path: it works on platforms without /tmp, cannot be
        # clobbered by a concurrent export, and is removed even if populating fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file = Path(tmp_dir) / 'meta.txt'
            with open(tmp_file, 'w') as meta_f:
                meta_f.write(str(metadata))

            model_meta = _metadata_fb.ModelMetadataT()
            label_file = _metadata_fb.AssociatedFileT()
            label_file.name = tmp_file.name
            model_meta.associatedFiles = [label_file]

            subgraph = _metadata_fb.SubGraphMetadataT()
            subgraph.inputTensorMetadata = [_metadata_fb.TensorMetadataT()]
            subgraph.outputTensorMetadata = [_metadata_fb.TensorMetadataT()] * num_outputs
            model_meta.subgraphMetadata = [subgraph]

            b = flatbuffers.Builder(0)
            b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
            metadata_buf = b.Output()

            populator = _metadata.MetadataPopulator.with_model_file(file)
            populator.load_metadata_buffer(metadata_buf)
            populator.load_associated_files([str(tmp_file)])
            populator.populate()
use either --half or --dynamic but not both' model = attempt_load(weights, device=device, inplace=True, fuse=True) # load FP32 model # Checks imgsz *= 2 if len(imgsz) == 1 else 1 # expand if optimize: assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu' # Input gs = int(max(model.stride)) # grid size (max stride) imgsz = [check_img_size(x, gs) for x in imgsz] # verify img_size are gs-multiples im = torch.zeros(batch_size, 3, *imgsz).to(device) # image size(1,3,320,192) BCHW iDetection # Update model model.eval() for k, m in model.named_modules(): if isinstance(m, (Detect, DDetect, DualDetect, DualDDetect)): m.inplace = inplace m.dynamic = dynamic m.export = True for _ in range(2): y = model(im) # dry runs if half and not coreml: im, model = im.half(), model.half() # to FP16 shape = tuple((y[0] if isinstance(y, (tuple, list)) else y).shape) # model output shape metadata = {'stride': int(max(model.stride)), 'names': model.names} # model metadata LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)") # Exports f = [''] * len(fmts) # exported filenames warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) # suppress TracerWarning if jit: # TorchScript f[0], _ = export_torchscript(model, im, file, optimize) if engine: # TensorRT required before ONNX f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose) if onnx or xml: # OpenVINO requires ONNX f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify) if onnx_end2end: if isinstance(model, DetectionModel): labels = model.names f[2], _ = export_onnx_end2end(model, im, file, simplify, topk_all, iou_thres, conf_thres, device, len(labels)) else: raise RuntimeError("The model is not a DetectionModel.") if xml: # OpenVINO f[3], _ = export_openvino(file, metadata, half) if coreml: # CoreML f[4], _ = export_coreml(model, im, file, int8, half) if any((saved_model, pb, 
tflite, edgetpu, tfjs)): # TensorFlow formats assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.' assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.' f[5], s_model = export_saved_model(model.cpu(), im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs, agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class, topk_all=topk_all, iou_thres=iou_thres, conf_thres=conf_thres, keras=keras) if pb or tfjs: # pb prerequisite to tfjs f[6], _ = export_pb(s_model, file) if tflite or edgetpu: f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms) if edgetpu: f[8], _ = export_edgetpu(file) add_tflite_metadata(f[8] or f[7], metadata, num_outputs=len(s_model.outputs)) if tfjs: f[9], _ = export_tfjs(file) if paddle: # PaddlePaddle f[10], _ = export_paddle(model, im, file, metadata) # Finish f = [str(x) for x in f if x] # filter out '' and None if any(f): cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel)) # type dir = Path('segment' if seg else 'classify' if cls else '') h = '--half' if half else '' # --half FP16 inference arg s = "# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference" if cls else \ "# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference" if seg else '' if onnx_end2end: LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)' f"\nResults saved to {colorstr('bold', file.parent.resolve())}" f"\nVisualize: https://netron.app") else: LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)' f"\nResults saved to {colorstr('bold', file.parent.resolve())}" f"\nDetect: python {dir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {h}" f"\nValidate: python {dir / 'val.py'} --weights {f[-1]} {h}" f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', 
'{f[-1]}') {s}" f"\nVisualize: https://netron.app") return f # return list of exported files/dirs def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model.pt path(s)') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)') parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set YOLO Detect() inplace=True') parser.add_argument('--keras', action='store_true', help='TF: use Keras') parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile') parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization') parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes') parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model') parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version') parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log') parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)') parser.add_argument('--nms', action='store_true', help='TF: add NMS to model') parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model') parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep') parser.add_argument('--topk-all', type=int, default=100, help='ONNX END2END/TF.js NMS: topk for all classes to keep') parser.add_argument('--iou-thres', type=float, default=0.45, help='ONNX END2END/TF.js 
def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """Creates or loads a YOLO model

    Arguments:
        name (str): model name 'yolov3' or path 'path/to/best.pt'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLO .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters

    Returns:
        YOLO model

    Raises:
        Exception: any failure while selecting the device or building/loading the model is
            re-raised with a hint about stale caches, chained to the original error.
    """
    from pathlib import Path

    from models.common import AutoShape, DetectMultiBackend
    from models.experimental import attempt_load
    from models.yolo import ClassificationModel, DetectionModel, SegmentationModel
    from utils.downloads import attempt_download
    from utils.general import LOGGER, check_requirements, intersect_dicts, logging
    from utils.torch_utils import select_device

    if not verbose:
        LOGGER.setLevel(logging.WARNING)
    try:
        check_requirements(exclude=('opencv-python', 'tensorboard', 'thop'))
        name = Path(name)
        path = name.with_suffix('.pt') if name.suffix == '' and not name.is_dir() else name  # checkpoint path
        try:
            device = select_device(device)
            if pretrained and channels == 3 and classes == 80:
                try:
                    model = DetectMultiBackend(path, device=device, fuse=autoshape)  # detection model
                    if autoshape:
                        if model.pt and isinstance(model.model, ClassificationModel):
                            LOGGER.warning('WARNING ⚠️ YOLO ClassificationModel is not yet AutoShape compatible. '
                                           'You must pass torch tensors in BCHW to this model, i.e. shape(1,3,224,224).')
                        elif model.pt and isinstance(model.model, SegmentationModel):
                            LOGGER.warning('WARNING ⚠️ YOLO SegmentationModel is not yet AutoShape compatible. '
                                           'You will not be able to run inference with this model.')
                        else:
                            model = AutoShape(model)  # for file/URI/PIL/cv2/np inputs and NMS
                except Exception:
                    # fall back to plain checkpoint loading for anything DetectMultiBackend rejects
                    model = attempt_load(path, device=device, fuse=False)  # arbitrary model
            else:
                cfg = list((Path(__file__).parent / 'models').rglob(f'{path.stem}.yaml'))[0]  # model.yaml path
                model = DetectionModel(cfg, channels, classes)  # create model
                if pretrained:
                    ckpt = torch.load(attempt_download(path), map_location=device)  # load
                    csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
                    csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
                    model.load_state_dict(csd, strict=False)  # load
                    if len(ckpt['model'].names) == classes:
                        model.names = ckpt['model'].names  # set class names attribute
            return model.to(device)

        except Exception as e:
            help_url = 'https://github.com/ultralytics/yolov5/issues/36'
            s = f'{e}. Cache may be out of date, try `force_reload=True` or see {help_url} for help.'
            raise Exception(s) from e
    finally:
        # Restore the shared logger on every exit path; previously a failure with
        # verbose=False left LOGGER stuck at WARNING for the rest of the process.
        if not verbose:
            LOGGER.setLevel(logging.INFO)  # reset to default
def autopad(k, p=None, d=1):
    """Return the padding that gives 'same'-shape outputs.

    Args:
        k: kernel size, an int or an iterable of ints.
        p: explicit padding; returned unchanged when not None.
        d: dilation; values above 1 enlarge the kernel to d * (k - 1) + 1 first.

    Returns:
        ``p`` if given, otherwise half the (dilated) kernel size: an int for an int
        kernel, a list for an iterable kernel.
    """
    if d > 1:
        # actual extent covered by a dilated kernel
        if isinstance(k, int):
            k = d * (k - 1) + 1
        else:
            k = [d * (size - 1) + 1 for size in k]
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]
activation def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False): super().__init__() assert k == 3 and p == 1 self.g = g self.c1 = c1 self.c2 = c2 self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() self.bn = None self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False) self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False) def forward_fuse(self, x): """Forward process""" return self.act(self.conv(x)) def forward(self, x): """Forward process""" id_out = 0 if self.bn is None else self.bn(x) return self.act(self.conv1(x) + self.conv2(x) + id_out) def get_equivalent_kernel_bias(self): kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2) kernelid, biasid = self._fuse_bn_tensor(self.bn) return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid def _avg_to_3x3_tensor(self, avgp): channels = self.c1 groups = self.g kernel_size = avgp.kernel_size input_dim = channels // groups k = torch.zeros((channels, input_dim, kernel_size, kernel_size)) k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2 return k def _pad_1x1_to_3x3_tensor(self, kernel1x1): if kernel1x1 is None: return 0 else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) def _fuse_bn_tensor(self, branch): if branch is None: return 0, 0 if isinstance(branch, Conv): kernel = branch.conv.weight running_mean = branch.bn.running_mean running_var = branch.bn.running_var gamma = branch.bn.weight beta = branch.bn.bias eps = branch.bn.eps elif isinstance(branch, nn.BatchNorm2d): if not hasattr(self, 'id_tensor'): input_dim = self.c1 // self.g kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32) for i in range(self.c1): kernel_value[i, i % input_dim, 1, 1] = 1 self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) kernel = self.id_tensor running_mean = 
class ConvTranspose(nn.Module):
    """Transposed 2D convolution followed by optional batch norm and activation."""

    default_act = nn.SiLU()  # activation used when act is True

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """Build the layer.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            p: padding.
            bn: add BatchNorm2d after the convolution; the convolution bias is enabled only
                when this is false.
            act: True for the default activation, an nn.Module to use it as-is, anything
                else for no activation.
        """
        super().__init__()
        use_bias = not bn
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=use_bias)
        if bn:
            self.bn = nn.BatchNorm2d(c2)
        else:
            self.bn = nn.Identity()
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply convolution, normalisation and activation in that order."""
        out = self.conv_transpose(x)
        out = self.bn(out)
        return self.act(out)
class DFL(nn.Module):
    """DFL module: softmax over c1 bins followed by a fixed 1x1 convolution.

    The convolution weights are 0..c1-1 and frozen, so each output is the
    softmax-weighted sum of the bin indices.
    """

    def __init__(self, c1=17):
        """Create the frozen projection for ``c1`` bins."""
        super().__init__()
        self.c1 = c1
        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
        bin_index = torch.arange(c1, dtype=torch.float).view(1, c1, 1, 1)
        self.conv.weight.data.copy_(bin_index)

    def forward(self, x):
        """Map x of shape (batch, 4 * c1, anchors) to (batch, 4, anchors)."""
        batch, _, anchors = x.shape
        # move the bin axis to dim 1 so both softmax and the 1x1 conv act over bins
        bins_first = x.view(batch, 4, self.c1, anchors).transpose(2, 1)
        probs = bins_first.softmax(1)
        return self.conv(probs).view(batch, 4, anchors)
class Bottleneck(nn.Module):
    """Standard bottleneck: two stacked Conv layers with an optional residual add."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Build the block.

        Args:
            c1: input channels.
            c2: output channels.
            shortcut: request the residual add (only used when c1 == c2).
            g: groups of the second convolution.
            k: kernel sizes of the first and second convolution.
            e: expansion ratio giving the hidden width int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)
        first_kernel, second_kernel = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_kernel, 1)
        self.cv2 = Conv(hidden, c2, second_kernel, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Run both convolutions and add the input back when the shortcut is active."""
        y = self.cv2(self.cv1(x))
        if self.add:
            return x + y
        return y
class CSP(nn.Module):
    """CSP Bottleneck with 3 convolutions."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """Build the block.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked Bottleneck modules.
            shortcut: passed to every Bottleneck.
            g: groups passed to every Bottleneck.
            e: expansion ratio giving the hidden width int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        """Concatenate the bottleneck branch with the plain 1x1 branch and fuse them."""
        deep = self.m(self.cv1(x))
        bypass = self.cv2(x)
        merged = torch.cat((deep, bypass), 1)
        return self.cv3(merged)
class SPP(nn.Module):
    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """Build the layer.

        Args:
            c1: input channels; the hidden width is c1 // 2.
            c2: output channels.
            k: kernel sizes of the stride-1 max-pool branches.
        """
        super().__init__()
        hidden = c1 // 2
        branches = len(k) + 1  # un-pooled tensor plus one per pool
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * branches, c2, 1, 1)
        pools = []
        for size in k:
            pools.append(nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2))
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Reduce channels, pool at every kernel size and fuse the concatenation."""
        reduced = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            pyramid = [reduced]
            for pool in self.m:
                pyramid.append(pool(reduced))
            return self.cv2(torch.cat(pyramid, 1))
class ReOrg(nn.Module):
    # Space-to-depth: splits the last two dims into their four 2x2 sampling phases and stacks the phases on dim 1,
    # so the last two dims halve and dim 1 grows fourfold
    def __init__(self):
        super().__init__()

    def forward(self, x):
        # (start offset on dim -2, start offset on dim -1) of each phase, in the order they are concatenated
        offsets = ((0, 0), (1, 0), (0, 1), (1, 1))
        phases = [x[..., first::2, second::2] for first, second in offsets]
        return torch.cat(phases, 1)
class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain  # spatial upscaling factor; channels shrink by gain ** 2

    def forward(self, x):
        batch, channels, height, width = x.size()
        s = self.gain
        out_channels = channels // s ** 2
        # split dim 1 into (s, s, out_channels): the two leading factors become the sub-pixel offsets
        tiles = x.view(batch, s, s, out_channels, height, width)  # x(1,2,2,16,80,80)
        # move each offset next to the spatial dim it subdivides
        interleaved = tiles.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return interleaved.view(batch, out_channels, height * s, width * s)  # x(1,16,160,160)
class ImplicitA(nn.Module):
    # Learned additive term: a (1, channel, 1, 1) parameter drawn from N(0, 0.02) that is added to the input
    def __init__(self, channel):
        super().__init__()
        self.channel = channel
        offset = nn.Parameter(torch.zeros(1, channel, 1, 1))
        nn.init.normal_(offset, std=0.02)
        self.implicit = offset  # registered as a parameter by nn.Module on assignment

    def forward(self, x):
        return self.implicit + x
class DetectMultiBackend(nn.Module):
    # YOLO MultiBackend class for python inference on various backends.
    # __init__ picks a backend from the weights path, loads it, and then copies EVERY local variable onto the
    # instance (see the last line of __init__); forward() dispatches on those flags and handles.
    def __init__(self, weights='yolo.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
        # Usage:
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlmodel
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        #
        # Arguments:
        #   weights: path/URL of the model, or a list of them (only the PyTorch branch uses the whole list)
        #   device:  torch.device the outputs (and torch-based backends) live on
        #   dnn:     route *.onnx through OpenCV DNN instead of ONNX Runtime
        #   data:    optional dataset yaml whose 'names' entry is used when the model carries no class names
        #   fp16:    request half precision; silently disabled for backends other than pt/jit/onnx/engine
        #   fuse:    forwarded to attempt_load() for PyTorch weights
        #
        # NOTE: local variable names in this method are part of the class interface, because they all become
        # attributes through self.__dict__.update(locals()). Do not rename them.
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, onnx_end2end, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCHW)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files['config.txt']:  # load metadata dict
                d = json.loads(extra_files['config.txt'],
                               object_hook=lambda d: {int(k) if k.isdigit() else k: v
                                                      for k, v in d.items()})
                stride, names = int(d['stride']), d['names']
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements('opencv-python>=4.5.4')
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if 'stride' in meta:
                # NOTE(security): eval() executes whatever string the model file stores under 'names'.
                # Only load ONNX files from trusted sources. If the exporter always writes a plain literal,
                # ast.literal_eval (already used for TFLite metadata below) would be the safe parser - confirm.
                stride, names = int(meta['stride']), eval(meta['names'])
        elif xml:  # OpenVINO
            LOGGER.info(f'Loading {w} for OpenVINO inference...')
            check_requirements('openvino')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch
            ie = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
            if network.get_parameters()[0].get_layout().empty:
                network.get_parameters()[0].set_layout(Layout("NCHW"))
            batch_dim = get_batch(network)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            executable_network = ie.compile_model(network, device_name="CPU")  # device_name="MYRIAD" for Intel NCS2
            # _load_metadata() returns (None, None) when the sidecar yaml is absent. Assigning that directly
            # would leave stride=None and names=None, which skips the class-name fallback below and then fails
            # on names[0]. Keep the defaults in that case.
            meta_stride, meta_names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
            if meta_stride is not None:
                stride, names = meta_stride, meta_names
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
            if device.type == 'cpu':
                device = torch.device('cuda:0')
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            for i in range(model.num_bindings):
                name = model.get_binding_name(i)
                dtype = trt.nptype(model.get_binding_dtype(i))
                if model.binding_is_input(i):
                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                        dynamic = True
                        context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                    if dtype == np.float16:
                        fp16 = True
                else:  # output
                    output_names.append(name)
                shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
            import tensorflow as tf
            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf  # kept as a local on purpose: forward() reads it back as self.tf

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                # Output tensors are the nodes that no other node consumes (NoOp nodes excluded)
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, 'rb') as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                delegate = {
                    'Linux': 'libedgetpu.so.1',
                    'Darwin': 'libedgetpu.1.dylib',
                    'Windows': 'edgetpu.dll'}[platform.system()]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata (best effort: a model file that is not a zip archive simply has none)
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
                    stride, names = int(meta['stride']), meta['names']
        elif tfjs:  # TF.js
            raise NotImplementedError('ERROR: YOLO TF.js inference is not supported')
        elif paddle:  # PaddlePaddle
            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
            import paddle.inference as pdi
            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix('.pdiparams')
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f'Using {w} as Triton Inference Server...')
            check_requirements('tritonclient[all]')
            from utils.triton import TritonRemoteModel
            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith("tensorflow")
        else:
            raise NotImplementedError(f'ERROR: {w} is not a supported format')

        # class names: fall back to the dataset yaml, or to placeholder names, when no branch above set them
        if 'names' not in locals():
            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
        if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        # YOLO MultiBackend inference.
        # im is a BCHW torch tensor; augment/visualize are only honoured by the PyTorch backend.
        # Returns a single tensor when the backend yields one output, otherwise a list of tensors;
        # numpy outputs are converted with from_numpy().
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.ANTIALIAS)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                # builtin float instead of np.float: the alias was removed in NumPy 1.24 and both mean float64
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input_detail = self.input_details[0]
                int8 = input_detail['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input_detail['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input_detail['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    def from_numpy(self, x):
        # Convert a numpy array to a tensor on self.device; anything else is returned unchanged
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        # Warmup model by running inference once (twice for TorchScript) on an uninitialised input.
        # Only runs for the listed backends, and only when the device is not the CPU or the backend is Triton.
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _disambiguate_tflite(types, tflite=9, edgetpu=10):
        # Clear the plain-TFLite flag when the Edge TPU flag is set, since '*_edgetpu.tflite' also matches
        # the '.tflite' suffix. The default positions follow the unpack order used in __init__
        # (..., saved_model=7, pb=8, tflite=9, edgetpu=10, ...). Mutates and returns `types`.
        types[tflite] &= not types[edgetpu]
        return types

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
        # types = [pt, jit, onnx, onnx_end2end, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        # The returned list has one extra trailing flag: triton.
        from export import export_formats
        from utils.downloads import is_url
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types = DetectMultiBackend._disambiguate_tflite(types)  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
        # Load metadata from meta.yaml if it exists; returns (stride, names), or (None, None) when f is missing
        if f.exists():
            d = yaml_load(f)
            return d['stride'], d['names']  # assign stride, names
        return None, None
') copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance self.pt = not self.dmb or model.pt # PyTorch model self.model = model.eval() if self.pt: m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() m.inplace = False # Detect.inplace=False for safe multithread inference m.export = True # do not output loss values def _apply(self, fn): # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers self = super()._apply(fn) from models.yolo import Detect, Segment if self.pt: m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() if isinstance(m, (Detect, Segment)): for k in 'stride', 'anchor_grid', 'stride_grid', 'grid': x = getattr(m, k) setattr(m, k, list(map(fn, x))) if isinstance(x, (list, tuple)) else setattr(m, k, fn(x)) return self @smart_inference_mode() def forward(self, ims, size=640, augment=False, profile=False): # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are: # file: ims = 'data/images/zidane.jpg' # str or PosixPath # URI: = 'https://ultralytics.com/images/zidane.jpg' # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) # numpy: = np.zeros((640,1280,3)) # HWC # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images dt = (Profile(), Profile(), Profile()) with dt[0]: if isinstance(size, int): # expand size = (size, size) p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference if isinstance(ims, torch.Tensor): # torch with amp.autocast(autocast): return self.model(ims.to(p.device).type_as(p), augment=augment) # inference # Pre-process n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images shape0, shape1, files = [], [], [] # image and inference shapes, filenames for i, im in enumerate(ims): f = f'image{i}' # filename if isinstance(im, (str, Path)): # filename or uri im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im im = np.asarray(exif_transpose(im)) elif isinstance(im, Image.Image): # PIL Image im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f files.append(Path(f).with_suffix('.jpg').name) if im.shape[0] < 5: # image in CHW im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input s = im.shape[:2] # HWC shape0.append(s) # image shape g = max(size) / max(s) # gain shape1.append([int(y * g) for y in s]) ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 with amp.autocast(autocast): # Inference with dt[1]: y = self.model(x, augment=augment) # forward # Post-process with dt[2]: y = non_max_suppression(y if self.dmb else y[0], self.conf, self.iou, self.classes, self.agnostic, 
class Detections:
    # YOLO detections class for inference results.
    # Holds the images, the per-image prediction tensors and derived box formats, and offers helpers to
    # print, show, save, crop, render or tabulate the results.
    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        # ims:   list of images (numpy arrays); pred: list of per-image tensors, one row per detection
        # files: list of image filenames; times: per-stage profilers exposing a `.t` attribute
        # names: class-index -> name lookup; shape: inference tensor shape
        # NOTE(review): the default times=(0, 0, 0) cannot be used as-is, because `x.t` below is read
        # from each element and plain ints have no `.t` - confirm callers always pass profiler objects.
        super().__init__()
        d = pred[0].device  # device
        # per-image divisor [shape[1], shape[0], shape[1], shape[0], 1, 1]: scales the four box columns and
        # leaves conf/cls untouched (presumably width/height of an HWC image - confirm against callers)
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
        # Shared worker behind print/show/save/crop/render.
        # Returns the summary string when pprint is set, the list of crop dicts when crop is set, else None.
        s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            # crops are written under save_dir/crops/<class name>/<file> only when save is set
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
                                'cls': cls,
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                display(im) if is_notebook() else im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:  # log once, after the last image
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)  # overwrites the stored image with the annotated one
        if pprint:
            s = s.lstrip('\n')
            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    @TryExcept('Showing images is not supported in this environment')
    def show(self, labels=True):
        # Display every annotated image
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
        # Write every annotated image into save_dir (passed through increment_path with mkdir=True)
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
        # Return one dict per detection (box, conf, cls, label, im); the directory is only prepared when save is set
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        # Draw the boxes into self.ims in place and return that list
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            # each row becomes: first five values, integer class id, class name
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #    for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #        setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def print(self):
        # Log the summary string produced by __str__
        LOGGER.info(self.__str__())

    def __len__(self):  # override len(results)
        return self.n

    def __str__(self):  # override print(results)
        return self._run(pprint=True)  # print results

    def __repr__(self):
        return f'YOLO {self.__class__} instance\n' + self.__str__()
x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() c_ = 1280 # efficientnet_b0 size self.conv = Conv(c1, c_, k, s, autopad(k, p), g) self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1) self.drop = nn.Dropout(p=0.0, inplace=True) self.linear = nn.Linear(c_, c2) # to x(b,c2) def forward(self, x): if isinstance(x, list): x = torch.cat(x, 1) return self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) ================================================ FILE: models/detect/gelan-c.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 # avg-conv down [-1, 1, ADown, [256]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 # avg-conv down [-1, 1, ADown, [512]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 # avg-conv down [-1, 1, ADown, [512]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 ] # gelan head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], 
[[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) # detect [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/gelan-e.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 # avg-conv down [-1, 1, ADown, [256]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 # avg-conv down [-1, 1, ADown, [512]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 # avg-conv down [-1, 1, ADown, [1024]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 # routing [1, 1, CBLinear, [[64]]], # 10 [3, 1, CBLinear, [[64, 128]]], # 11 [5, 1, CBLinear, [[64, 128, 256]]], # 12 [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 # conv down fuse [0, 1, Conv, [64, 3, 2]], # 15-P1/2 [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 # conv down fuse [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 # avg-conv down fuse [-1, 1, ADown, [256]], # 20-P3/8 [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 # avg-conv down fuse [-1, 1, ADown, [512]], # 23-P4/16 [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 # elan-2 block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 # avg-conv down fuse [-1, 1, ADown, [1024]], # 26-P5/32 [[14, -1], 1, CBFuse, [[4]]], # 27 # 
elan-2 block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 ] # gelan head head: [ # elan-spp block [28, 1, SPPELAN, [512, 256]], # 29 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 25], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 22], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 32], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 38 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 29], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 41 (P5/32-large) # detect [[35, 38, 41], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/gelan-m.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [32, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 1]], # 2 # avg-conv down [-1, 1, AConv, [240]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [240, 240, 120, 1]], # 4 # avg-conv down [-1, 1, AConv, [360]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 6 # avg-conv down [-1, 1, AConv, [480]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [480, 480, 240, 1]], # 8 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [480, 240]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 12 #
up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [240, 240, 120, 1]], # 15 # avg-conv-down merge [-1, 1, AConv, [180]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, AConv, [240]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [480, 480, 240, 1]], # 21 (P5/32-large) # detect [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/gelan-s.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [32, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, ELAN1, [64, 64, 32]], # 2 # avg-conv down [-1, 1, AConv, [128]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 4 # avg-conv down [-1, 1, AConv, [192]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 6 # avg-conv down [-1, 1, AConv, [256]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 3]], # 8 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [256, 128]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 15 # avg-conv-down merge [-1, 1, AConv, [96]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 18 (P4/16-medium) # avg-conv-down 
merge [-1, 1, AConv, [128]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 3]], # 21 (P5/32-large) # detect [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/gelan-t.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [16, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [32, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, ELAN1, [32, 32, 16]], # 2 # avg-conv down [-1, 1, AConv, [64]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 4 # avg-conv down [-1, 1, AConv, [96]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 6 # avg-conv down [-1, 1, AConv, [128]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 8 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [128, 64]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 15 # avg-conv-down merge [-1, 1, AConv, [48]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, AConv, [64]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 21 (P5/32-large) # detect [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/gelan.yaml ================================================ # YOLOv9 # parameters 
nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 # conv down [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 # conv down [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 # conv down [-1, 1, Conv, [512, 3, 2]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 ] # gelan head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) # conv-down merge [-1, 1, Conv, [256, 3, 2]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) # conv-down merge [-1, 1, Conv, [512, 3, 2]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) # detect [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) ] ================================================ FILE: models/detect/yolov7-af.yaml ================================================ # YOLOv7 # Parameters nc: 80 # number of classes depth_multiple: 1. # model depth multiple width_multiple: 1.
# layer channel multiple anchors: 3 # YOLOv7 backbone backbone: # [from, number, module, args] [[-1, 1, Conv, [32, 3, 1]], # 0 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 [-1, 1, Conv, [64, 1, 1]], [-2, 1, Conv, [64, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 11 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3], 1, Concat, [1]], # 16-P3/8 [-1, 1, Conv, [128, 1, 1]], [-2, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 24 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3], 1, Concat, [1]], # 29-P4/16 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 37 [-1, 1, MP, []], [-1, 1, Conv, [512, 1, 1]], [-3, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [512, 3, 2]], [[-1, -3], 1, Concat, [1]], # 42-P5/32 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 50 ] # yolov7 head head: [[-1, 1, SPPCSPC, [512]], # 51 [-1, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [37, 1, Conv, [256, 1, 1]], # route backbone P4 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, 
[1]], [-1, 1, Conv, [256, 1, 1]], # 63 [-1, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [24, 1, Conv, [128, 1, 1]], # route backbone P3 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], [-2, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], # 75 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3, 63], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 88 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3, 51], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], [-2, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 101 [75, 1, Conv, [256, 3, 1]], [88, 1, Conv, [512, 3, 1]], [101, 1, Conv, [1024, 3, 1]], [[102, 103, 104], 1, Detect, [nc]], # Detect(P3, P4, P5) ] ================================================ FILE: models/detect/yolov9-c.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # YOLOv9 backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 # avg-conv down [-1, 1, ADown, [256]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 # 
avg-conv down [-1, 1, ADown, [512]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 # avg-conv down [-1, 1, ADown, [512]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 ] # YOLOv9 head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 10 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 13], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 10], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) # multi-level reversible auxiliary branch # routing [5, 1, CBLinear, [[256]]], # 23 [7, 1, CBLinear, [[256, 512]]], # 24 [9, 1, CBLinear, [[256, 512, 512]]], # 25 # conv down [0, 1, Conv, [64, 3, 2]], # 26-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 # avg-conv down fuse [-1, 1, ADown, [256]], # 29-P3/8 [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 # avg-conv down fuse [-1, 1, ADown, [512]], # 32-P4/16 [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 # avg-conv down fuse [-1, 1, ADown, [512]], # 35-P5/32 [[25, -1], 1, CBFuse, [[2]]], # 36 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 # detection head # detect [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/detect/yolov9-cf.yaml 
================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # YOLOv9 backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 # avg-conv down [-1, 1, ADown, [256]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 # avg-conv down [-1, 1, ADown, [512]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 # avg-conv down [-1, 1, ADown, [512]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 ] # YOLOv9 head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 10 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 13], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 10], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) # multi-level reversible auxiliary branch # routing [5, 1, CBLinear, [[256]]], # 23 [7, 1, CBLinear, [[256, 512]]], # 24 [9, 1, CBLinear, [[256, 512, 512]]], # 25 # conv down [0, 1, Conv, [64, 3, 2]], # 26-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 # avg-conv down fuse [-1, 1, ADown, [256]], # 29-P3/8 [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 # elan-2 
block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 # avg-conv down fuse [-1, 1, ADown, [512]], # 32-P4/16 [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 # avg-conv down fuse [-1, 1, ADown, [512]], # 35-P5/32 [[25, -1], 1, CBFuse, [[2]]], # 36 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 # detection head # detect [[31, 34, 37, 16, 19, 22, 16, 19, 22], 1, TripleDDetect, [nc]], # TripleDDetect(A3, A4, A5, P3, P4, P5, P3, P4, P5) Auxiliary/Coarse(NMS-based)/Fine(NMS-free) ] ================================================ FILE: models/detect/yolov9-e.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # YOLOv9 backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # csp-elan block [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 # avg-conv down [-1, 1, ADown, [256]], # 4-P3/8 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 # avg-conv down [-1, 1, ADown, [512]], # 6-P4/16 # csp-elan block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 # avg-conv down [-1, 1, ADown, [1024]], # 8-P5/32 # csp-elan block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 # routing [1, 1, CBLinear, [[64]]], # 10 [3, 1, CBLinear, [[64, 128]]], # 11 [5, 1, CBLinear, [[64, 128, 256]]], # 12 [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 # conv down [0, 1, Conv, [64, 3, 2]], # 15-P1/2 [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 # conv down [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 # csp-elan block [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 # avg-conv down fuse [-1, 1, ADown, [256]], # 
20-P3/8 [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 # avg-conv down fuse [-1, 1, ADown, [512]], # 23-P4/16 [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 # csp-elan block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 # avg-conv down fuse [-1, 1, ADown, [1024]], # 26-P5/32 [[14, -1], 1, CBFuse, [[4]]], # 27 # csp-elan block [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 ] # YOLOv9 head head: [ # multi-level auxiliary branch # elan-spp block [9, 1, SPPELAN, [512, 256]], # 29 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # csp-elan block [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 # main branch # elan-spp block [28, 1, SPPELAN, [512, 256]], # 36 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 25], 1, Concat, [1]], # cat backbone P4 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 22], 1, Concat, [1]], # cat backbone P3 # csp-elan block [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 39], 1, Concat, [1]], # cat head P4 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 36], 1, Concat, [1]], # cat head P5 # csp-elan block [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) # detect [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/detect/yolov9-m.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # 
layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [32, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [64, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 1]], # 3 # avg-conv down [-1, 1, AConv, [240]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [240, 240, 120, 1]], # 5 # avg-conv down [-1, 1, AConv, [360]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 7 # avg-conv down [-1, 1, AConv, [480]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [480, 480, 240, 1]], # 9 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [480, 240]], # 10 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 13 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [240, 240, 120, 1]], # 16 # avg-conv-down merge [-1, 1, AConv, [180]], [[-1, 13], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 19 (P4/16-medium) # avg-conv-down merge [-1, 1, AConv, [240]], [[-1, 10], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [480, 480, 240, 1]], # 22 (P5/32-large) # routing [5, 1, CBLinear, [[240]]], # 23 [7, 1, CBLinear, [[240, 360]]], # 24 [9, 1, CBLinear, [[240, 360, 480]]], # 25 # conv down [0, 1, Conv, [32, 3, 2]], # 26-P1/2 # conv down [-1, 1, Conv, [64, 3, 2]], # 27-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 1]], # 28 # avg-conv down [-1, 1, AConv, [240]], # 29-P3/8 [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 # elan-2 block [-1, 1, RepNCSPELAN4, [240, 240, 120, 1]], # 31 # avg-conv down [-1, 1, AConv, [360]], # 32-P4/16 [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 # elan-2 block [-1, 1, RepNCSPELAN4, [360, 360, 180, 1]], # 34 # avg-conv down [-1, 
1, AConv, [480]], # 35-P5/32 [[25, -1], 1, CBFuse, [[2]]], # 36 # elan-2 block [-1, 1, RepNCSPELAN4, [480, 480, 240, 1]], # 37 # detect [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/detect/yolov9-s.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [32, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, ELAN1, [64, 64, 32]], # 2 # avg-conv down [-1, 1, AConv, [128]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 4 # avg-conv down [-1, 1, AConv, [192]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 6 # avg-conv down [-1, 1, AConv, [256]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 3]], # 8 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [256, 128]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 15 # avg-conv-down merge [-1, 1, AConv, [96]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192, 96, 3]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, AConv, [128]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 3]], # 21 (P5/32-large) # elan-spp block [8, 1, SPPELAN, [256, 128]], # 22 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [192, 192,
96, 3]], # 25 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 28 # detect [[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/detect/yolov9-t.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [16, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [32, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, ELAN1, [32, 32, 16]], # 2 # avg-conv down [-1, 1, AConv, [64]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 4 # avg-conv down [-1, 1, AConv, [96]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 6 # avg-conv down [-1, 1, AConv, [128]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 8 ] # elan head head: [ # elan-spp block [-1, 1, SPPELAN, [128, 64]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 15 # avg-conv-down merge [-1, 1, AConv, [48]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, AConv, [64]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [128, 128, 64, 3]], # 21 (P5/32-large) # elan-spp block [8, 1, SPPELAN, [128, 64]], # 22 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2
block [-1, 1, RepNCSPELAN4, [96, 96, 48, 3]], # 25 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [64, 64, 32, 3]], # 28 # detect [[28, 25, 22, 15, 18, 21], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/detect/yolov9.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) activation: nn.ReLU() # anchors anchors: 3 # YOLOv9 backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 # conv down [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 # conv down [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 # conv down [-1, 1, Conv, [512, 3, 2]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 ] # YOLOv9 head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 10 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) # conv-down merge [-1, 1, Conv, [256, 3, 2]], [[-1, 13], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) # conv-down merge [-1, 1, Conv, [512, 3, 2]], [[-1, 10], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) # routing [5, 1, CBLinear, [[256]]], # 23 [7, 1,
CBLinear, [[256, 512]]], # 24 [9, 1, CBLinear, [[256, 512, 512]]], # 25 # conv down [0, 1, Conv, [64, 3, 2]], # 26-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 # conv down fuse [-1, 1, Conv, [256, 3, 2]], # 29-P3/8 [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 # conv down fuse [-1, 1, Conv, [512, 3, 2]], # 32-P4/16 [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 # conv down fuse [-1, 1, Conv, [512, 3, 2]], # 35-P5/32 [[25, -1], 1, CBFuse, [[2]]], # 36 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 # detect [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) ] ================================================ FILE: models/experimental.py ================================================ import math import numpy as np import torch import torch.nn as nn from utils.downloads import attempt_download class Sum(nn.Module): # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 def __init__(self, n, weight=False): # n: number of inputs super().__init__() self.weight = weight # apply weights boolean self.iter = range(n - 1) # iter object if weight: self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights def forward(self, x): y = x[0] # no weight if self.weight: w = torch.sigmoid(self.w) * 2 for i in self.iter: y = y + x[i + 1] * w[i] else: for i in self.iter: y = y + x[i + 1] return y class MixConv2d(nn.Module): # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy super().__init__() n = len(k) # number of convolutions if equal_ch: # equal c_ per group i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices c_ = [(i == g).sum() for g in range(n)] # intermediate channels else: # equal 
class Ensemble(nn.ModuleList):
    """Ensemble of models whose predictions are concatenated so one NMS pass can cover all of them."""

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Run every member on ``x`` and concatenate their first outputs along dim 1.

        Returns:
            ``(predictions, None)`` -- the second slot is the (absent) train output.
        """
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # Alternatives kept for reference:
        #   y = torch.stack(y).max(0)[0]  # max ensemble
        #   y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output


class ORT_NMS(torch.autograd.Function):
    """ONNX-Runtime NMS operation.

    ``forward`` performs no suppression: it fabricates a random index tensor of the right
    dtype and layout so that the surrounding graph can be traced.  ``symbolic`` is what
    emits the real ``NonMaxSuppression`` node when the model is exported.
    """

    @staticmethod
    def forward(ctx,
                boxes,
                scores,
                max_output_boxes_per_class=torch.tensor([100]),
                iou_threshold=torch.tensor([0.45]),
                score_threshold=torch.tensor([0.25])):
        """Return a placeholder int64 tensor of shape ``(num_det, 3)``.

        Columns are (batch index, 0, box index).  Box indices are drawn from
        ``[100, 100 + num_det)`` with ``num_det <= 100``, so eager callers that index
        with the result need at least 200 boxes.
        """
        device = boxes.device
        batch = scores.shape[0]
        num_det = random.randint(0, 100)
        batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
        idxs = torch.arange(100, 100 + num_det).to(device)
        zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
        selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
        selected_indices = selected_indices.to(torch.int64)
        return selected_indices

    @staticmethod
    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
        """Emit the ONNX ``NonMaxSuppression`` node for export."""
        return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)


class TRT_NMS(torch.autograd.Function):
    """TensorRT NMS operation.

    ``forward`` only produces random placeholders with the output shapes and dtypes of the
    plugin; ``symbolic`` emits the ``TRT::EfficientNMS_TRT`` node during export.
    """

    @staticmethod
    def forward(
            ctx,
            boxes,
            scores,
            background_class=-1,
            box_coding=1,
            iou_threshold=0.45,
            max_output_boxes=100,
            plugin_version="1",
            score_activation=0,
            score_threshold=0.25,
    ):
        """Return placeholder ``(num_det, det_boxes, det_scores, det_classes)`` tensors.

        ``scores`` must be 3-D: (batch, boxes, classes).
        """
        batch_size, num_boxes, num_classes = scores.shape
        num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
        det_boxes = torch.randn(batch_size, max_output_boxes, 4)
        det_scores = torch.randn(batch_size, max_output_boxes)
        det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)
        return num_det, det_boxes, det_scores, det_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        """Emit the ``TRT::EfficientNMS_TRT`` plugin node with its four outputs."""
        out = g.op("TRT::EfficientNMS_TRT",
                   boxes,
                   scores,
                   background_class_i=background_class,
                   box_coding_i=box_coding,
                   iou_threshold_f=iou_threshold,
                   max_output_boxes_i=max_output_boxes,
                   plugin_version_s=plugin_version,
                   score_activation_i=score_activation,
                   score_threshold_f=score_threshold,
                   outputs=4)
        nums, boxes, scores, classes = out
        return nums, boxes, scores, classes


def _split_boxes_scores(x):
    """Split a raw head output into ``(boxes, scores)``.

    ``x`` may be a list (yolov9-c.pt and yolov9-e.pt return one), in which case element 1
    is used.  The tensor is permuted from (batch, channels, n_boxes) to
    (batch, n_boxes, channels); the first four channels become the boxes and the remaining
    channels the per-class scores.
    """
    if isinstance(x, list):  # yolov9-c.pt and yolov9-e.pt return list
        x = x[1]
    x = x.permute(0, 2, 1)
    boxes = torch.cat([x[..., 0:1], x[..., 1:2], x[..., 2:3], x[..., 3:4]], dim=-1)
    return boxes, x[..., 4:]


class ONNX_ORT(nn.Module):
    """ONNX module with the ONNX-Runtime NMS operation.

    Args:
        max_obj: value passed to NMS as ``max_output_boxes_per_class``.
        iou_thres: IoU threshold passed to NMS.
        score_thres: score threshold passed to NMS.
        max_wh: per-class coordinate offset; 0 makes the NMS class-agnostic.
        device: device for the constant tensors (CPU when falsy).
        n_classes: stored on the module; not used by ``forward``.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None, n_classes=80):
        super().__init__()
        self.device = device if device else torch.device("cpu")
        # All constants live on self.device (the original mixed `device` and `self.device`).
        self.max_obj = torch.tensor([max_obj]).to(self.device)
        self.iou_threshold = torch.tensor([iou_thres]).to(self.device)
        self.score_threshold = torch.tensor([score_thres]).to(self.device)
        self.max_wh = max_wh  # if max_wh != 0 : non-agnostic else : agnostic
        # Right-multiplying (x, y, w, h) by this matrix yields (x - w/2, y - h/2, x + w/2, y + h/2).
        self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=self.device)
        self.n_classes = n_classes

    def forward(self, x):
        """Return one row per selected box: (batch index, x1, y1, x2, y2, class id, score).

        Based on https://github.com/thaitc-hust/yolov9-tensorrt/blob/main/torch2onnx.py
        (thanks https://github.com/thaitc-hust).
        """
        # Boxes stay 3-D (batch, n_boxes, 4) here.  The extra singleton axis used by the
        # TensorRT path made `bboxes + dis` broadcast to (batch, n, n, 4) and made the
        # final cat mix 2-D and 3-D tensors, so this method could never return.
        bboxes, scores = _split_boxes_scores(x)
        bboxes = bboxes @ self.convert_matrix  # xywh -> xyxy
        max_score, category_id = scores.max(2, keepdim=True)
        # Shift each box by class_id * max_wh so boxes of different classes never overlap.
        dis = category_id.float() * self.max_wh
        nmsbox = bboxes + dis
        max_score_tp = max_score.transpose(1, 2).contiguous()  # (batch, 1, n_boxes)
        selected_indices = ORT_NMS.apply(nmsbox, max_score_tp, self.max_obj, self.iou_threshold, self.score_threshold)
        X, Y = selected_indices[:, 0], selected_indices[:, 2]  # batch index, box index
        selected_boxes = bboxes[X, Y, :]
        selected_categories = category_id[X, Y, :].float()
        selected_scores = max_score[X, Y, :]
        X = X.unsqueeze(1).float()
        return torch.cat([X, selected_boxes, selected_categories, selected_scores], 1)


class ONNX_TRT(nn.Module):
    """ONNX module with the TensorRT NMS operation.

    Args:
        max_obj: maximum number of output boxes.
        iou_thres: IoU threshold forwarded to the plugin.
        score_thres: score threshold forwarded to the plugin.
        max_wh: must be ``None``; accepted only so the signature matches ``ONNX_ORT``.
        device: stored on the module (CPU when falsy).
        n_classes: stored on the module; not used by ``forward``.
    """

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        super().__init__()
        assert max_wh is None
        self.device = device if device else torch.device('cpu')
        # Plain ints: the original trailing commas turned both values into 1-tuples.
        self.background_class = -1
        self.box_coding = 1  # NOTE(review): presumably the plugin's center-size box coding -- confirm
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres
        self.n_classes = n_classes

    def forward(self, x):
        """Return ``(num_det, det_boxes, det_scores, det_classes)`` from ``TRT_NMS``.

        Based on https://github.com/thaitc-hust/yolov9-tensorrt/blob/main/torch2onnx.py
        (thanks https://github.com/thaitc-hust).
        """
        bboxes, scores = _split_boxes_scores(x)
        bboxes = bboxes.unsqueeze(2)  # [n_batch, n_bboxes, 4] -> [n_batch, n_bboxes, 1, 4]
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(bboxes, scores, self.background_class,
                                                                    self.box_coding, self.iou_threshold, self.max_obj,
                                                                    self.plugin_version, self.score_activation,
                                                                    self.score_threshold)
        return num_det, det_boxes, det_scores, det_classes


class End2End(nn.Module):
    """Wrap a detection model with an NMS stage for ONNX or TensorRT export.

    ``max_wh=None`` selects the TensorRT path (``ONNX_TRT``); an int selects the
    ONNX-Runtime path (``ONNX_ORT``).
    """

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        super().__init__()
        device = device if device else torch.device('cpu')
        assert isinstance(max_wh, (int)) or max_wh is None
        self.model = model.to(device)
        self.model.model[-1].end2end = True  # flag the last layer of the wrapped model
        self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT
        self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device, n_classes)
        self.end2end.eval()

    def forward(self, x):
        """Run the detector, then the selected NMS module."""
        x = self.model(x)
        x = self.end2end(x)
        return x


def attempt_load(weights, device=None, inplace=True, fuse=True):
    """Load a single model or an ensemble of models.

    Args:
        weights: one checkpoint reference, or a list/tuple of them
            (``weights=[a, b, c]``, ``weights=[a]`` or ``weights=a``).
        device: device the loaded models are moved to.
        inplace: value written to ``inplace`` on activation, ``Detect`` and ``Model`` modules.
        fuse: call ``fuse()`` on each model that provides it.

    Returns:
        The model itself when exactly one checkpoint was given, otherwise an ``Ensemble``
        carrying ``names``, ``nc`` and ``yaml`` of its first member and the stride of the
        member with the largest stride.
    """
    from models.yolo import Detect, Model

    model = Ensemble()
    for w in weights if isinstance(weights, (list, tuple)) else [weights]:
        # NOTE(security): torch.load unpickles the file; only load checkpoints from trusted sources.
        ckpt = torch.load(attempt_download(w), map_location='cpu')  # load
        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
        if not hasattr(ckpt, 'stride'):
            ckpt.stride = torch.tensor([32.])
        if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict

        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval())  # model in eval mode

    # Module compatibility updates
    for m in model.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
            m.inplace = inplace  # torch 1.7.0 compatibility
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
    if len(model) == 1:
        return model[-1]

    # Return detection ensemble
    print(f'Ensemble created with {weights}\n')
    for k in 'names', 'nc', 'yaml':
        setattr(model, k, getattr(model[0], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model
1387,1187] # P6/64 # P7 ------------------------------------------------------------------------------------------------------------------- # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 anchors_p7_640: - [11,11, 13,30, 29,20] # P3/8 - [30,46, 61,38, 39,92] # P4/16 - [78,80, 146,66, 79,163] # P5/32 - [149,150, 321,143, 157,303] # P6/64 - [257,402, 359,290, 524,372] # P7/128 # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 anchors_p7_1280: - [19,22, 54,36, 32,77] # P3/8 - [70,83, 138,71, 75,173] # P4/16 - [165,159, 148,334, 375,151] # P5/32 - [334,317, 251,626, 499,474] # P6/64 - [750,326, 534,814, 1079,818] # P7/128 # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 anchors_p7_1920: - [29,34, 81,55, 47,115] # P3/8 - [105,124, 207,107, 113,259] # P4/16 - [247,238, 222,500, 563,227] # P5/32 - [501,476, 376,939, 749,711] # P6/64 - [1126,489, 801,1222, 1618,1227] # P7/128 ================================================ FILE: models/hub/yolov3-spp.yaml ================================================ # YOLOv3 # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: - [10,13, 16,30, 33,23] # P3/8 - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 # darknet53 backbone backbone: # [from, number, module, args] [[-1, 1, Conv, [32, 3, 1]], # 0 [-1, 1, Conv, [64, 3, 
2]], # 1-P1/2 [-1, 1, Bottleneck, [64]], [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 [-1, 2, Bottleneck, [128]], [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 [-1, 8, Bottleneck, [256]], [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 [-1, 8, Bottleneck, [512]], [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 [-1, 4, Bottleneck, [1024]], # 10 ] # YOLOv3-SPP head head: [[-1, 1, Bottleneck, [1024, False]], [-1, 1, SPP, [512, [5, 9, 13]]], [-1, 1, Conv, [1024, 3, 1]], [-1, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) [-2, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 8], 1, Concat, [1]], # cat backbone P4 [-1, 1, Bottleneck, [512, False]], [-1, 1, Bottleneck, [512, False]], [-1, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) [-2, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P3 [-1, 1, Bottleneck, [256, False]], [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) ] ================================================ FILE: models/hub/yolov3-tiny.yaml ================================================ # YOLOv3 # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: - [10,14, 23,27, 37,58] # P4/16 - [81,82, 135,169, 344,319] # P5/32 # YOLOv3-tiny backbone backbone: # [from, number, module, args] [[-1, 1, Conv, [16, 3, 1]], # 0 [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 [-1, 1, Conv, [32, 3, 1]], [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 [-1, 1, Conv, [64, 3, 1]], [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 [-1, 1, Conv, [128, 3, 1]], [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 [-1, 1, Conv, [256, 3, 1]], [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 [-1, 1, Conv, [512, 3, 1]], [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 ] # YOLOv3-tiny head head: [[-1, 1, Conv, [1024, 3, 1]], [-1, 1, Conv, 
[256, 1, 1]], [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) [-2, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 8], 1, Concat, [1]], # cat backbone P4 [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) ] ================================================ FILE: models/hub/yolov3.yaml ================================================ # YOLOv3 # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: - [10,13, 16,30, 33,23] # P3/8 - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 # darknet53 backbone backbone: # [from, number, module, args] [[-1, 1, Conv, [32, 3, 1]], # 0 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 [-1, 1, Bottleneck, [64]], [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 [-1, 2, Bottleneck, [128]], [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 [-1, 8, Bottleneck, [256]], [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 [-1, 8, Bottleneck, [512]], [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 [-1, 4, Bottleneck, [1024]], # 10 ] # YOLOv3 head head: [[-1, 1, Bottleneck, [1024, False]], [-1, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [1024, 3, 1]], [-1, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) [-2, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 8], 1, Concat, [1]], # cat backbone P4 [-1, 1, Bottleneck, [512, False]], [-1, 1, Bottleneck, [512, False]], [-1, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) [-2, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P3 [-1, 1, Bottleneck, [256, False]], [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) ] ================================================ FILE: models/panoptic/gelan-c-pan.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes 
depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 # avg-conv down [-1, 1, ADown, [256]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 # avg-conv down [-1, 1, ADown, [512]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 # avg-conv down [-1, 1, ADown, [512]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 ] # gelan head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) # panoptic [[15, 18, 21], 1, Panoptic, [nc, 93, 32, 256]], # Panoptic(P3, P4, P5) ] ================================================ FILE: models/panoptic/yolov7-af-pan.yaml ================================================ # YOLOv7 # Parameters nc: 80 # number of classes sem_nc: 93 # number of stuff classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: 3 # YOLOv7 backbone backbone: [[-1, 1, Conv, [32, 3, 1]], # 0 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [128, 3, 2]], # 
3-P2/4 [-1, 1, Conv, [64, 1, 1]], [-2, 1, Conv, [64, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 11 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3], 1, Concat, [1]], # 16-P3/8 [-1, 1, Conv, [128, 1, 1]], [-2, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 24 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3], 1, Concat, [1]], # 29-P4/16 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 37 [-1, 1, MP, []], [-1, 1, Conv, [512, 1, 1]], [-3, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [512, 3, 2]], [[-1, -3], 1, Concat, [1]], # 42-P5/32 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 50 ] # yolov7 head head: [[-1, 1, SPPCSPC, [512]], # 51 [-1, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [37, 1, Conv, [256, 1, 1]], # route backbone P4 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 63 [-1, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [24, 1, Conv, [128, 1, 1]], # route backbone P3 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], [-2, 1, 
Conv, [128, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], # 75 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3, 63], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 88 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3, 51], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], [-2, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 101 [75, 1, Conv, [256, 3, 1]], [88, 1, Conv, [512, 3, 1]], [101, 1, Conv, [1024, 3, 1]], [[102, 103, 104], 1, Panoptic, [nc, 93, 32, 256]], # Panoptic(P3, P4, P5) ] ================================================ FILE: models/segment/gelan-c-dseg.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 # avg-conv down [-1, 1, ADown, [256]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 # avg-conv down [-1, 1, ADown, [512]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 # avg-conv down [-1, 1, ADown, [512]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 ] # 
gelan head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) [15, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 22 [-1, 1, nn.Upsample, [None, 2, 'nearest']], [-1, 1, Conv, [256, 3, 1]], # 24 # segment [[15, 18, 21, 24], 1, DSegment, [nc, 32, 256]], # Segment(P3, P4, P5) ] ================================================ FILE: models/segment/gelan-c-seg.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ # conv down [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 # avg-conv down [-1, 1, ADown, [256]], # 3-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 # avg-conv down [-1, 1, ADown, [512]], # 5-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 # avg-conv down [-1, 1, ADown, [512]], # 7-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 ] # gelan head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 9 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 6], 1, Concat, [1]], # cat 
backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 4], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 12], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 9], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) # segment [[15, 18, 21], 1, Segment, [nc, 32, 256]], # Segment(P3, P4, P5) ] ================================================ FILE: models/segment/yolov7-af-seg.yaml ================================================ # YOLOv7 # Parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple anchors: 3 # YOLOv7 backbone backbone: [[-1, 1, Conv, [32, 3, 1]], # 0 [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 [-1, 1, Conv, [64, 1, 1]], [-2, 1, Conv, [64, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 11 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3], 1, Concat, [1]], # 16-P3/8 [-1, 1, Conv, [128, 1, 1]], [-2, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 24 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3], 1, Concat, [1]], # 29-P4/16 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], 
[-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 37 [-1, 1, MP, []], [-1, 1, Conv, [512, 1, 1]], [-3, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [512, 3, 2]], [[-1, -3], 1, Concat, [1]], # 42-P5/32 [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -3, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [1024, 1, 1]], # 50 ] # yolov7 head head: [[-1, 1, SPPCSPC, [512]], # 51 [-1, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [37, 1, Conv, [256, 1, 1]], # route backbone P4 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 63 [-1, 1, Conv, [128, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], [24, 1, Conv, [128, 1, 1]], # route backbone P3 [[-1, -2], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], [-2, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [-1, 1, Conv, [64, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [128, 1, 1]], # 75 [-1, 1, MP, []], [-1, 1, Conv, [128, 1, 1]], [-3, 1, Conv, [128, 1, 1]], [-1, 1, Conv, [128, 3, 2]], [[-1, -3, 63], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], [-2, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [-1, 1, Conv, [128, 3, 1]], [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [256, 1, 1]], # 88 [-1, 1, MP, []], [-1, 1, Conv, [256, 1, 1]], [-3, 1, Conv, [256, 1, 1]], [-1, 1, Conv, [256, 3, 2]], [[-1, -3, 51], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], [-2, 1, Conv, [512, 1, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [-1, 1, Conv, [256, 3, 1]], [[-1, -2, 
-3, -4, -5, -6], 1, Concat, [1]], [-1, 1, Conv, [512, 1, 1]], # 101 [75, 1, Conv, [256, 3, 1]], [88, 1, Conv, [512, 3, 1]], [101, 1, Conv, [1024, 3, 1]], [[102, 103, 104], 1, Segment, [nc, 32, 256]], # Segment(P3, P4, P5) ] ================================================ FILE: models/segment/yolov9-c-dseg.yaml ================================================ # YOLOv9 # parameters nc: 80 # number of classes depth_multiple: 1.0 # model depth multiple width_multiple: 1.0 # layer channel multiple #activation: nn.LeakyReLU(0.1) #activation: nn.ReLU() # anchors anchors: 3 # gelan backbone backbone: [ [-1, 1, Silence, []], # conv down [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 # avg-conv down [-1, 1, ADown, [256]], # 4-P3/8 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 # avg-conv down [-1, 1, ADown, [512]], # 6-P4/16 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 # avg-conv down [-1, 1, ADown, [512]], # 8-P5/32 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 ] # YOLOv9 head head: [ # elan-spp block [-1, 1, SPPELAN, [512, 256]], # 10 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 7], 1, Concat, [1]], # cat backbone P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 # up-concat merge [-1, 1, nn.Upsample, [None, 2, 'nearest']], [[-1, 5], 1, Concat, [1]], # cat backbone P3 # elan-2 block [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) # avg-conv-down merge [-1, 1, ADown, [256]], [[-1, 13], 1, Concat, [1]], # cat head P4 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) # avg-conv-down merge [-1, 1, ADown, [512]], [[-1, 10], 1, Concat, [1]], # cat head P5 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) # multi-level reversible auxiliary branch # routing [5, 1, CBLinear, [[256]]], # 23 [7, 1, CBLinear, [[256, 512]]], # 24 [9, 
1, CBLinear, [[256, 512, 512]]], # 25 # conv down [0, 1, Conv, [64, 3, 2]], # 26-P1/2 # conv down [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 # elan-1 block [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 # avg-conv down fuse [-1, 1, ADown, [256]], # 29-P3/8 [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 # avg-conv down fuse [-1, 1, ADown, [512]], # 32-P4/16 [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 # avg-conv down fuse [-1, 1, ADown, [512]], # 35-P5/32 [[25, -1], 1, CBFuse, [[2]]], # 36 # elan-2 block [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 [31, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 38 [-1, 1, nn.Upsample, [None, 2, 'nearest']], [-1, 1, Conv, [256, 3, 1]], # 40 [16, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 41 [-1, 1, nn.Upsample, [None, 2, 'nearest']], [-1, 1, Conv, [256, 3, 1]], # 43 # segment [[31, 34, 37, 16, 19, 22, 40, 43], 1, DualDSegment, [nc, 32, 256]], # Segment(P3, P4, P5) ] ================================================ FILE: models/tf.py ================================================ import argparse import sys from copy import deepcopy from pathlib import Path FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH # ROOT = ROOT.relative_to(Path.cwd()) # relative import numpy as np import tensorflow as tf import torch import torch.nn as nn from tensorflow import keras from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, DWConvTranspose2d, Focus, autopad) from models.experimental import MixConv2d, attempt_load from models.yolo import Detect, Segment from utils.activations import SiLU from utils.general import LOGGER, make_divisible, print_args class TFBN(keras.layers.Layer): # TensorFlow BatchNormalization wrapper def __init__(self, w=None): super().__init__() self.bn = 
keras.layers.BatchNormalization( beta_initializer=keras.initializers.Constant(w.bias.numpy()), gamma_initializer=keras.initializers.Constant(w.weight.numpy()), moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), epsilon=w.eps) def call(self, inputs): return self.bn(inputs) class TFPad(keras.layers.Layer): # Pad inputs in spatial dimensions 1 and 2 def __init__(self, pad): super().__init__() if isinstance(pad, int): self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) else: # tuple/list self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]) def call(self, inputs): return tf.pad(inputs, self.pad, mode='constant', constant_values=0) class TFConv(keras.layers.Layer): # Standard convolution def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): # ch_in, ch_out, weights, kernel, stride, padding, groups super().__init__() assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" # TensorFlow convolution padding is inconsistent with PyTorch (e.g. 
k=3 s=2 'SAME' padding) # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch conv = keras.layers.Conv2D( filters=c2, kernel_size=k, strides=s, padding='SAME' if s == 1 else 'VALID', use_bias=not hasattr(w, 'bn'), kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity self.act = activations(w.act) if act else tf.identity def call(self, inputs): return self.act(self.bn(self.conv(inputs))) class TFDWConv(keras.layers.Layer): # Depthwise convolution def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None): # ch_in, ch_out, weights, kernel, stride, padding, groups super().__init__() assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels' conv = keras.layers.DepthwiseConv2D( kernel_size=k, depth_multiplier=c2 // c1, strides=s, padding='SAME' if s == 1 else 'VALID', use_bias=not hasattr(w, 'bn'), depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity self.act = activations(w.act) if act else tf.identity def call(self, inputs): return self.act(self.bn(self.conv(inputs))) class TFDWConvTranspose2d(keras.layers.Layer): # Depthwise ConvTranspose2d def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None): # ch_in, ch_out, weights, kernel, stride, padding, groups super().__init__() assert c1 == c2, f'TFDWConv() output={c2} must be equal to input={c1} channels' assert k == 4 and p1 == 1, 'TFDWConv() only valid for k=4 and p1=1' weight, bias = 
w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy() self.c1 = c1 self.conv = [ keras.layers.Conv2DTranspose(filters=1, kernel_size=k, strides=s, padding='VALID', output_padding=p2, use_bias=True, kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]), bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)] def call(self, inputs): return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1] class TFFocus(keras.layers.Layer): # Focus wh information into c-space def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) # inputs = inputs / 255 # normalize 0-255 to 0-1 inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]] return self.conv(tf.concat(inputs, 3)) class TFBottleneck(keras.layers.Layer): # Standard bottleneck def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) self.add = shortcut and c1 == c2 def call(self, inputs): return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) class TFCrossConv(keras.layers.Layer): # Cross Convolution def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None): super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1) self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2) self.add = shortcut and c1 == c2 def call(self, inputs): return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) class TFConv2d(keras.layers.Layer): # Substitution for PyTorch nn.Conv2D def __init__(self, c1, c2, k, s=1, g=1, 
class TFBottleneckCSP(keras.layers.Layer):
    """CSP Bottleneck (https://github.com/WongKinYiu/CrossStagePartialNetworks) for Keras.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of stacked TFBottleneck layers in the main path.
        shortcut: Forwarded to every TFBottleneck.
        g: Groups forwarded to every TFBottleneck.
        e: Expansion ratio; the hidden width is ``int(c2 * e)``.
        w: PyTorch module supplying ``cv1``..``cv4``, ``bn`` and the indexable ``m``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        super().__init__()
        hidden = int(c2 * e)
        # Attribute order is kept as-is: Keras tracks sub-layers in assignment order.
        self.cv1 = TFConv(c1, hidden, 1, 1, w=w.cv1)
        self.cv2 = TFConv2d(c1, hidden, 1, 1, bias=False, w=w.cv2)
        self.cv3 = TFConv2d(hidden, hidden, 1, 1, bias=False, w=w.cv3)
        self.cv4 = TFConv(2 * hidden, c2, 1, 1, w=w.cv4)
        self.bn = TFBN(w.bn)
        self.act = lambda t: keras.activations.swish(t)
        stack = [TFBottleneck(hidden, hidden, shortcut, g, e=1.0, w=w.m[idx]) for idx in range(n)]
        self.m = keras.Sequential(stack)

    def call(self, inputs):
        """Concatenate the bottleneck path and the bypass path on axis 3, then bn -> act -> cv4."""
        main_path = self.cv3(self.m(self.cv1(inputs)))
        bypass = self.cv2(inputs)
        merged = tf.concat((main_path, bypass), axis=3)
        return self.cv4(self.act(self.bn(merged)))
class TFDetect(keras.layers.Layer):
    """TF YOLO Detect layer (anchor-based head) built from a PyTorch detect module ``w``.

    Args:
        nc: Number of classes.
        anchors: Sequence with one entry per detection level; ``len(anchors[0]) // 2``
            is taken as the number of anchors per level.
        ch: Input channel count of each level's feature map.
        imgsz: Inference size; index 0 is paired with ``ny`` and index 1 with ``nx``.
        w: PyTorch module providing ``stride``, ``anchors`` and the per-level convs ``m``.
    """

    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
        super().__init__()
        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [tf.zeros(1)] * self.nl  # init grid
        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
        # anchors multiplied by the per-level stride, reshaped to [nl, 1, -1, 1, 2]
        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
        self.training = False  # set to False after building model
        self.imgsz = imgsz
        # Grids are precomputed once per level from the fixed image size.
        for i in range(self.nl):
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            self.grid[i] = self._make_grid(nx, ny)

    def call(self, inputs):
        """Run the per-level output convs and, unless ``self.training``, decode boxes.

        Returns ``tf.transpose(x, [0, 2, 1, 3])`` when ``self.training`` is set, otherwise a
        1-tuple holding all levels concatenated along axis 1.
        """
        z = []  # inference output
        x = []
        for i in range(self.nl):
            x.append(self.m[i](inputs[i]))
            # x(bs,20,20,255) to x(bs,3,20,20,85)
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            # actual reshape target is [-1, ny * nx, na, no]
            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])

            if not self.training:  # inference
                y = x[i]
                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
                # NOTE(review): the * 4 together with sigmoid ** 2 below is algebraically
                # (2 * sigmoid) ** 2 * anchor; presumably mirrors the PyTorch head, confirm.
                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
                # Normalize xywh to 0-1 to reduce calibration error
                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                # channels 4 .. 5 + nc get a sigmoid; anything past 5 + nc is passed through unchanged
                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))

        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float32 tensor of (x, y) cell indices with shape [1, 1, ny * nx, 2]."""
        # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
class TFUpsample(keras.layers.Layer):
    """TF version of torch.nn.Upsample(), implemented with ``tf.image.resize``.

    Args:
        size: Accepted for signature parity; not read by this layer.
        scale_factor: Multiplier applied to ``x.shape[1]`` and ``x.shape[2]``; must be even.
        mode: Passed to ``tf.image.resize`` as its third positional argument.
        w: Accepted for signature parity; not read by this layer.
    """

    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
        super().__init__()
        assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"

        def _resize(x):
            # Target size is derived from the static shape of the incoming tensor.
            target = (x.shape[1] * scale_factor, x.shape[2] * scale_factor)
            return tf.image.resize(x, target, mode)

        self.upsample = _resize

    def call(self, inputs):
        """Resize ``inputs`` by the configured scale factor."""
        return self.upsample(inputs)
class TFModel:
    """TF YOLO model assembled by ``parse_model`` from a model dict and a PyTorch model.

    Args:
        cfg: Either a model dict or a path to a ``*.yaml`` file holding one.
        ch: Number of input channels.
        nc: Optional class count; when truthy and different from the config it overrides it.
        model: PyTorch model forwarded to ``parse_model``.
        imgsz: Inference size forwarded to ``parse_model``.
    """

    def __init__(self, cfg='yolo.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):
        super().__init__()
        if not isinstance(cfg, dict):  # is *.yaml
            import yaml  # for torch hub

            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        else:
            self.yaml = cfg  # model dict

        # Define model
        override_nc = nc and nc != self.yaml['nc']
        if override_nc:
            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        built = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
        self.model, self.savelist = built

    def predict(self,
                inputs,
                tf_nms=False,
                agnostic_nms=False,
                topk_per_class=100,
                topk_all=100,
                iou_thres=0.45,
                conf_thres=0.25):
        """Run the layers in order and optionally append TensorFlow NMS.

        Returns the last layer's output, or ``(nms,)`` when ``tf_nms`` is truthy.
        """
        saved = []  # per-layer outputs (None for layers not in the savelist)
        x = inputs
        for layer in self.model.layers:
            source = layer.f
            if source != -1:  # input does not come from the previous layer
                if isinstance(source, int):
                    x = saved[source]
                else:
                    x = [x if j == -1 else saved[j] for j in source]

            x = layer(x)
            saved.append(x if layer.i in self.savelist else None)

        if not tf_nms:
            return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]

        # Add TensorFlow NMS
        pred = x[0]
        boxes = self._xywh2xyxy(pred[..., :4])
        probs = pred[:, :, 4:5]
        classes = pred[:, :, 5:]
        scores = probs * classes
        if agnostic_nms:
            nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
        else:
            boxes = tf.expand_dims(boxes, 2)
            nms = tf.image.combined_non_max_suppression(boxes,
                                                        scores,
                                                        topk_per_class,
                                                        topk_all,
                                                        iou_thres,
                                                        conf_thres,
                                                        clip_boxes=False)
        return (nms,)

    @staticmethod
    def _xywh2xyxy(xywh):
        """Convert boxes from [x, y, w, h] to [x1, y1, x2, y2] (xy1=top-left, xy2=bottom-right)."""
        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
        corners = [x - w / 2, y - h / 2, x + w / 2, y + h / 2]
        return tf.concat(corners, axis=-1)
def representative_dataset_gen(dataset, ncalib=100):
    """Yield calibration samples for ``converter.representative_dataset``.

    The previous implementation yielded a sample and only then tested ``n >= ncalib``,
    so it produced ``ncalib + 1`` samples. This version yields at most ``ncalib``
    samples and does not pull any further item from ``dataset``.

    Args:
        dataset: Iterable of 5-tuples ``(path, img, im0s, vid_cap, string)``; only ``img``
            is used and it must be accepted by ``np.transpose(img, [1, 2, 0])``.
        ncalib: Maximum number of samples to yield; values below 1 yield nothing.

    Yields:
        A one-element list holding a float32 array: ``img`` with axis 0 moved last
        (presumably CHW -> HWC), a new leading axis of size 1, and values divided by 255.
    """
    from itertools import islice  # local import keeps the block independent of the file header

    limit = max(int(ncalib), 0)
    for _path, img, _im0s, _vid_cap, _string in islice(dataset, limit):
        im = np.transpose(img, [1, 2, 0])
        im = np.expand_dims(im, axis=0).astype(np.float32)  # astype copies, so img is untouched
        im /= 255
        yield [im]
class Detect(nn.Module):
    """YOLO Detect head for detection models.

    Every input level gets a box branch (``cv2``, ``4 * reg_max`` channels) and a class
    branch (``cv3``, ``nc`` channels). In training mode the raw per-level maps are
    returned; otherwise boxes are decoded with ``dfl`` and ``dist2bbox``.

    Args:
        nc: Number of classes.
        ch: Channel count of each input feature map (must be non-empty).
        inplace: Stored on the instance as ``self.inplace``.
    """
    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None  # input shape the cached anchors were built for
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=(), inplace=True):
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
        self.stride = torch.zeros(self.nl)  # strides computed during build

        box_width = max((ch[0] // 4, self.reg_max * 4, 16))
        cls_width = max((ch[0], min((self.nc * 2, 128))))
        # Branches are created in the order cv2, cv3, dfl so module registration is unchanged.
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(c_in, box_width, 3), Conv(box_width, box_width, 3),
                          nn.Conv2d(box_width, 4 * self.reg_max, 1)) for c_in in ch)
        self.cv3 = nn.ModuleList(
            nn.Sequential(Conv(c_in, cls_width, 3), Conv(cls_width, cls_width, 3),
                          nn.Conv2d(cls_width, self.nc, 1)) for c_in in ch)
        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

    def forward(self, x):
        """Return raw maps in training mode, else ``y`` (export) or ``(y, x)``.

        The list ``x`` is updated in place: each entry is replaced by the concatenation
        of its box and class branch outputs along dim 1.
        """
        in_shape = x[0].shape  # BCHW
        for i in range(self.nl):
            feat = x[i]
            x[i] = torch.cat((self.cv2[i](feat), self.cv3[i](feat)), 1)

        if self.training:
            return x

        if self.dynamic or self.shape != in_shape:
            # Rebuild and cache anchors/strides whenever the input shape changes.
            self.anchors, self.strides = (t.transpose(0, 1) for t in make_anchors(x, self.stride, 0.5))
            self.shape = in_shape

        flat = torch.cat([level.view(in_shape[0], self.no, -1) for level in x], 2)
        box, cls = flat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)
        if self.export:
            return y
        return y, x

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        for box_branch, cls_branch, s in zip(self.cv2, self.cv3, self.stride):
            box_branch[-1].bias.data[:] = 1.0  # box
            # cls (5 objects and 80 classes per 640 image)
            cls_branch[-1].bias.data[:self.nc] = math.log(5 / self.nc / (640 / s) ** 2)
class DualDetect(nn.Module):
    """YOLO Detect head with two parallel branches.

    ``ch`` is split in half: the first ``nl`` feature maps feed ``cv2``/``cv3``/``dfl``
    and the remaining maps feed ``cv4``/``cv5``/``dfl2``. Anchors and strides are built
    from the first branch's maps and reused for the second.

    Args:
        nc: Number of classes.
        ch: Channel counts of all input feature maps (first branch, then second branch).
        inplace: Stored on the instance as ``self.inplace``.
    """
    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None  # input shape the cached anchors were built for
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=(), inplace=True):
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch) // 2  # number of detection layers
        self.reg_max = 16
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
        self.stride = torch.zeros(self.nl)  # strides computed during build

        def box_width(c):
            return max((c // 4, self.reg_max * 4, 16))

        def cls_width(c):
            return max((c, min((self.nc * 2, 128))))

        def branch(c_in, width, c_out):
            return nn.Sequential(Conv(c_in, width, 3), Conv(width, width, 3), nn.Conv2d(width, c_out, 1))

        c2, c3 = box_width(ch[0]), cls_width(ch[0])
        c4, c5 = box_width(ch[self.nl]), cls_width(ch[self.nl])
        first_ch, second_ch = ch[:self.nl], ch[self.nl:]
        box_out = 4 * self.reg_max
        # Creation order (cv2, cv3, cv4, cv5, dfl, dfl2) is kept so registration is unchanged.
        self.cv2 = nn.ModuleList(branch(c, c2, box_out) for c in first_ch)
        self.cv3 = nn.ModuleList(branch(c, c3, self.nc) for c in first_ch)
        self.cv4 = nn.ModuleList(branch(c, c4, box_out) for c in second_ch)
        self.cv5 = nn.ModuleList(branch(c, c5, self.nc) for c in second_ch)
        self.dfl = DFL(self.reg_max)
        self.dfl2 = DFL(self.reg_max)

    def _decode_boxes(self, maps, dfl, batch):
        """Flatten per-level maps and return (decoded boxes, raw class logits)."""
        flat = torch.cat([level.view(batch, self.no, -1) for level in maps], 2)
        box, cls = flat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        return dbox, cls

    def forward(self, x):
        """Return ``[d1, d2]`` in training mode, else ``y`` (export) or ``(y, [d1, d2])``."""
        in_shape = x[0].shape  # BCHW
        offset = self.nl
        d1, d2 = [], []
        for i in range(self.nl):
            d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
            d2.append(torch.cat((self.cv4[i](x[offset + i]), self.cv5[i](x[offset + i])), 1))

        if self.training:
            return [d1, d2]

        if self.dynamic or self.shape != in_shape:
            self.anchors, self.strides = (t.transpose(0, 1) for t in make_anchors(d1, self.stride, 0.5))
            self.shape = in_shape

        dbox, cls = self._decode_boxes(d1, self.dfl, in_shape[0])
        dbox2, cls2 = self._decode_boxes(d2, self.dfl2, in_shape[0])
        y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)]
        if self.export:
            return y
        return y, [d1, d2]

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        for box_heads, cls_heads in ((self.cv2, self.cv3), (self.cv4, self.cv5)):
            for a, b, s in zip(box_heads, cls_heads, self.stride):
                a[-1].bias.data[:] = 1.0  # box
                # cls (5 objects and 80 classes per 640 image)
                b[-1].bias.data[:self.nc] = math.log(5 / self.nc / (640 / s) ** 2)
slice assignment) self.stride = torch.zeros(self.nl) # strides computed during build c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), max((ch[0], min((self.nc * 2, 128)))) # channels c4, c5 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4), max((ch[self.nl], min((self.nc * 2, 128)))) # channels self.cv2 = nn.ModuleList( nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch[:self.nl]) self.cv3 = nn.ModuleList( nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) self.cv4 = nn.ModuleList( nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4), nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl:]) self.cv5 = nn.ModuleList( nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:]) self.dfl = DFL(self.reg_max) self.dfl2 = DFL(self.reg_max) def forward(self, x): shape = x[0].shape # BCHW d1 = [] d2 = [] for i in range(self.nl): d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) if self.training: return [d1, d2] elif self.dynamic or self.shape != shape: self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) self.shape = shape box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)] return y if self.export else (y, [d1, d2]) #y = torch.cat((dbox2, cls2.sigmoid()), 1) #return y if self.export else (y, d2) #y1 = torch.cat((dbox, 
class TripleDetect(nn.Module):
    """YOLO Detect head with three parallel branches.

    ``ch`` is split in thirds: maps ``[0, nl)`` feed ``cv2``/``cv3``/``dfl``, maps
    ``[nl, 2*nl)`` feed ``cv4``/``cv5``/``dfl2`` and maps ``[2*nl, 3*nl)`` feed
    ``cv6``/``cv7``/``dfl3``. Anchors and strides are built from the first branch's maps
    and reused for the other two.

    Args:
        nc: Number of classes.
        ch: Channel counts of all input feature maps, branch after branch.
        inplace: Stored on the instance as ``self.inplace``.
    """
    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None  # input shape the cached anchors were built for
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=(), inplace=True):
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch) // 3  # number of detection layers
        self.reg_max = 16
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)
        self.stride = torch.zeros(self.nl)  # strides computed during build

        def box_width(c):
            return max((c // 4, self.reg_max * 4, 16))

        def cls_width(c):
            return max((c, min((self.nc * 2, 128))))

        def branch(c_in, width, c_out):
            return nn.Sequential(Conv(c_in, width, 3), Conv(width, width, 3), nn.Conv2d(width, c_out, 1))

        nl = self.nl
        c2, c3 = box_width(ch[0]), cls_width(ch[0])
        c4, c5 = box_width(ch[nl]), cls_width(ch[nl])
        c6, c7 = box_width(ch[nl * 2]), cls_width(ch[nl * 2])
        first_ch, second_ch, third_ch = ch[:nl], ch[nl:nl * 2], ch[nl * 2:nl * 3]
        box_out = 4 * self.reg_max
        # Creation order (cv2 .. cv7, dfl, dfl2, dfl3) is kept so registration is unchanged.
        self.cv2 = nn.ModuleList(branch(c, c2, box_out) for c in first_ch)
        self.cv3 = nn.ModuleList(branch(c, c3, self.nc) for c in first_ch)
        self.cv4 = nn.ModuleList(branch(c, c4, box_out) for c in second_ch)
        self.cv5 = nn.ModuleList(branch(c, c5, self.nc) for c in second_ch)
        self.cv6 = nn.ModuleList(branch(c, c6, box_out) for c in third_ch)
        self.cv7 = nn.ModuleList(branch(c, c7, self.nc) for c in third_ch)
        self.dfl = DFL(self.reg_max)
        self.dfl2 = DFL(self.reg_max)
        self.dfl3 = DFL(self.reg_max)

    def _decode_boxes(self, maps, dfl, batch):
        """Flatten per-level maps and return (decoded boxes, raw class logits)."""
        flat = torch.cat([level.view(batch, self.no, -1) for level in maps], 2)
        box, cls = flat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        return dbox, cls

    def forward(self, x):
        """Return ``[d1, d2, d3]`` in training mode, else ``y`` (export) or ``(y, [d1, d2, d3])``."""
        in_shape = x[0].shape  # BCHW
        nl = self.nl
        d1, d2, d3 = [], [], []
        for i in range(nl):
            d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
            d2.append(torch.cat((self.cv4[i](x[nl + i]), self.cv5[i](x[nl + i])), 1))
            d3.append(torch.cat((self.cv6[i](x[nl * 2 + i]), self.cv7[i](x[nl * 2 + i])), 1))

        if self.training:
            return [d1, d2, d3]

        if self.dynamic or self.shape != in_shape:
            self.anchors, self.strides = (t.transpose(0, 1) for t in make_anchors(d1, self.stride, 0.5))
            self.shape = in_shape

        batch = in_shape[0]
        dbox, cls = self._decode_boxes(d1, self.dfl, batch)
        dbox2, cls2 = self._decode_boxes(d2, self.dfl2, batch)
        dbox3, cls3 = self._decode_boxes(d3, self.dfl3, batch)
        y = [
            torch.cat((dbox, cls.sigmoid()), 1),
            torch.cat((dbox2, cls2.sigmoid()), 1),
            torch.cat((dbox3, cls3.sigmoid()), 1)]
        if self.export:
            return y
        return y, [d1, d2, d3]

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        branch_pairs = ((self.cv2, self.cv3), (self.cv4, self.cv5), (self.cv6, self.cv7))
        for box_heads, cls_heads in branch_pairs:
            for a, b, s in zip(box_heads, cls_heads, self.stride):
                a[-1].bias.data[:] = 1.0  # box
                # cls (5 objects and 80 classes per 640 image)
                b[-1].bias.data[:self.nc] = math.log(5 / self.nc / (640 / s) ** 2)
def forward(self, x):
    """Run the three detection branches over the feature maps in ``x``.

    ``x`` is read as three consecutive groups of ``self.nl`` feature maps:
    ``x[i]`` feeds ``cv2``/``cv3``, ``x[self.nl + i]`` feeds ``cv4``/``cv5`` and
    ``x[self.nl * 2 + i]`` feeds ``cv6``/``cv7``.

    Returns:
        In training mode ``[d1, d2, d3]``: the raw per-level outputs (box and class
        channels concatenated on dim 1) of each branch.
        Otherwise ``y`` when ``self.export`` is set, else ``(y, d3)``, where ``y`` is
        the ``dist2bbox`` result scaled by ``self.strides`` concatenated with the
        sigmoid class scores of the third branch.
    """
    shape = x[0].shape  # BCHW
    d1, d2, d3 = [], [], []
    for i in range(self.nl):
        d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1))
        d2.append(torch.cat((self.cv4[i](x[self.nl + i]), self.cv5[i](x[self.nl + i])), 1))
        d3.append(torch.cat((self.cv6[i](x[self.nl * 2 + i]), self.cv7[i](x[self.nl * 2 + i])), 1))
    if self.training:
        return [d1, d2, d3]
    elif self.dynamic or self.shape != shape:
        # Anchors are rebuilt from the first branch's maps and reused for the third branch.
        # NOTE(review): this presumes all branches share spatial sizes per level - confirm.
        self.anchors, self.strides = (a.transpose(0, 1) for a in make_anchors(d1, self.stride, 0.5))
        self.shape = shape

    # Only the third branch contributes to the inference output, so the first and second
    # branches are not decoded here (their decoded boxes were previously computed and dropped).
    box3, cls3 = torch.cat([di.view(shape[0], self.no, -1) for di in d3], 2).split((self.reg_max * 4, self.nc), 1)
    dbox3 = dist2bbox(self.dfl3(box3), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
    y = torch.cat((dbox3, cls3.sigmoid()), 1)
    return y if self.export else (y, d3)


def bias_init(self):
    """Initialise the biases of the final conv of every box/class head.

    WARNING: requires ``self.stride`` to be populated, because the class bias depends
    on the stride of each level.
    """
    m = self
    # (box heads, class heads) for branch 1, 2 and 3; all branches use the same strides.
    for box_heads, cls_heads in ((m.cv2, m.cv3), (m.cv4, m.cv5), (m.cv6, m.cv7)):
        for a, b, s in zip(box_heads, cls_heads, m.stride):
            a[-1].bias.data[:] = 1.0  # box
            # cls (5 objects and 80 classes per 640 image)
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)
class DSegment(DDetect):
    """YOLO segmentation head built on ``DDetect``.

    The last entry of ``ch`` feeds the prototype conv; all other entries feed the
    detection branch and the per-level mask-coefficient heads.
    """

    def __init__(self, nc=80, nm=32, npr=256, ch=(), inplace=True):
        det_ch = ch[:-1]  # every input except the prototype feature map
        super().__init__(nc, det_ch, inplace)
        self.nl = len(ch) - 1
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.proto = Conv(ch[-1], self.nm, 1)  # protos
        self.detect = DDetect.forward

        hidden = max(ch[0] // 4, self.nm)
        coeff_heads = []
        for c_in in det_ch:
            coeff_heads.append(
                nn.Sequential(Conv(c_in, hidden, 3), Conv(hidden, hidden, 3), nn.Conv2d(hidden, self.nm, 1)))
        self.cv4 = nn.ModuleList(coeff_heads)

    def forward(self, x):
        p = self.proto(x[-1])
        bs = p.shape[0]

        # mask coefficients: one (bs, nm, -1) tensor per level, joined on the last axis
        per_level = [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)]
        mc = torch.cat(per_level, 2)

        x = self.detect(self, x[:-1])
        if self.training:
            return x, mc, p
        if self.export:
            return torch.cat([x, mc], 1), p
        return torch.cat([x[0], mc], 1), (x[1], mc, p)
class BaseModel(nn.Module):
    """YOLO base model: sequential execution of ``self.model`` with skip connections."""

    def forward(self, x, profile=False, visualize=False):
        # single-scale inference, train
        return self._forward_once(x, profile, visualize)

    def _forward_once(self, x, profile=False, visualize=False):
        """Run every layer once, routing inputs according to each layer's ``f`` attribute."""
        saved, timings = [], []  # per-layer outputs (None when not in self.save), profile timings
        for layer in self.model:
            src = layer.f
            if src != -1:  # input does not come from the previous layer
                if isinstance(src, int):
                    x = saved[src]
                else:
                    x = [x if idx == -1 else saved[idx] for idx in src]
            if profile:
                self._profile_one_layer(layer, x, timings)
            x = layer(x)  # run
            saved.append(x if layer.i in self.save else None)  # keep only outputs needed later
            if visualize:
                feature_visualization(x, layer.type, layer.i, save_dir=visualize)
        return x

    def _profile_one_layer(self, m, x, dt):
        """Time ``m`` over 10 calls, append the result to ``dt`` and log one table row."""
        is_last = m == self.model[-1]  # final layer gets a copy of its input (inplace fix)
        if thop:
            flops = thop.profile(m, inputs=(x.copy() if is_last else x,), verbose=False)[0] / 1E9 * 2
        else:
            flops = 0
        start = time_sync()
        for _ in range(10):
            m(x.copy() if is_last else x)
        dt.append((time_sync() - start) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
        LOGGER.info(f'{dt[-1]:10.2f} {flops:10.2f} {m.np:10.0f} {m.type}')
        if is_last:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")

    def fuse(self):
        """Fuse Conv2d() + BatchNorm2d() layers in place and return ``self``."""
        LOGGER.info('Fusing layers... ')
        for module in self.model.modules():
            if isinstance(module, RepConvN) and hasattr(module, 'fuse_convs'):
                module.fuse_convs()
                module.forward = module.forward_fuse  # update forward
            if isinstance(module, (Conv, DWConv)) and hasattr(module, 'bn'):
                module.conv = fuse_conv_and_bn(module.conv, module.bn)  # update conv
                delattr(module, 'bn')  # remove batchnorm
                module.forward = module.forward_fuse  # update forward
        self.info()
        return self

    def info(self, verbose=False, img_size=640):
        # print model information
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        """Apply to(), cpu(), cuda(), half() to head tensors that are not parameters or buffers."""
        self = super()._apply(fn)
        head = self.model[-1]  # Detect()
        head_types = (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, DSegment,
                      DualDSegment, Panoptic)
        if isinstance(head, head_types):
            for attr in ('stride', 'anchors', 'strides'):
                setattr(head, attr, fn(getattr(head, attr)))
        return self
def forward(self, x, augment=False, profile=False, visualize=False):
    """Dispatch to augmented inference when ``augment`` is set, else to a single pass."""
    if not augment:
        return self._forward_once(x, profile, visualize)  # single-scale inference, train
    return self._forward_augment(x)  # augmented inference, None


def _forward_augment(self, x):
    """Run the model at three scales (one left-right flipped) and merge the predictions."""
    img_size = x.shape[-2:]  # height, width
    # (scale, flip) pairs; flip codes: 2-ud, 3-lr
    schedule = ((1, None), (0.83, 3), (0.67, None))
    outputs = []
    for scale, flip in schedule:
        scaled = scale_img(x.flip(flip) if flip else x, scale, gs=int(self.stride.max()))
        pred = self._forward_once(scaled)[0]  # forward
        outputs.append(self._descale_pred(pred, flip, scale, img_size))
    outputs = self._clip_augmented(outputs)  # clip augmented tails
    return torch.cat(outputs, 1), None  # augmented inference, train


def _descale_pred(self, p, flips, scale, img_size):
    """De-scale (and de-flip) predictions following augmented inference (inverse operation).

    NOTE(review): the first four entries of the LAST axis are treated as x, y, w, h, while
    the detection head visible in this file concatenates boxes on dim 1 - confirm layout.
    """
    if not self.inplace:
        cx = p[..., 0:1] / scale
        cy = p[..., 1:2] / scale
        wh = p[..., 2:4] / scale  # de-scale
        if flips == 2:
            cy = img_size[0] - cy  # de-flip ud
        elif flips == 3:
            cx = img_size[1] - cx  # de-flip lr
        return torch.cat((cx, cy, wh, p[..., 4:]), -1)

    p[..., :4] /= scale  # de-scale
    if flips == 2:
        p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
    elif flips == 3:
        p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
    return p


def _clip_augmented(self, y):
    """Clip YOLO augmented inference tails of the first and last outputs along dim 1."""
    nl = self.model[-1].nl  # number of detection layers (P3-P5)
    grid_total = sum(4 ** k for k in range(nl))  # grid points
    e = 1  # exclude layer count
    head_cut = (y[0].shape[1] // grid_total) * sum(4 ** k for k in range(e))  # indices
    y[0] = y[0][:, :-head_cut]  # large
    tail_cut = (y[-1].shape[1] // grid_total) * sum(4 ** (nl - 1 - k) for k in range(e))  # indices
    y[-1] = y[-1][:, tail_cut:]  # small
    return y
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Parse a YOLO model.yaml dictionary into a layer stack.

    Args:
        d: model dict; reads 'anchors', 'nc', 'depth_multiple', 'width_multiple',
            'backbone', 'head' and the optional 'activation' key.
        ch: list of channel counts; ``ch[-1]`` seeds the running output channel count.

    Returns:
        ``(nn.Sequential(*layers), sorted(save))`` where ``save`` holds the indices of
        layers whose output is consumed by a later, non-adjacent layer.

    Side effects:
        When 'activation' is set, ``Conv.default_act`` and ``RepConvN.default_act`` are
        reassigned at class level.

    NOTE(security): module names, arguments and the activation are passed to ``eval``;
    only parse model definitions from trusted sources.
    """
    # Parse a YOLO model.yaml dictionary
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        RepConvN.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            # strings that do not resolve to a name raise NameError and are kept as plain strings
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        # n_ keeps the scaled repeat count for logging; n may be reset to 1 below
        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
            Conv, AConv, ConvTranspose,
            Bottleneck, SPP, SPPF, DWConv, BottleneckCSP, nn.ConvTranspose2d, DWConvTranspose2d, SPPCSPC, ADown,
            ELAN1, RepNCSPELAN4, SPPELAN}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)  # width gain, rounded via make_divisible(..., 8)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, SPPCSPC}:
                # these modules take the repeat count as an argument instead of being stacked
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        elif m is Shortcut:
            c2 = ch[f[0]]
        elif m is ReOrg:
            c2 = ch[f] * 4
        elif m is CBLinear:
            c2 = args[0]
            c1 = ch[f]
            args = [c1, c2, *args[1:]]
        elif m is CBFuse:
            c2 = ch[f[-1]]
        # TODO: channel, gw, gd
        elif m in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment, DSegment, DualDSegment, Panoptic}:
            # heads receive the channel counts of all their source layers as the last argument
            args.append([ch[x] for x in f])
            # if isinstance(args[1], int):  # number of anchors
            #     args[1] = [list(range(args[1] * 2))] * len(f)
            if m in {Segment, DSegment, DualDSegment, Panoptic}:
                args[2] = make_divisible(args[2] * gw, 8)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params (local name, not numpy)
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            # drop the input channel entry so that ch[k] is the output width of layer k
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
@smart_inference_mode()
def run(
        weights=ROOT / 'yolo-pan.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/predict-seg',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
        retina_masks=False,
):
    """Run mask inference on ``source`` and optionally save images, videos, labels and crops.

    ``weights`` may be a single path (``str``/``Path``) or a list/tuple of paths, which is
    what the command line produces. Results are written below ``project/name``.
    Pressing 'q' in a preview window terminates the process.
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    try:
        for path, im, im0s, vid_cap, s in dataset:
            with dt[0]:
                im = torch.from_numpy(im).to(model.device)
                im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
                im /= 255  # 0 - 255 to 0.0 - 1.0
                if len(im.shape) == 3:
                    im = im[None]  # expand for batch dim

            # Inference
            with dt[1]:
                visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
                pred, proto = model(im, augment=augment, visualize=visualize)[:2]

            # NMS
            with dt[2]:
                pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)

            # Second-stage classifier (optional)
            # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

            # Process predictions
            for i, det in enumerate(pred):  # per image
                seen += 1
                if webcam:  # batch_size >= 1
                    p, im0, frame = path[i], im0s[i].copy(), dataset.count
                    s += f'{i}: '
                else:
                    p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # im.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
                s += '%gx%g ' % im.shape[2:]  # print string
                imc = im0.copy() if save_crop else im0  # for save_crop
                annotator = Annotator(im0, line_width=line_thickness, example=str(names))
                if len(det):
                    masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)  # HWC
                    det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()  # rescale boxes to im0 size

                    # Segments
                    if save_txt:
                        segments = reversed(masks2segments(masks))
                        segments = [scale_segments(im.shape[2:], x, im0.shape, normalize=True) for x in segments]

                    # Print results
                    for c in det[:, 5].unique():
                        n = (det[:, 5] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Mask plotting
                    annotator.masks(masks,
                                    colors=[colors(x, True) for x in det[:, 5]],
                                    im_gpu=None if retina_masks else im[i])

                    # Write results
                    for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
                        if save_txt:  # Write to file
                            segj = segments[j].reshape(-1)  # (n,2) to (n*2)
                            line = (cls, *segj, conf) if save_conf else (cls, *segj)  # label format
                            with open(f'{txt_path}.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')

                        if save_img or save_crop or view_img:  # Add bbox to image
                            c = int(cls)  # integer class
                            label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                            annotator.box_label(xyxy, label, color=colors(c, True))
                            # annotator.draw.polygon(segments[j], outline=colors(c, True), width=3)
                        if save_crop:
                            save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

                # Stream results
                im0 = annotator.result()
                if view_img:
                    if platform.system() == 'Linux' and p not in windows:
                        windows.append(p)
                        cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                        cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                    cv2.imshow(str(p), im0)
                    if cv2.waitKey(1) == ord('q'):  # 1 millisecond
                        exit()

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'image':
                        cv2.imwrite(save_path, im0)
                    else:  # 'video' or 'stream'
                        if vid_path[i] != save_path:  # new video
                            vid_path[i] = save_path
                            if isinstance(vid_writer[i], cv2.VideoWriter):
                                vid_writer[i].release()  # release previous video writer
                            if vid_cap:  # video
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            else:  # stream
                                fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                            vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                        vid_writer[i].write(im0)

            # Print time (inference-only)
            LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
    finally:
        # Finalise any open output video, also when the loop is left early (error or 'q' to quit),
        # so the last file is complete before results are reported.
        for writer in vid_writer:
            if isinstance(writer, cv2.VideoWriter):
                writer.release()

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # weights is a list from the command line (nargs='+') but a single path when run() is
        # called directly; indexing a str/Path would pick one character or raise TypeError.
        first_weights = weights[0] if isinstance(weights, (list, tuple)) else weights
        strip_optimizer(first_weights)  # update model (to fix SourceChangeWarning)
type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold') parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--view-img', action='store_true', help='show results') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') parser.add_argument('--nosave', action='store_true', help='do not save images/videos') parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') parser.add_argument('--project', default=ROOT / 'runs/predict-seg', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 
def main(opt):
    """Check requirements, then call ``run`` with the parsed options as keyword arguments."""
    skipped = ('tensorboard', 'thop')  # packages excluded from the requirements check
    check_requirements(exclude=skipped)
    options = vars(opt)
    run(**options)
from utils.panoptic.dataloaders import create_dataloader from utils.panoptic.loss_tal import ComputeLoss from utils.panoptic.metrics import KEYS, fitness from utils.panoptic.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, torch_distributed_zero_first) LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) GIT_INFO = None#check_git_info() def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio # callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir last, best = w / 'last.pt', w / 'best.pt' # Hyperparameters if isinstance(hyp, str): with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: yaml_save(save_dir / 'hyp.yaml', hyp) yaml_save(save_dir / 'opt.yaml', vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: logger = GenericLogger(opt=opt, console_logger=LOGGER) # Config plots = not evolve and not opt.noplots # create plots overlap = not opt.no_overlap cuda = device.type != 'cpu' init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = 
data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names #is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt') # COCO dataset # Model check_suffix(weights, '.pt') # check weights pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak model = SegmentationModel(cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: model = SegmentationModel(cfg, ch=3, nc=nc).to(device) # create amp = check_amp(model) # check AMP # Freeze freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): #v.requires_grad = True # train all layers # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f'freezing {k}') v.requires_grad = False # Image size gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) logger.update_params({"batch_size": batch_size}) # loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler if opt.cos_lr: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] elif opt.flat_cos_lr: lf = one_flat_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] elif opt.fixed_lr: lf = lambda x: 1.0 elif opt.poly_lr: power = 0.9 lf = lambda x: ((1 - (x / epochs)) ** power) * (1.0 - hyp['lrf']) + hyp['lrf'] else: lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume best_fitness, start_epoch = 0.0, 0 if pretrained: if resume: best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.') model = 
torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and RANK != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) LOGGER.info('Using SyncBatchNorm()') # Trainloader train_loader, dataset = create_dataloader( train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None if opt.cache == 'val' else opt.cache, rect=opt.rect, rank=LOCAL_RANK, workers=workers, image_weights=opt.image_weights, close_mosaic=opt.close_mosaic != 0, quad=opt.quad, prefix=colorstr('train: '), shuffle=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, ) labels = np.concatenate(dataset.labels, 0) mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' # Process 0 if RANK in {-1, 0}: val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1, workers=workers * 2, pad=0.5, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, prefix=colorstr('val: '))[0] if not resume: #if not opt.noautoanchor: # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision if plots: plot_labels(labels, names, save_dir) # callbacks.run('on_pretrain_routine_end', labels, names) # DDP mode if cuda and RANK != -1: model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) #hyp['box'] *= 3 / nl # scale to layers #hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers #hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights 
model.names = names # Start training t0 = time.time() nb = len(train_loader) # number of batches nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class # callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ # callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) if opt.image_weights: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx if epoch == (epochs - opt.close_mosaic): LOGGER.info("Closing dataloader mosaic") dataset.mosaic = False # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(6, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info(('\n' + '%11s' * 10) % ('Epoch', 'GPU_mem', 'box_loss', 'seg_loss', 'cls_loss', 'dfl_loss', 'fcl_loss', 'dic_loss', 'Instances', 'Size')) if 
RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _, masks, semasks) in pbar: # batch ------------------------------------------------------ # callbacks.run('on_train_batch_start') #print(imgs.shape) #print(semasks.shape) #print(masks.shape) ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float(), semasks=semasks.to(device).float()) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. 
# Backward torch.use_deterministic_algorithms(False) scaler.scale(loss).backward() # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: scaler.unscale_(optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) last_opt_step = ni # Log if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) pbar.set_description(('%11s' * 2 + '%11.4g' * 8) % (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths) # if callbacks.stop_training: # return # Mosaic plots if plots: if ni < 10: plot_images_and_masks(imgs, targets, masks, semasks, paths, save_dir / f"train_batch{ni}.jpg") if ni == 10: files = sorted(save_dir.glob('train*.jpg')) logger.log_images(files, "Mosaics", epoch) # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for loggers scheduler.step() if RANK in {-1, 0}: # mAP # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP if (opt.save_period > 0 and epoch % opt.save_period == 0) or (epoch > (epochs - 2 * opt.close_mosaic)): results, maps, _ = validate.run(data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, half=amp, model=ema.ema, single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, plots=False, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # Update best mAP fi = 
fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'model': deepcopy(de_parallel(model)).half(), 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), 'opt': vars(opt), 'git': GIT_INFO, # {remote, branch, commit} if a git repo 'date': datetime.now().isoformat()} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f'epoch{epoch}.pt') logger.log_model(w / f'epoch{epoch}.pt') del ckpt # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training broadcast_list = [stop if RANK == 0 else None] dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks if RANK != 0: stop = broadcast_list[0] if stop: break # must break all DDP ranks # end epoch ---------------------------------------------------------------------------------------------------- # end training ----------------------------------------------------------------------------------------------------- if RANK in {-1, 0}: LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') for f in last, best: if f.exists(): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), iou_thres=0.65 
if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: # callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[6:22], results)), epochs) if not opt.evolve: logger.log_model(best, epoch) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results", epoch + 1) logger.log_images(sorted(save_dir.glob('val*.jpg')), "Validation", epoch + 1) torch.cuda.empty_cache() return results def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolo-pan.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=100, help='total training epochs') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') 
parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--noval', action='store_true', help='only validate final epoch') parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') parser.add_argument('--noplots', action='store_true', help='save no plot files') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--project', default=ROOT / 'runs/train-pan', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') 
parser.add_argument('--flat-cos-lr', action='store_true', help='cosine LR scheduler') parser.add_argument('--fixed-lr', action='store_true', help='fixed LR scheduler') parser.add_argument('--poly-lr', action='store_true', help='fixed LR scheduler') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') parser.add_argument('--seed', type=int, default=0, help='Global training seed') parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') parser.add_argument('--close-mosaic', type=int, default=0, help='Experimental') # Instance Segmentation Args parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the truth masks to saving memory') parser.add_argument('--no-overlap', action='store_true', help='Overlap masks train faster at slightly less mAP') return parser.parse_known_args()[0] if known else parser.parse_args() def main(opt, callbacks=Callbacks()): # Checks if RANK in {-1, 0}: print_args(vars(opt)) #check_git_status() #check_requirements() # Resume if opt.resume and not opt.evolve: # resume from specified or most recent last.pt last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml opt_data = opt.data # original dataset if opt_yaml.is_file(): with open(opt_yaml, errors='ignore') as f: d = yaml.safe_load(f) else: d = torch.load(last, map_location='cpu')['opt'] opt = argparse.Namespace(**d) # replace opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate if is_url(opt_data): opt.data = 
check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' if opt.evolve: if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume if opt.name == 'cfg': opt.name = Path(opt.cfg).stem # use model.yaml as name opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: msg = 'is not compatible with YOLO Multi-GPU DDP training' assert not opt.image_weights, f'--image-weights {msg}' assert not opt.evolve, f'--evolve {msg}' assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' torch.cuda.set_device(LOCAL_RANK) device = torch.device('cuda', LOCAL_RANK) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") # Train if not opt.evolve: train(opt.hyp, opt, device, callbacks) # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) meta = { 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 
'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 'box': (1, 0.02, 0.2), # box loss gain 'cls': (1, 0.2, 4.0), # cls loss gain 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 'mixup': (1, 0.0, 1.0), # image mixup (probability) 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) with open(opt.hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 if opt.noautoanchor: del hyp['anchors'], meta['anchors'] opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' if opt.bucket: os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists for _ in range(opt.evolve): # generations to evolve if evolve_csv.exists(): 
# if evolve.csv exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection elif parent == 'weighted': x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination # Mutate mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) hyp[k] = float(x[i + 7] * v[i]) # mutate # Constrain to limits for k, v in meta.items(): hyp[k] = max(hyp[k], v[1]) # lower limit hyp[k] = min(hyp[k], v[2]) # upper limit hyp[k] = round(hyp[k], 5) # significant digits # Train mutation results = train(hyp.copy(), opt, device, callbacks) callbacks = Callbacks() # Write mutation results print_mutation(KEYS, results, hyp.copy(), save_dir, opt.bucket) # Plot results plot_evolve(evolve_csv) LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' f"Results saved to {colorstr('bold', save_dir)}\n" f'Usage example: $ python train.py --hyp {evolve_yaml}') def run(**kwargs): # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolo.pt') opt = parse_opt(True) for k, v in kwargs.items(): setattr(opt, k, v) main(opt) return opt if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: panoptic/val.py 
================================================
import argparse
import json
import os
import sys
from multiprocessing.pool import ThreadPool
from pathlib import Path

import numpy as np
import torch
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLO root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

import torch.nn.functional as F
import torchvision.transforms as transforms
from pycocotools import mask as maskUtils
from models.common import DetectMultiBackend
from models.yolo import SegmentationModel
from utils.callbacks import Callbacks
from utils.coco_utils import getCocoIds, getMappingId, getMappingIndex
from utils.general import (LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, Profile, check_dataset, check_img_size,
                           check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path,
                           non_max_suppression, print_args, scale_boxes, xywh2xyxy, xyxy2xywh)
from utils.metrics import ConfusionMatrix, box_iou
from utils.plots import output_to_target, plot_val_study
from utils.panoptic.dataloaders import create_dataloader
from utils.panoptic.general import mask_iou, process_mask, process_mask_upsample, scale_image
from utils.panoptic.metrics import Metrics, ap_per_class_box_and_mask, Semantic_Metrics
from utils.panoptic.plots import plot_images_and_masks
from utils.torch_utils import de_parallel, select_device, smart_inference_mode


def save_one_txt(predn, save_conf, shape, file):
    """Append one image's predictions to a text label file, one detection per line.

    Args:
        predn: predictions whose rows unpack as (x1, y1, x2, y2, conf, cls).
        save_conf: when True, the confidence is written as a trailing column.
        shape: image shape; indexed as [1, 0, 1, 0] to build the whwh gain,
            i.e. presumably (height, width, ...) - confirm against the caller.
        file: path of the text file, opened in append mode.
    """
    # Save one txt result
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    for *xyxy, conf, cls in predn.tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
        # the file is re-opened in append mode for every detection
        with open(file, 'a') as f:
            f.write(('%g ' * len(line)).rstrip() % line + '\n')


def save_one_json(predn, jdict, path, class_map, pred_masks):
    """Append one image's detections, with RLE-encoded masks, to ``jdict``.

    Args:
        predn: predictions with box in columns 0-3 (xyxy), score in column 4
            and class index in column 5.
        jdict: list that receives one dict per detection (mutated in place).
        path: image path; a numeric stem becomes an int ``image_id``, any other
            stem is used as a string.
        class_map: maps the predicted class index to the output ``category_id``.
        pred_masks: mask array transposed with (2, 0, 1) before encoding, so it
            is presumably laid out (H, W, N) - confirm against the caller.
    """
    # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
    from pycocotools.mask import encode

    def single_encode(x):
        # Encode a single 2-D mask as RLE and decode its byte counts to str so it can be JSON-serialised
        rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
        rle["counts"] = rle["counts"].decode("utf-8")
        return rle

    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
    box = xyxy2xywh(predn[:, :4])  # xywh
    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
    pred_masks = np.transpose(pred_masks, (2, 0, 1))
    # masks are encoded in parallel on a thread pool
    with ThreadPool(NUM_THREADS) as pool:
        rles = pool.map(single_encode, pred_masks)
    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
        jdict.append({
            'image_id': image_id,
            'category_id': class_map[int(p[5])],
            'bbox': [round(x, 3) for x in b],
            'score': round(p[4], 5),
            'segmentation': rles[i]})


def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Return correct prediction matrix
    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds; one output column per threshold
        pred_masks: predicted masks, flattened per detection for mask IoU (used only when masks=True)
        gt_masks: ground-truth masks (used only when masks=True)
        overlap: when True, gt_masks is an index map that is expanded into one binary mask per label
        masks: when True, IoU is computed between masks instead of boxes
    Returns:
        correct (array[N, len(iouv)]), boolean tensor on iouv's device (10 IoU levels for the usual iouv)
    """
    if masks:
        if overlap:
            # Expand the index map into per-label binary masks: label k (1-based) owns pixels equal to k
            nl = len(labels)
            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # Resize ground truth to the prediction resolution, then re-binarise at 0.5
            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
            gt_masks = gt_masks.gt_(0.5)
        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
    else:  # boxes
        iou = box_iou(labels[:, 1:], detections[:, :4])

    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
    correct_class = labels[:, 0:1] == detections[:, 5]  # (M, N) class-agreement matrix via broadcasting
    for i in range(len(iouv)):
        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
            if x[0].shape[0] > 1:
                # Sort candidates by IoU (descending), then keep one row per detection and one per label
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                # matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
            correct[matches[:, 1].astype(int), i] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)


@smart_inference_mode() def run( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.6, # NMS IoU threshold max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val-pan', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), plots=True, overlap=False, mask_downsample_ratio=1, compute_loss=None, callbacks=Callbacks(), ): if save_json: check_requirements(['pycocotools']) process = process_mask_upsample # more accurate else: process = process_mask # faster # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() nm =
de_parallel(model).model[-1].nm # number of masks else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32 # number of masks if engine: batch_size = model.batch_size else: device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' #is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes stuff_names = data.get('stuff_names', []) # names of stuff classes stuff_nc = len(stuff_names) # number of stuff classes iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Semantic Segmentation img_id_list = [] # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, prefix=colorstr(f'{task}: '), overlap_mask=overlap, mask_downsample_ratio=mask_downsample_ratio)[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%22s' + '%11s' * 12) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)", "Mask(P", "R", "mAP50", "mAP50-95)", 'S(MIoU', 'FWIoU)') dt = Profile(), Profile(), Profile() metrics = Metrics() semantic_metrics = Semantic_Metrics(nc = (nc + stuff_nc), device = device) loss = torch.zeros(6, device=device) jdict, stats = [], [] semantic_jdict = [] # callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar for batch_i, (im, targets, paths, shapes, masks, semasks) in enumerate(pbar): # callbacks.run('on_val_batch_start') with dt[0]: if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) masks = masks.to(device) semasks = semasks.to(device) masks = masks.float() semasks = semasks.float() im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width # Inference with dt[1]: preds, train_out = model(im)# if compute_loss else (*model(im, augment=augment)[:2], None) #train_out, preds, protos = p if len(p) == 3 else p[1] #preds = p #train_out = p[1][0] if len(p[1]) == 3 else p[0] # protos = train_out[-1] #print(preds.shape) 
#print(train_out[0].shape) #print(train_out[1].shape) #print(train_out[2].shape) _, pred_masks, protos, psemasks = train_out # Loss if compute_loss: loss += compute_loss(train_out, targets, masks, semasks = semasks)[1] # box, obj, cls # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: preds = non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm) # Metrics plot_masks = [] # masks for plotting plot_semasks = [] # masks for plotting if training: semantic_metrics.update(psemasks, semasks) else: _, _, smh, smw = semasks.shape semantic_metrics.update(torch.nn.functional.interpolate(psemasks, size = (smh, smw), mode = 'bilinear', align_corners = False), semasks) if plots and batch_i < 3: plot_semasks.append(psemasks.clone().detach().cpu()) for si, (pred, proto, psemask) in enumerate(zip(preds, protos, psemasks)): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] image_id = path.stem img_id_list.append(image_id) correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) else: # Masks midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target 
boxes scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct_bboxes = process_batch(predn, labelsn, iouv) correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls) pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if plots and batch_i < 3: plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: pred_masks = scale_image(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Semantic Segmentation h0, w0 = shape # resize _, mask_h, mask_w = psemask.shape h_ratio = mask_h / h0 w_ratio = mask_w / w0 if h_ratio == w_ratio: psemask = torch.nn.functional.interpolate(psemask[None, :], size = (h0, w0), mode = 'bilinear', align_corners = False) else: transform = transforms.CenterCrop((h0, w0)) if (1 != h_ratio) and (1 != w_ratio): h_new = h0 if (h_ratio < w_ratio) else int(mask_h / w_ratio) w_new = w0 if (h_ratio > w_ratio) else int(mask_w / h_ratio) psemask = torch.nn.functional.interpolate(psemask[None, :], size = (h_new, w_new), mode = 'bilinear', align_corners = False) psemask = transform(psemask) psemask = torch.squeeze(psemask) nc, h, w = psemask.shape semantic_mask = torch.flatten(psemask, start_dim = 1).permute(1, 0) # class x h x w -> (h x w) x class max_idx = semantic_mask.argmax(1) output_masks = torch.zeros(semantic_mask.shape).scatter(1, max_idx.cpu().unsqueeze(1), 1.0) # one hot: (h x w) x class output_masks = 
torch.reshape(output_masks.permute(1, 0), (nc, h, w)) # (h x w) x class -> class x h x w psemask = output_masks.to(device = device) # TODO: check is_coco instances_ids = getCocoIds(name = 'instances') stuff_mask = torch.zeros((h, w), device = device) check_semantic_mask = False for idx, pred_semantic_mask in enumerate(psemask): category_id = int(getMappingId(idx)) if 183 == category_id: # set all non-stuff pixels to other pred_semantic_mask = (torch.logical_xor(stuff_mask, torch.ones((h, w), device = device))).int() # ignore the classes which all zeros / unlabeled class if (0 >= torch.max(pred_semantic_mask)) or (0 >= category_id): continue if category_id not in instances_ids: # record all stuff mask stuff_mask = torch.logical_or(stuff_mask, pred_semantic_mask) if (category_id not in instances_ids): rle = maskUtils.encode(np.asfortranarray(pred_semantic_mask.cpu(), dtype = np.uint8)) rle['counts'] = rle['counts'].decode('utf-8') temp_d = { 'image_id': int(image_id) if image_id.isnumeric() else image_id, 'category_id': category_id, 'segmentation': rle, 'score': 1 } semantic_jdict.append(temp_d) check_semantic_mask = True if not check_semantic_mask: # append a other mask for evaluation if the image without any mask other_mask = (torch.ones((h, w), device = device)).int() rle = maskUtils.encode(np.asfortranarray(other_mask.cpu(), dtype = np.uint8)) rle['counts'] = rle['counts'].decode('utf-8') temp_d = { 'image_id': int(image_id) if image_id.isnumeric() else image_id, 'category_id': 183, 'segmentation': rle, 'score': 1 } semantic_jdict.append(temp_d) # Plot images if plots and batch_i < 3: if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) if len(plot_semasks): plot_semasks = torch.cat(plot_semasks, dim = 0) plot_images_and_masks(im, targets, masks, semasks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) plot_images_and_masks(im, output_to_target(preds, max_det=15), plot_masks, plot_semasks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', 
names) # pred # callbacks.run('on_val_batch_end') # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) metrics.update(results) nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class # Print results pf = '%22s' + '%11i' * 2 + '%11.3g' * 10 # print format LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results(), *semantic_metrics.results())) if nt.sum() == 0: LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(metrics.ap_class_index): LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i), *semantic_metrics.results())) # Print speeds t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) # callbacks.run('on_val_end') mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results() miou_sem, fwiou_sem = semantic_metrics.results() semantic_metrics.reset() # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights anno_path = Path(data.get('path', '../coco')) anno_json = str(anno_path / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) semantic_anno_json = str(anno_path / 'annotations/stuff_val2017.json') # annotations json semantic_pred_json = str(save_dir / f"{w}_predictions_stuff.json") # predictions json LOGGER.info(f'\nsaving {semantic_pred_json}...') with open(semantic_pred_json, 'w') as f: json.dump(semantic_jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api results = [] for eval in COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm'): if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate eval.evaluate() eval.accumulate() eval.summarize() results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5) map_bbox, map50_bbox, map_mask, map50_mask = results # Semantic Segmentation from utils.stuff_seg.cocostuffeval import COCOStuffeval LOGGER.info(f'\nEvaluating pycocotools stuff... 
') imgIds = [int(x) for x in img_id_list] stuffGt = COCO(semantic_anno_json) # initialize COCO ground truth api stuffDt = stuffGt.loadRes(semantic_pred_json) # initialize COCO pred api cocoStuffEval = COCOStuffeval(stuffGt, stuffDt) cocoStuffEval.params.imgIds = imgIds # image IDs to evaluate cocoStuffEval.evaluate() stats, statsClass = cocoStuffEval.summarize() stuffIds = getCocoIds(name = 'stuff') title = ' {:<5} | {:^6} | {:^6} '.format('class', 'iou', 'macc') if (0 >= len(stuff_names)) else \ ' {:<5} | {:<20} | {:^6} | {:^6} '.format('class', 'class name', 'iou', 'macc') print(title) for idx, (iou, macc) in enumerate(zip(statsClass['ious'], statsClass['maccs'])): id = (idx + 1) if id not in stuffIds: continue content = ' {:<5} | {:0.4f} | {:0.4f} '.format(str(id), iou, macc) if (0 >= len(stuff_names)) else \ ' {:<5} | {:<20} | {:0.4f} | {:0.4f} '.format(str(id), str(stuff_names[getMappingIndex(id, name = 'stuff')]), iou, macc) print(content) except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask, miou_sem, fwiou_sem return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-pan.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo-pan.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, 
def main(opt):
    """
    Dispatch a validation job according to ``opt.task``.

    ``train`` / ``val`` / ``test`` perform a single ``run`` call. Any other task first forces
    ``opt.half`` from CUDA availability; ``speed`` then runs once per weights file with fixed
    thresholds, and ``study`` runs once per weights file and image size, saving one
    ``study_*.txt`` per weights file, archiving them into ``study.zip`` and plotting the sweep.

    Arguments:
        opt (argparse.Namespace): parsed options; forwarded to ``run`` as keyword arguments
            via ``vars(opt)``. ``opt.weights`` may be a single path or a list of paths.
    """
    import zipfile  # local import: only needed by the 'study' task

    # check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))

    if opt.task in ('train', 'val', 'test'):  # run normally
        if opt.conf_thres > 0.001:
            LOGGER.warning(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
        if opt.save_hybrid:
            LOGGER.warning('WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone')
        run(**vars(opt))
        return

    weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
    opt.half = torch.cuda.is_available() and opt.device != 'cpu'  # FP16 for fastest results
    if opt.task == 'speed':  # speed benchmarks
        # python val.py --task speed --data coco.yaml --batch 1 --weights yolo.pt...
        opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
        for opt.weights in weights:  # run() reads the current weights from opt
            run(**vars(opt), plots=False)

    elif opt.task == 'study':  # speed vs mAP benchmarks
        # python val.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt...
        image_sizes = list(range(256, 1536 + 128, 128))  # x axis (image sizes)
        for opt.weights in weights:
            results_file = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt'  # filename to save to
            rows = []  # y axis
            for opt.imgsz in image_sizes:  # img-size
                LOGGER.info(f'\nRunning {results_file} --imgsz {opt.imgsz}...')
                r, _, t = run(**vars(opt), plots=False)
                rows.append(r + t)  # results and times
            np.savetxt(results_file, rows, fmt='%10.4g')  # save
        # Archive with the standard library instead of shelling out to an external `zip` binary.
        with zipfile.ZipFile('study.zip', 'w', zipfile.ZIP_DEFLATED) as archive:
            for study_txt in sorted(Path('.').glob('study_*.txt')):
                archive.write(study_txt)
        plot_val_study(x=image_sizes)  # plot
pandas>=1.1.4 seaborn>=0.11.0 # Export ---------------------------------------------------------------------- # coremltools>=6.0 # onnx>=1.9.0 # onnx-simplifier>=0.4.1 # nvidia-pyindex # nvidia-tensorrt # scikit-learn<=1.1.2 # tensorflow>=2.4.1 # tensorflowjs>=3.9.0 # openvino-dev # Deploy ---------------------------------------------------------------------- # tritonclient[all]~=2.24.0 # Extras ---------------------------------------------------------------------- # mss albumentations>=1.0.3 pycocotools>=2.0 ================================================ FILE: scripts/get_coco.sh ================================================ #!/bin/bash # COCO 2017 dataset http://cocodataset.org # Download command: bash ./scripts/get_coco.sh # Download/unzip labels d='./' # unzip directory url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ f='coco2017labels-segments.zip' # or 'coco2017labels.zip', 68 MB echo 'Downloading' $url$f ' ...' curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background # Download/unzip images d='./coco/images' # unzip directory url=http://images.cocodataset.org/zips/ f1='train2017.zip' # 19G, 118k images f2='val2017.zip' # 1G, 5k images f3='test2017.zip' # 7G, 41k images (optional) for f in $f1 $f2 $f3; do echo 'Downloading' $url$f '...' 
@smart_inference_mode()
def run(
        weights=ROOT / 'yolo-seg.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/predict-seg',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
        retina_masks=False,
):
    """
    Run segmentation inference on ``source`` and optionally display / save the annotated results.

    The source is classified as webcam/stream, screenshot or files; the model is loaded through
    ``DetectMultiBackend``; each batch is pre-processed, inferred and passed through NMS; masks and
    boxes are drawn per image; results are written under ``project/name`` depending on the
    ``save_*`` / ``nosave`` flags. When ``update`` is set, ``strip_optimizer`` is applied to the
    first weights file (``weights`` may be a single path or a list/tuple of paths).
    """
    source = str(source)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
    screenshot = source.lower().startswith('screen')
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
    stride, names, pt = model.stride, model.names, model.pt
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    bs = 1  # batch_size
    if webcam:
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred, proto = model(im, augment=augment, visualize=visualize)[:2]

        # NMS
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det, nm=32)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                masks = process_mask(proto[i], det[:, 6:], det[:, :4], im.shape[2:], upsample=True)  # HWC
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()  # rescale boxes to im0 size

                # Segments
                if save_txt:
                    # Reversed so that segments[j] lines up with the reversed detections written below.
                    segments = reversed(masks2segments(masks))
                    segments = [scale_segments(im.shape[2:], x, im0.shape, normalize=True) for x in segments]

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Mask plotting
                annotator.masks(masks,
                                colors=[colors(x, True) for x in det[:, 5]],
                                im_gpu=None if retina_masks else im[i])

                # Write results
                for j, (*xyxy, conf, cls) in enumerate(reversed(det[:, :6])):
                    if save_txt:  # Write to file
                        segj = segments[j].reshape(-1)  # (n,2) to (n*2)
                        line = (cls, *segj, conf) if save_conf else (cls, *segj)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        # annotator.draw.polygon(segments[j], outline=colors(c, True), width=3)
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # 1 millisecond
                    exit()

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # `weights` is a list when given on the CLI (nargs='+') but a single Path/str by default;
        # indexing a Path raises TypeError and indexing a str yields its first character.
        first_weights = weights[0] if isinstance(weights, (list, tuple)) else weights
        strip_optimizer(first_weights)  # update model (to fix SourceChangeWarning)
def main(opt):
    """
    Check the environment requirements, then launch inference with the parsed options.

    Arguments:
        opt (argparse.Namespace): options whose attributes are passed to ``run`` as keyword arguments.
    """
    check_requirements(exclude=('tensorboard', 'thop'))
    run_kwargs = vars(opt)
    run(**run_kwargs)
-1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) GIT_INFO = None#check_git_info() def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio # callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir last, best = w / 'last.pt', w / 'best.pt' # Hyperparameters if isinstance(hyp, str): with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: yaml_save(save_dir / 'hyp.yaml', hyp) yaml_save(save_dir / 'opt.yaml', vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: logger = GenericLogger(opt=opt, console_logger=LOGGER) # Config plots = not evolve and not opt.noplots # create plots overlap = not opt.no_overlap cuda = device.type != 'cpu' init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names #is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt') # COCO dataset # Model check_suffix(weights, '.pt') # 
check weights pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak model = SegmentationModel(cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: model = SegmentationModel(cfg, ch=3, nc=nc).to(device) # create amp = check_amp(model) # check AMP # Freeze freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): #v.requires_grad = True # train all layers # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f'freezing {k}') v.requires_grad = False # Image size gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) logger.update_params({"batch_size": batch_size}) # loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler if opt.cos_lr: lf = one_cycle(1, hyp['lrf'], epochs) # 
cosine 1->hyp['lrf'] else: lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume best_fitness, start_epoch = 0.0, 0 if pretrained: if resume: best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.') model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and RANK != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) LOGGER.info('Using SyncBatchNorm()') # Trainloader train_loader, dataset = create_dataloader( train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None if opt.cache == 'val' else opt.cache, rect=opt.rect, rank=LOCAL_RANK, workers=workers, image_weights=opt.image_weights, close_mosaic=opt.close_mosaic != 0, quad=opt.quad, prefix=colorstr('train: '), shuffle=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, ) labels = np.concatenate(dataset.labels, 0) mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. 
Possible class labels are 0-{nc - 1}' # Process 0 if RANK in {-1, 0}: val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1, workers=workers * 2, pad=0.5, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, prefix=colorstr('val: '))[0] if not resume: #if not opt.noautoanchor: # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision if plots: plot_labels(labels, names, save_dir) # callbacks.run('on_pretrain_routine_end', labels, names) # DDP mode if cuda and RANK != -1: model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) #hyp['box'] *= 3 / nl # scale to layers #hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers #hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nb = len(train_loader) # number of batches nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class # callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using 
{train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ # callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) if opt.image_weights: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx if epoch == (epochs - opt.close_mosaic): LOGGER.info("Closing dataloader mosaic") dataset.mosaic = False # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info(('\n' + '%11s' * 8) % ('Epoch', 'GPU_mem', 'box_loss', 'seg_loss', 'cls_loss', 'dfl_loss', 'Instances', 'Size')) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------ # callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] 
if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. # Backward scaler.scale(loss).backward() # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: scaler.unscale_(optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) last_opt_step = ni # Log if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) pbar.set_description(('%11s' * 2 + '%11.4g' * 6) % (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths) # if callbacks.stop_training: # return # Mosaic plots if plots: if ni < 3: plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") if ni == 10: files = sorted(save_dir.glob('train*.jpg')) logger.log_images(files, "Mosaics", epoch) # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for loggers 
scheduler.step() if RANK in {-1, 0}: # mAP # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP results, maps, _ = validate.run(data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, half=amp, model=ema.ema, single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, plots=False, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'model': deepcopy(de_parallel(model)).half(), 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), 'opt': vars(opt), 'git': GIT_INFO, # {remote, branch, commit} if a git repo 'date': datetime.now().isoformat()} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f'epoch{epoch}.pt') logger.log_model(w / f'epoch{epoch}.pt') del ckpt # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training broadcast_list = [stop if RANK == 0 else None] dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks if RANK != 0: stop = broadcast_list[0] if stop: break # must break all DDP ranks # end epoch 
---------------------------------------------------------------------------------------------------- # end training ----------------------------------------------------------------------------------------------------- if RANK in {-1, 0}: LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') for f in last, best: if f.exists(): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, plots=plots, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # val best model with plots if is_coco: # callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi) metrics_dict = dict(zip(KEYS, list(mloss) + list(results) + lr)) logger.log_metrics(metrics_dict, epoch) # callbacks.run('on_train_end', last, best, epoch, results) # on train end callback using genericLogger logger.log_metrics(dict(zip(KEYS[4:16], results)), epochs) if not opt.evolve: logger.log_model(best, epoch) if plots: plot_results_with_masks(file=save_dir / 'results.csv') # save results.png files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] files = [(save_dir / f) for f in files if (save_dir / f).exists()] # filter LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") logger.log_images(files, "Results", epoch + 1) logger.log_images(sorted(save_dir.glob('val*.jpg')), "Validation", epoch + 1) torch.cuda.empty_cache() return results def parse_opt(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolo-seg.pt', help='initial weights path') 
# Remaining command-line options registered by parse_opt() on its ArgumentParser.
    # Model / data / schedule
    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=100, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    # Bare --resume yields True (main() then uses the latest run); a value is kept as a string path
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--noval', action='store_true', help='only validate final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
    parser.add_argument('--noplots', action='store_true', help='save no plot files')
    # Bare --evolve means 300 generations; omitted means None (no evolution)
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    # Bare --cache means 'ram'; omitted means None
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    # Output location: results go to <project>/<name>
    parser.add_argument('--project', default=ROOT / 'runs/train-seg', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--quad', action='store_true', help='quad dataloader')
    parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler')
    parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    # One value N is expanded by train() to layers 0..N-1; several values name the layers directly
    parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    parser.add_argument('--close-mosaic', type=int, default=0, help='Experimental')

    # Instance Segmentation Args
    parser.add_argument('--mask-ratio', type=int, default=4, help='Downsample the truth masks to saving memory')
    parser.add_argument('--no-overlap', action='store_true', help='Overlap masks train faster at slightly less mAP')

    # known=True: keep only the namespace from parse_known_args(), so unrecognised arguments are ignored
    return parser.parse_known_args()[0] if known
else parser.parse_args() def main(opt, callbacks=Callbacks()): # Checks if RANK in {-1, 0}: print_args(vars(opt)) #check_git_status() #check_requirements() # Resume if opt.resume and not opt.evolve: # resume from specified or most recent last.pt last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml opt_data = opt.data # original dataset if opt_yaml.is_file(): with open(opt_yaml, errors='ignore') as f: d = yaml.safe_load(f) else: d = torch.load(last, map_location='cpu')['opt'] opt = argparse.Namespace(**d) # replace opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate if is_url(opt_data): opt.data = check_file(opt_data) # avoid HUB resume auth timeout else: opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' if opt.evolve: if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve opt.project = str(ROOT / 'runs/evolve') opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume if opt.name == 'cfg': opt.name = Path(opt.cfg).stem # use model.yaml as name opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # DDP mode device = select_device(opt.device, batch_size=opt.batch_size) if LOCAL_RANK != -1: msg = 'is not compatible with YOLO Multi-GPU DDP training' assert not opt.image_weights, f'--image-weights {msg}' assert not opt.evolve, f'--evolve {msg}' assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' 
torch.cuda.set_device(LOCAL_RANK) device = torch.device('cuda', LOCAL_RANK) dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") # Train if not opt.evolve: train(opt.hyp, opt, device, callbacks) # Evolve hyperparameters (optional) else: # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) meta = { 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay 'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok) 'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum 'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr 'box': (1, 0.02, 0.2), # box loss gain 'cls': (1, 0.2, 4.0), # cls loss gain 'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight 'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels) 'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight 'iou_t': (0, 0.1, 0.7), # IoU training threshold 'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold 'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore) 'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5) 'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction) 'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction) 'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction) 'degrees': (1, 0.0, 45.0), # image rotation (+/- deg) 'translate': (1, 0.0, 0.9), # image translation (+/- fraction) 'scale': (1, 0.0, 0.9), # image scale (+/- gain) 'shear': (1, 0.0, 10.0), # image shear (+/- deg) 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001 'flipud': (1, 0.0, 1.0), # image flip up-down (probability) 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability) 'mosaic': (1, 0.0, 1.0), # image mixup (probability) 'mixup': (1, 0.0, 1.0), # image mixup 
(probability) 'copy_paste': (1, 0.0, 1.0)} # segment copy-paste (probability) with open(opt.hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict if 'anchors' not in hyp: # anchors commented in hyp.yaml hyp['anchors'] = 3 if opt.noautoanchor: del hyp['anchors'], meta['anchors'] opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir) # only val/save final epoch # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv' if opt.bucket: os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}') # download evolve.csv if exists for _ in range(opt.evolve): # generations to evolve if evolve_csv.exists(): # if evolve.csv exists: select best hyps and mutate # Select parent(s) parent = 'single' # parent selection method: 'single' or 'weighted' x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1) n = min(5, len(x)) # number of previous results to consider x = x[np.argsort(-fitness(x))][:n] # top n mutations w = fitness(x) - fitness(x).min() + 1E-6 # weights (sum > 0) if parent == 'single' or len(x) == 1: # x = x[random.randint(0, n - 1)] # random selection x = x[random.choices(range(n), weights=w)[0]] # weighted selection elif parent == 'weighted': x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination # Mutate mp, s = 0.8, 0.2 # mutation probability, sigma npr = np.random npr.seed(int(time.time())) g = np.array([meta[k][0] for k in hyp.keys()]) # gains 0-1 ng = len(meta) v = np.ones(ng) while all(v == 1): # mutate until a change occurs (prevent duplicates) v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0) for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300) hyp[k] = float(x[i + 7] * v[i]) # mutate # Constrain to limits for k, v in meta.items(): hyp[k] = max(hyp[k], v[1]) # lower limit hyp[k] = min(hyp[k], v[2]) # upper limit hyp[k] = round(hyp[k], 5) # significant digits # Train 
mutation results = train(hyp.copy(), opt, device, callbacks) callbacks = Callbacks() # Write mutation results print_mutation(KEYS, results, hyp.copy(), save_dir, opt.bucket) # Plot results plot_evolve(evolve_csv) LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n' f"Results saved to {colorstr('bold', save_dir)}\n" f'Usage example: $ python train.py --hyp {evolve_yaml}') def run(**kwargs): # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolo.pt') opt = parse_opt(True) for k, v in kwargs.items(): setattr(opt, k, v) main(opt) return opt if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: segment/train_dual.py ================================================ import argparse import math import os import random import sys import time from copy import deepcopy from datetime import datetime from pathlib import Path import numpy as np import torch import torch.distributed as dist import torch.nn as nn import yaml from torch.optim import lr_scheduler from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import segment.val_dual as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import SegmentationModel from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks from utils.downloads import attempt_download, is_url from utils.general import (LOGGER, TQDM_BAR_FORMAT, check_amp, check_dataset, check_file, check_git_info, check_git_status, check_img_size, check_requirements, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, one_cycle, print_args, print_mutation, strip_optimizer, 
yaml_save) from utils.loggers import GenericLogger from utils.plots import plot_evolve, plot_labels from utils.segment.dataloaders import create_dataloader from utils.segment.loss_tal_dual import ComputeLoss #from utils.segment.loss_tal_dual import ComputeLossLH as ComputeLoss from utils.segment.metrics import KEYS, fitness from utils.segment.plots import plot_images_and_masks, plot_results_with_masks from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, torch_distributed_zero_first) LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) GIT_INFO = None#check_git_info() def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictionary save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze, mask_ratio = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \ opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze, opt.mask_ratio # callbacks.run('on_pretrain_routine_start') # Directories w = save_dir / 'weights' # weights dir (w.parent if evolve else w).mkdir(parents=True, exist_ok=True) # make dir last, best = w / 'last.pt', w / 'best.pt' # Hyperparameters if isinstance(hyp, str): with open(hyp, errors='ignore') as f: hyp = yaml.safe_load(f) # load hyps dict LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) opt.hyp = hyp.copy() # for saving hyps to checkpoints # Save run settings if not evolve: yaml_save(save_dir / 'hyp.yaml', hyp) yaml_save(save_dir / 'opt.yaml', vars(opt)) # Loggers data_dict = None if RANK in {-1, 0}: logger = GenericLogger(opt=opt, console_logger=LOGGER) # Config plots = not evolve and not opt.noplots # create plots overlap = not opt.no_overlap cuda = device.type != 'cpu' 
init_seeds(opt.seed + 1 + RANK, deterministic=True) with torch_distributed_zero_first(LOCAL_RANK): data_dict = data_dict or check_dataset(data) # check if None train_path, val_path = data_dict['train'], data_dict['val'] nc = 1 if single_cls else int(data_dict['nc']) # number of classes names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names #is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt') # COCO dataset is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt') # COCO dataset # Model check_suffix(weights, '.pt') # check weights pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(LOCAL_RANK): weights = attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location='cpu') # load checkpoint to CPU to avoid CUDA memory leak model = SegmentationModel(cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else [] # exclude keys csd = ckpt['model'].float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(csd, strict=False) # load LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}') # report else: model = SegmentationModel(cfg, ch=3, nc=nc).to(device) # create amp = check_amp(model) # check AMP # Freeze freeze = [f'model.{x}.' 
for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze for k, v in model.named_parameters(): #v.requires_grad = True # train all layers # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results) if any(x in k for x in freeze): LOGGER.info(f'freezing {k}') v.requires_grad = False # Image size gs = max(int(model.stride.max()), 32) # grid size (max stride) imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size batch_size = check_train_batch_size(model, imgsz, amp) logger.update_params({"batch_size": batch_size}) # loggers.on_params_update({"batch_size": batch_size}) # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= batch_size * accumulate / nbs # scale weight_decay optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay']) # Scheduler if opt.cos_lr: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] else: lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # EMA ema = ModelEMA(model) if RANK in {-1, 0} else None # Resume best_fitness, start_epoch = 0.0, 0 if pretrained: if resume: best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume) del ckpt, csd # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.') model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and RANK != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) LOGGER.info('Using SyncBatchNorm()') # Trainloader train_loader, dataset = 
create_dataloader( train_path, imgsz, batch_size // WORLD_SIZE, gs, single_cls, hyp=hyp, augment=True, cache=None if opt.cache == 'val' else opt.cache, rect=opt.rect, rank=LOCAL_RANK, workers=workers, image_weights=opt.image_weights, close_mosaic=opt.close_mosaic != 0, quad=opt.quad, prefix=colorstr('train: '), shuffle=True, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, ) labels = np.concatenate(dataset.labels, 0) mlc = int(labels[:, 0].max()) # max label class assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}' # Process 0 if RANK in {-1, 0}: val_loader = create_dataloader(val_path, imgsz, batch_size // WORLD_SIZE * 2, gs, single_cls, hyp=hyp, cache=None if noval else opt.cache, rect=True, rank=-1, workers=workers * 2, pad=0.5, mask_downsample_ratio=mask_ratio, overlap_mask=overlap, prefix=colorstr('val: '))[0] if not resume: #if not opt.noautoanchor: # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # run AutoAnchor model.half().float() # pre-reduce anchor precision if plots: plot_labels(labels, names, save_dir) # callbacks.run('on_pretrain_routine_end', labels, names) # DDP mode if cuda and RANK != -1: model = smart_DDP(model) # Model attributes nl = de_parallel(model).model[-1].nl # number of detection layers (to scale hyps) #hyp['box'] *= 3 / nl # scale to layers #hyp['cls'] *= nc / 80 * 3 / nl # scale to classes and layers #hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nb = len(train_loader) # number of batches nw = max(round(hyp['warmup_epochs'] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations) # nw = min(nw, (epochs - 
start_epoch) / 2 * nb) # limit warmup to < 1/2 of training last_opt_step = -1 maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = torch.cuda.amp.GradScaler(enabled=amp) stopper, stop = EarlyStopping(patience=opt.patience), False compute_loss = ComputeLoss(model, overlap=overlap) # init loss class # callbacks.run('on_train_start') LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n' f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n' f"Logging results to {colorstr('bold', save_dir)}\n" f'Starting training for {epochs} epochs...') for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ # callbacks.run('on_train_epoch_start') model.train() # Update image weights (optional, single-GPU only) if opt.image_weights: cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx if epoch == (epochs - opt.close_mosaic): LOGGER.info("Closing dataloader mosaic") dataset.mosaic = False # Update mosaic border (optional) # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if RANK != -1: train_loader.sampler.set_epoch(epoch) pbar = enumerate(train_loader) LOGGER.info(('\n' + '%11s' * 8) % ('Epoch', 'GPU_mem', 'box_loss', 'seg_loss', 'cls_loss', 'dfl_loss', 'Instances', 'Size')) if RANK in {-1, 0}: pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT) # progress bar optimizer.zero_grad() for i, (imgs, targets, paths, _, masks) in pbar: # batch ------------------------------------------------------ # 
callbacks.run('on_train_batch_start') ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with torch.cuda.amp.autocast(amp): pred = model(imgs) # forward loss, loss_items = compute_loss(pred, targets.to(device), masks=masks.to(device).float()) if RANK != -1: loss *= WORLD_SIZE # gradient averaged between devices in DDP mode if opt.quad: loss *= 4. 
# Backward scaler.scale(loss).backward() # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= accumulate: scaler.unscale_(optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) last_opt_step = ni # Log if RANK in {-1, 0}: mloss = (mloss * i + loss_items) / (i + 1) # update mean losses mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) pbar.set_description(('%11s' * 2 + '%11.4g' * 6) % (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1])) # callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths) # if callbacks.stop_training: # return # Mosaic plots if plots: if ni < 3: plot_images_and_masks(imgs, targets, masks, paths, save_dir / f"train_batch{ni}.jpg") if ni == 10: files = sorted(save_dir.glob('train*.jpg')) logger.log_images(files, "Mosaics", epoch) # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for loggers scheduler.step() if RANK in {-1, 0}: # mAP # callbacks.run('on_train_epoch_end', epoch=epoch) ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == epochs) or stopper.possible_stop if not noval or final_epoch: # Calculate mAP results, maps, _ = validate.run(data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, half=amp, model=ema.ema, single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, plots=False, callbacks=callbacks, compute_loss=compute_loss, mask_downsample_ratio=mask_ratio, overlap=overlap) # Update best mAP fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] stop = stopper(epoch=epoch, fitness=fi) # early stop check if fi > 
best_fitness: best_fitness = fi log_vals = list(mloss) + list(results) + lr # callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi) # Log val metrics and media metrics_dict = dict(zip(KEYS, log_vals)) logger.log_metrics(metrics_dict, epoch) # Save model if (not nosave) or (final_epoch and not evolve): # if save ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'model': deepcopy(de_parallel(model)).half(), 'ema': deepcopy(ema.ema).half(), 'updates': ema.updates, 'optimizer': optimizer.state_dict(), 'opt': vars(opt), 'git': GIT_INFO, # {remote, branch, commit} if a git repo 'date': datetime.now().isoformat()} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if opt.save_period > 0 and epoch % opt.save_period == 0: torch.save(ckpt, w / f'epoch{epoch}.pt') logger.log_model(w / f'epoch{epoch}.pt') del ckpt # callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi) # EarlyStopping if RANK != -1: # if DDP training broadcast_list = [stop if RANK == 0 else None] dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks if RANK != 0: stop = broadcast_list[0] if stop: break # must break all DDP ranks # end epoch ---------------------------------------------------------------------------------------------------- # end training ----------------------------------------------------------------------------------------------------- if RANK in {-1, 0}: LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.') for f in last, best: if f.exists(): strip_optimizer(f) # strip optimizers if f is best: LOGGER.info(f'\nValidating {f}...') results, _, _ = validate.run( data_dict, batch_size=batch_size // WORLD_SIZE * 2, imgsz=imgsz, model=attempt_load(f, device).half(), iou_thres=0.65 if is_coco else 0.60, # best pycocotools at iou 0.65 single_cls=single_cls, dataloader=val_loader, save_dir=save_dir, save_json=is_coco, verbose=True, 
def parse_opt(known=False):
    """Declare the segmentation-training command-line options and parse them.

    Args:
        known: If truthy, unknown command-line arguments are tolerated and dropped
            (``parse_known_args``); otherwise argparse rejects them (``parse_args``).

    Returns:
        The ``argparse.Namespace`` with the parsed options.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # options are registered in the original order (affects --help only)

    # General training args
    add('--weights', type=str, default=ROOT / 'yolo-seg.pt', help='initial weights path')
    add('--cfg', type=str, default='', help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=100, help='total training epochs')
    add('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable AutoAnchor')
    add('--noplots', action='store_true', help='save no plot files')
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
    add('--image-weights', action='store_true', help='use weighted image selection for training')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', action='store_true', help='train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer')
    add('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    add('--project', default=ROOT / 'runs/train-seg', help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--quad', action='store_true', help='quad dataloader')
    add('--cos-lr', action='store_true', help='cosine LR scheduler')
    add('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    add('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    add('--seed', type=int, default=0, help='Global training seed')
    add('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    add('--close-mosaic', type=int, default=0, help='Experimental')

    # Instance Segmentation Args
    add('--mask-ratio', type=int, default=4, help='Downsample the truth masks to saving memory')
    add('--no-overlap', action='store_true', help='Overlap masks train faster at slightly less mAP')

    if known:
        namespace, _ignored = parser.parse_known_args()
        return namespace
    return parser.parse_args()
def main(opt, callbacks=Callbacks()):
    """Run segmentation training or hyperparameter evolution from parsed options.

    Steps, in order: print the options on the main process, resolve ``--resume`` (reloading
    the saved options of the previous run) or validate the given paths, set up the device and
    the DDP process group, then either train once or run ``opt.evolve`` generations of
    mutate-and-train.

    Args:
        opt: Namespace produced by ``parse_opt``. It is modified in place, or replaced by the
            stored options when resuming.
        callbacks: Callbacks object forwarded to ``train``. The default instance is created
            once at definition time; a fresh one is created after every evolve generation.
    """
    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))
        #check_git_status()
        #check_requirements()

    # Resume
    if opt.resume and not opt.evolve:  # resume from specified or most recent last.pt
        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
        opt_data = opt.data  # original dataset
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:
            d = torch.load(last, map_location='cpu')['opt']  # fall back to options stored in the checkpoint
        opt = argparse.Namespace(**d)  # replace
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        if is_url(opt_data):
            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            # If a default project name is in use, redirect the run to the matching evolve directory.
            # 'runs/train-seg' is this script's own default; 'runs/train' is kept for backward compatibility.
            evolve_projects = {
                str(ROOT / 'runs/train'): str(ROOT / 'runs/evolve'),
                str(ROOT / 'runs/train-seg'): str(ROOT / 'runs/evolve-seg')}
            opt.project = evolve_projects.get(opt.project, opt.project)
            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
        if opt.name == 'cfg':
            opt.name = Path(opt.cfg).stem  # use model.yaml as name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        msg = 'is not compatible with YOLO Multi-GPU DDP training'
        assert not opt.image_weights, f'--image-weights {msg}'
        assert not opt.evolve, f'--evolve {msg}'
        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device, callbacks)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        with open(opt.hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        if opt.noautoanchor:
            del hyp['anchors'], meta['anchors']
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            # NOTE(review): the bucket name is interpolated into a shell command; pass trusted values only.
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    # NOTE(review): offset 7 presumably skips the result columns of evolve.csv;
                    # confirm it matches what print_mutation writes for the segmentation KEYS.
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, callbacks)
            callbacks = Callbacks()
            # Write mutation results
            print_mutation(KEYS, results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
                    f"Results saved to {colorstr('bold', save_dir)}\n"
                    f'Usage example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
    """Train from Python code: parse the default options, apply ``kwargs`` on top, call ``main``.

    Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolo.pt')

    Returns:
        The options namespace that was handed to ``main``.
    """
    opt = parse_opt(True)
    vars(opt).update(kwargs)  # keyword overrides become attributes of the namespace
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
def save_one_txt(predn, save_conf, shape, file):
    """Append the detections of one image to a text label file, one line per detection.

    Each line is ``cls x y w h`` (plus ``conf`` when ``save_conf`` is set), with the box
    divided by the image width/height taken from ``shape``.

    Args:
        predn: Tensor of detections whose first six columns are x1, y1, x2, y2, conf, cls.
            Extra trailing columns (the validation loop passes mask coefficients there) are ignored.
        save_conf: Whether to append the confidence to each line.
        shape: Image shape indexed as (height, width).
        file: Path of the text file, opened in append mode.
    """
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    lines = []
    # Only the first six columns hold box/conf/cls; unpacking whole rows breaks on segmentation output
    for *xyxy, conf, cls in predn[:, :6].tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
        lines.append(('%g ' * len(line)).rstrip() % line + '\n')
    if lines:  # as before, no file is created when there is nothing to write
        with open(file, 'a') as f:  # open once instead of once per detection
            f.writelines(lines)


def save_one_json(predn, jdict, path, class_map, pred_masks):
    """Append the detections of one image to ``jdict`` as COCO-style result entries.

    Example entry (a 'segmentation' RLE is added to it):
    {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}

    Args:
        predn: Tensor of detections; columns 0-3 are xyxy boxes, 4 is the score, 5 the class index.
        jdict: List that receives one dict per detection (modified in place).
        path: ``Path`` of the image; a numeric stem is used as an integer image id.
        class_map: Sequence mapping the class index to the category id that is written out.
        pred_masks: Array of masks with the mask index on the last axis (it is moved to the front here).
    """
    from pycocotools.mask import encode

    def single_encode(x):
        # RLE-encode a single 2-D mask and make the byte counts JSON-serialisable
        rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
        rle["counts"] = rle["counts"].decode("utf-8")
        return rle

    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
    box = xyxy2xywh(predn[:, :4])  # xywh
    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
    pred_masks = np.transpose(pred_masks, (2, 0, 1))
    with ThreadPool(NUM_THREADS) as pool:
        rles = pool.map(single_encode, pred_masks)
    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
        jdict.append({
            'image_id': image_id,
            'category_id': class_map[int(p[5])],
            'bbox': [round(x, 3) for x in b],
            'score': round(p[4], 5),
            'segmentation': rles[i]})
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Build the correct-prediction matrix for one image.

    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds, one output column per threshold
        pred_masks, gt_masks: predicted / ground-truth masks, only read when ``masks`` is True
        overlap: ground-truth masks are stored in one index-coded map and are expanded per label
        masks: compare masks (``mask_iou``) instead of boxes (``box_iou``)
    Returns:
        correct (bool tensor[N, len(iouv)]) on the device of ``iouv``
    """
    if not masks:  # boxes
        iou = box_iou(labels[:, 1:], detections[:, :4])
    else:
        if overlap:
            n_labels = len(labels)
            ids = torch.arange(n_labels, device=gt_masks.device).view(n_labels, 1, 1) + 1
            gt_masks = gt_masks.repeat(n_labels, 1, 1)  # shape(1,640,640) -> (n,640,640)
            gt_masks = torch.where(gt_masks == ids, 1.0, 0.0)  # one binary mask per label id
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # Bring the ground truth to the prediction resolution, then binarise again
            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
            gt_masks = gt_masks.gt_(0.5)
        flat_gt = gt_masks.view(gt_masks.shape[0], -1)
        flat_pred = pred_masks.view(pred_masks.shape[0], -1)
        iou = mask_iou(flat_gt, flat_pred)

    correct = np.zeros((detections.shape[0], iouv.shape[0]), dtype=bool)
    same_class = labels[:, 0:1] == detections[:, 5]
    for col, threshold in enumerate(iouv):
        label_idx, det_idx = torch.where((iou >= threshold) & same_class)  # IoU > threshold and classes match
        n_pairs = label_idx.shape[0]
        if not n_pairs:
            continue
        pairs = torch.stack((label_idx, det_idx), 1)
        matches = torch.cat((pairs, iou[label_idx, det_idx][:, None]), 1).cpu().numpy()  # [label, detect, iou]
        if n_pairs > 1:
            matches = matches[matches[:, 2].argsort()[::-1]]  # highest IoU first
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # one row per detection
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # one row per label
        correct[matches[:, 1].astype(int), col] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
# train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val-seg', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), plots=True, overlap=False, mask_downsample_ratio=1, compute_loss=None, callbacks=Callbacks(), ): if save_json: check_requirements(['pycocotools']) process = process_mask_upsample # more accurate else: process = process_mask # faster # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() nm = de_parallel(model).model[-1].nm # number of masks else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA nm = de_parallel(model).model.model[-1].nm if 
isinstance(model, SegmentationModel) else 32 # number of masks if engine: batch_size = model.batch_size else: device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' #is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, prefix=colorstr(f'{task}: '), overlap_mask=overlap, mask_downsample_ratio=mask_downsample_ratio)[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)", "Mask(P", "R", "mAP50", "mAP50-95)") dt = Profile(), Profile(), Profile() metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] # callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): # callbacks.run('on_val_batch_start') with dt[0]: if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) masks = masks.to(device) masks = masks.float() im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width # Inference with dt[1]: preds, train_out = model(im)# if compute_loss else (*model(im, augment=augment)[:2], None) #train_out, preds, protos = p if len(p) == 3 else p[1] #preds = p #train_out = p[1][0] if len(p[1]) == 3 else p[0] protos = train_out[-1] #print(preds.shape) #print(train_out[0].shape) #print(train_out[1].shape) #print(train_out[2].shape) # Loss if compute_loss: loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # NMS targets[:, 2:] *= 
torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: preds = non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm) # Metrics plot_masks = [] # masks for plotting for si, (pred, proto) in enumerate(zip(preds, protos)): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue # Masks midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct_bboxes = process_batch(predn, labelsn, iouv) correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls) pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if plots and batch_i < 3: plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot # Save/log if save_txt: save_one_txt(predn, 
save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: pred_masks = scale_image(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) plot_images_and_masks(im, output_to_target(preds, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # callbacks.run('on_val_batch_end') # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) metrics.update(results) nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class # Print results pf = '%22s' + '%11i' * 2 + '%11.3g' * 8 # print format LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) if nt.sum() == 0: LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(metrics.ap_class_index): LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) # Print speeds t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) # callbacks.run('on_val_end') mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results() # Save JSON if 
def parse_opt():
    """Parse the command-line options for segmentation validation.

    The parser is built, the ``--data`` value is passed through
    ``check_yaml``, ``--save-txt`` is switched on whenever ``--save-hybrid``
    was requested, and the final options are echoed with ``print_args``.

    Returns:
        argparse.Namespace: the parsed and post-processed options.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    # Dataset, weights and inference settings
    add('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path')
    add('--weights', nargs='+', type=str, default=ROOT / 'yolo-seg.pt', help='model path(s)')
    add('--batch-size', type=int, default=32, help='batch size')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    add('--conf-thres', type=float, default=0.001, help='confidence threshold')
    add('--iou-thres', type=float, default=0.6, help='NMS IoU threshold')
    add('--max-det', type=int, default=300, help='maximum detections per image')
    add('--task', default='val', help='train, val, test, speed or study')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')

    # Boolean switches (registered in the same order as before so --help is unchanged)
    for flag, text in (
        ('--single-cls', 'treat as single-class dataset'),
        ('--augment', 'augmented inference'),
        ('--verbose', 'report mAP by class'),
        ('--save-txt', 'save results to *.txt'),
        ('--save-hybrid', 'save label+prediction hybrid results to *.txt'),
        ('--save-conf', 'save confidences in --save-txt labels'),
        ('--save-json', 'save a COCO-JSON results file'),
    ):
        add(flag, action='store_true', help=text)

    # Output location
    add('--project', default=ROOT / 'runs/val-seg', help='save results to project/name')
    add('--name', default='exp', help='save to project/name')

    for flag, text in (
        ('--exist-ok', 'existing project/name ok, do not increment'),
        ('--half', 'use FP16 half-precision inference'),
        ('--dnn', 'use OpenCV DNN for ONNX inference'),
    ):
        add(flag, action='store_true', help=text)

    args = cli.parse_args()
    args.data = check_yaml(args.data)  # check YAML
    if args.save_hybrid:
        # hybrid output is written through the txt path, so it implies --save-txt
        args.save_txt = True
    print_args(vars(args))
    return args
('train', 'val', 'test'): # run normally if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 LOGGER.warning(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results') if opt.save_hybrid: LOGGER.warning('WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone') run(**vars(opt)) else: weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] opt.half = torch.cuda.is_available() and opt.device != 'cpu' # FP16 for fastest results if opt.task == 'speed': # speed benchmarks # python val.py --task speed --data coco.yaml --batch 1 --weights yolo.pt... opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False for opt.weights in weights: run(**vars(opt), plots=False) elif opt.task == 'study': # speed vs mAP benchmarks # python val.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt... for opt.weights in weights: f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis for opt.imgsz in x: # img-size LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') r, _, t = run(**vars(opt), plots=False) y.append(r + t) # results and times np.savetxt(f, y, fmt='%10.4g') # save os.system('zip -r study.zip study_*.txt') plot_val_study(x=x) # plot if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: segment/val_dual.py ================================================ import argparse import json import os import sys from multiprocessing.pool import ThreadPool from pathlib import Path import numpy as np import torch from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[1] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import torch.nn.functional as F from models.common import 
def save_one_txt(predn, save_conf, shape, file):
    """Append one image's detections to a YOLO-style ``*.txt`` label file.

    Args:
        predn: 2-D tensor with one detection per row. The first six columns
            are read as ``x1, y1, x2, y2, conf, cls``; any further columns are
            ignored. The caller in this file passes rows that also carry mask
            values after column 5, which previously broke the 4-value box
            reshape below.
        save_conf: when true, the confidence is appended to each line.
        shape: indexable pair used to build the ``[1, 0, 1, 0]`` normalization
            gain (presumably ``(height, width)`` of the native image).
        file: path of the label file; it is opened in append mode.

    Nothing is written (and the file is not created) when ``predn`` has no rows.
    """
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    lines = []
    # Only the box/conf/cls columns take part; trailing mask columns would
    # otherwise be swallowed by the star-unpacking.
    for *xyxy, conf, cls in predn[:, :6].tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
        lines.append(('%g ' * len(line)).rstrip() % line + '\n')
    if lines:
        # One open/append for the whole image instead of one per detection.
        with open(file, 'a') as f:
            f.writelines(lines)


def save_one_json(predn, jdict, path, class_map, pred_masks):
    """Append one image's detections, with RLE masks, to a COCO-style result list.

    Each appended entry looks like
    ``{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78],
    "score": 0.236, "segmentation": <rle>}``.

    Args:
        predn: 2-D tensor of detections; columns 0-3 are read as xyxy boxes,
            column 4 as the score and column 5 as the class index.
        jdict: list that the result dictionaries are appended to (mutated in place).
        path: ``pathlib.Path`` of the image; a numeric stem becomes an integer
            ``image_id``, otherwise the stem string is used as is.
        class_map: sequence mapping the class index to the emitted ``category_id``.
        pred_masks: array whose last axis indexes detections; it is transposed
            to detection-first order before encoding.
    """
    from pycocotools.mask import encode

    def single_encode(x):
        # pycocotools expects a Fortran-ordered uint8 array with a trailing axis
        rle = encode(np.asarray(x[:, :, None], order="F", dtype="uint8"))[0]
        rle["counts"] = rle["counts"].decode("utf-8")  # bytes are not JSON serializable
        return rle

    image_id = int(path.stem) if path.stem.isnumeric() else path.stem
    box = xyxy2xywh(predn[:, :4])  # xywh
    box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
    pred_masks = np.transpose(pred_masks, (2, 0, 1))
    with ThreadPool(NUM_THREADS) as pool:
        rles = pool.map(single_encode, pred_masks)
    for i, (p, b) in enumerate(zip(predn.tolist(), box.tolist())):
        jdict.append({
            'image_id': image_id,
            'category_id': class_map[int(p[5])],
            'bbox': [round(x, 3) for x in b],
            'score': round(p[4], 5),
            'segmentation': rles[i]})


def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Return correct prediction matrix
    Arguments:
        detections (array[N, 6+]), x1, y1, x2, y2, conf, class (only columns 0-3 and 5 are read)
        labels (array[M, 5]), class, x1, y1, x2, y2
        iouv (tensor[T]), IoU thresholds to evaluate
        pred_masks, gt_masks: predicted / ground-truth masks, used only when ``masks`` is true
        overlap: when true, ``gt_masks`` is treated as an index map in which label ``k``
            is stored as value ``k + 1`` and is expanded to one binary mask per label
        masks: compare masks (``mask_iou``) instead of boxes (``box_iou``)
    Returns:
        correct (bool tensor[N, T]) on ``iouv``'s device; entry ``[n, t]`` is true when
        detection ``n`` was matched to a same-class label at threshold ``iouv[t]``
    """
    if masks:
        if overlap:
            nl = len(labels)
            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
            gt_masks = gt_masks.repeat(nl, 1, 1)  # shape(1,640,640) -> (n,640,640)
            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # bring the ground truth to the prediction resolution, then re-binarize
            gt_masks = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)[0]
            gt_masks = gt_masks.gt_(0.5)
        iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1))
    else:  # boxes
        iou = box_iou(labels[:, 1:], detections[:, :4])

    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
    correct_class = labels[:, 0:1] == detections[:, 5]
    for i in range(len(iouv)):
        x = torch.where((iou >= iouv[i]) & correct_class)  # IoU > threshold and classes match
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detect, iou]
            if x[0].shape[0] > 1:
                # sort by IoU (descending) so the first row seen per detection is its best match
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                # matches = matches[matches[:, 2].argsort()[::-1]]
                # keep a single row per label (rows are now ordered by detection index)
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
            correct[matches[:, 1].astype(int), i] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val-seg', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), plots=True, overlap=False, mask_downsample_ratio=1, compute_loss=None, callbacks=Callbacks(), ): if save_json: check_requirements(['pycocotools']) process = process_mask_upsample # more accurate else: process = process_mask # faster # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() nm = de_parallel(model).model[-1].nm # number of masks else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA nm = de_parallel(model).model.model[-1].nm if isinstance(model, 
SegmentationModel) else 32 # number of masks if engine: batch_size = model.batch_size else: device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' #is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, prefix=colorstr(f'{task}: '), overlap_mask=overlap, mask_downsample_ratio=mask_downsample_ratio)[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)", "Mask(P", "R", "mAP50", "mAP50-95)") dt = Profile(), Profile(), Profile() metrics = Metrics() loss = torch.zeros(4, device=device) jdict, stats = [], [] # callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar): # callbacks.run('on_val_batch_start') with dt[0]: if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) masks = masks.to(device) masks = masks.float() im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width # Inference with dt[1]: preds, train_out = model(im)# if compute_loss else (*model(im, augment=augment)[:2], None) #preds = preds[1] #train_out, preds, protos = p if len(p) == 3 else p[1] #preds = p #train_out = p[1][0] if len(p[1]) == 3 else p[0] protos = train_out[-1] #print(preds.shape) #print(train_out[0].shape) #print(train_out[1].shape) #print(train_out[2].shape) # Loss #if compute_loss: # loss += compute_loss(train_out, targets, masks)[1] # box, obj, cls # NMS 
targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: preds = non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm) # Metrics plot_masks = [] # masks for plotting for si, (pred, proto) in enumerate(zip(preds, protos)): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0])) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue # Masks midx = [si] if overlap else targets[:, 0] == si gt_masks = masks[midx] pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:]) # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct_bboxes = process_batch(predn, labelsn, iouv) correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0])) # (conf, pcls, tcls) pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8) if plots and batch_i < 3: plot_masks.append(pred_masks[:15].cpu()) # filter top 15 to plot # Save/log if save_txt: 
save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: pred_masks = scale_image(im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]) save_one_json(predn, jdict, path, class_map, pred_masks) # append to COCO-JSON dictionary # callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: if len(plot_masks): plot_masks = torch.cat(plot_masks, dim=0) plot_images_and_masks(im, targets, masks, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) plot_images_and_masks(im, output_to_target(preds, max_det=15), plot_masks, paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred # callbacks.run('on_val_batch_end') # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names) metrics.update(results) nt = np.bincount(stats[4].astype(int), minlength=nc) # number of targets per class # Print results pf = '%22s' + '%11i' * 2 + '%11.3g' * 8 # print format LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results())) if nt.sum() == 0: LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(metrics.ap_class_index): LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i))) # Print speeds t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) # callbacks.run('on_val_end') mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results() # 
Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api results = [] for eval in COCOeval(anno, pred, 'bbox'), COCOeval(anno, pred, 'segm'): if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # img ID to evaluate eval.evaluate() eval.accumulate() eval.summarize() results.extend(eval.stats[:2]) # update results (mAP@0.5:0.95, mAP@0.5) map_bbox, map50_bbox, map_mask, map50_mask = results except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco128-seg.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo-seg.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, 
def main(opt):
    """Dispatch a validation run according to ``opt.task``.

    * ``train`` / ``val`` / ``test``: a single ``run`` with the given options,
      after warning about a confidence threshold above 0.001 or ``--save-hybrid``.
    * ``speed``: one ``run`` per weights file with fixed thresholds and no plots.
    * ``study``: for every weights file, ``run`` over image sizes 256..1536 in
      steps of 128, save the per-size results to ``study_<data>_<weights>.txt``,
      then zip the text files and plot them.

    For every task other than train/val/test, ``opt.half`` is overwritten with
    whether CUDA is available and the device is not ``'cpu'``.
    """
    #check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))

    if opt.task in ('train', 'val', 'test'):  # run normally
        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
            LOGGER.warning(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
        if opt.save_hybrid:
            LOGGER.warning('WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone')
        run(**vars(opt))
        return

    # Benchmark-style tasks: normalize weights to a list and pick FP16 automatically
    if isinstance(opt.weights, list):
        weight_files = opt.weights
    else:
        weight_files = [opt.weights]
    opt.half = torch.cuda.is_available() and opt.device != 'cpu'  # FP16 for fastest results

    if opt.task == 'speed':  # speed benchmarks
        # python val.py --task speed --data coco.yaml --batch 1 --weights yolo.pt...
        opt.conf_thres = 0.25
        opt.iou_thres = 0.45
        opt.save_json = False
        for weight_file in weight_files:
            opt.weights = weight_file
            run(**vars(opt), plots=False)

    elif opt.task == 'study':  # speed vs mAP benchmarks
        # python val.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt...
        for weight_file in weight_files:
            opt.weights = weight_file
            study_file = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt'  # filename to save to
            x = list(range(256, 1536 + 128, 128))  # x axis (image sizes)
            rows = []  # y axis
            for size in x:
                opt.imgsz = size
                LOGGER.info(f'\nRunning {study_file} --imgsz {opt.imgsz}...')
                results, _, timings = run(**vars(opt), plots=False)
                rows.append(results + timings)  # results and times
            np.savetxt(study_file, rows, fmt='%10.4g')  # save
        os.system('zip -r study.zip study_*.txt')
        plot_val_study(x=x)  # plot


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
\"./models/detect/gelan-s.yaml\"\n", "model = Model(cfg, ch=3, nc=80, anchors=3)\n", "#model = model.half()\n", "model = model.to(device)\n", "_ = model.eval()\n", "ckpt = torch.load('./yolov9-s.pt', map_location='cpu')\n", "model.names = ckpt['model'].names\n", "model.nc = ckpt['model'].nc" ] }, { "cell_type": "code", "execution_count": null, "id": "3b046bb2", "metadata": {}, "outputs": [], "source": [ "idx = 0\n", "for k, v in model.state_dict().items():\n", " if \"model.{}.\".format(idx) in k:\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " else:\n", " while True:\n", " idx += 1\n", " if \"model.{}.\".format(idx) in k:\n", " break\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif 
\"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", "_ = model.eval()" ] }, { "cell_type": "code", "execution_count": null, "id": "07eb0cde", "metadata": {}, "outputs": [], "source": [ "m_ckpt = {'model': model.half(),\n", " 'optimizer': None,\n", " 'best_fitness': None,\n", " 'ema': None,\n", " 'updates': None,\n", " 'opt': None,\n", " 'git': None,\n", " 'date': None,\n", " 'epoch': -1}\n", "torch.save(m_ckpt, \"./yolov9-s-converted.pt\")" ] }, { "cell_type": "markdown", "id": "ba87d10f", "metadata": {}, "source": [ "## Convert YOLOv9-M" ] }, { "cell_type": "code", "execution_count": null, "id": "cc41b027", "metadata": {}, "outputs": [], "source": [ "device = torch.device(\"cpu\")\n", "cfg = \"./models/detect/gelan-m.yaml\"\n", "model = Model(cfg, ch=3, nc=80, anchors=3)\n", "#model = model.half()\n", "model = model.to(device)\n", "_ = model.eval()\n", "ckpt = torch.load('./yolov9-m.pt', map_location='cpu')\n", "model.names = ckpt['model'].names\n", "model.nc = ckpt['model'].nc" ] }, { "cell_type": "code", "execution_count": null, "id": "cf7c3978", "metadata": {}, "outputs": [], "source": [ "idx = 0\n", "for k, v in model.state_dict().items():\n", " if 
\"model.{}.\".format(idx) in k:\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+1))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " else:\n", " while True:\n", " idx += 1\n", " if \"model.{}.\".format(idx) in k:\n", " break\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+1))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] 
+= ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", "_ = model.eval()" ] }, { "cell_type": "code", "execution_count": null, "id": "00a92a45", "metadata": {}, "outputs": [], "source": [ "m_ckpt = {'model': model.half(),\n", " 'optimizer': None,\n", " 'best_fitness': None,\n", " 'ema': None,\n", " 'updates': None,\n", " 'opt': None,\n", " 'git': None,\n", " 'date': None,\n", " 'epoch': -1}\n", "torch.save(m_ckpt, \"./yolov9-m-converted.pt\")" ] }, { "cell_type": "markdown", "id": "8680f822", "metadata": {}, "source": [ "## Convert YOLOv9-C" ] }, { "cell_type": "code", "execution_count": null, "id": "59f0198d", "metadata": {}, "outputs": [], "source": [ "device = torch.device(\"cpu\")\n", "cfg = \"./models/detect/gelan-c.yaml\"\n", "model = Model(cfg, ch=3, nc=80, anchors=3)\n", "#model = model.half()\n", "model = model.to(device)\n", "_ = model.eval()\n", "ckpt = torch.load('./yolov9-c.pt', map_location='cpu')\n", "model.names = ckpt['model'].names\n", "model.nc = ckpt['model'].nc" ] }, { "cell_type": "code", "execution_count": null, "id": "2de7e1be", "metadata": {}, "outputs": [], "source": [ "idx = 0\n", "for k, v in model.state_dict().items():\n", " if \"model.{}.\".format(idx) in k:\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+1))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif 
\"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " else:\n", " while True:\n", " idx += 1\n", " if \"model.{}.\".format(idx) in k:\n", " break\n", " if idx < 22:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+1))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+16))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", "_ = model.eval()" ] }, { "cell_type": "code", "execution_count": null, "id": "960796e3", "metadata": {}, "outputs": [], "source": [ "m_ckpt = {'model': model.half(),\n", " 'optimizer': None,\n", " 'best_fitness': None,\n", " 'ema': None,\n", " 'updates': None,\n", " 'opt': None,\n", " 'git': None,\n", " 'date': None,\n", " 'epoch': -1}\n", "torch.save(m_ckpt, \"./yolov9-c-converted.pt\")" ] }, { "cell_type": "markdown", "id": "47c6e6ae", "metadata": 
{}, "source": [ "## Convert YOLOv9-E" ] }, { "cell_type": "code", "execution_count": null, "id": "801a1b7c", "metadata": {}, "outputs": [], "source": [ "device = torch.device(\"cpu\")\n", "cfg = \"./models/detect/gelan-e.yaml\"\n", "model = Model(cfg, ch=3, nc=80, anchors=3)\n", "#model = model.half()\n", "model = model.to(device)\n", "_ = model.eval()\n", "ckpt = torch.load('./yolov9-e.pt', map_location='cpu')\n", "model.names = ckpt['model'].names\n", "model.nc = ckpt['model'].nc" ] }, { "cell_type": "code", "execution_count": null, "id": "a2ef4fe6", "metadata": {}, "outputs": [], "source": [ "idx = 0\n", "for k, v in model.state_dict().items():\n", " if \"model.{}.\".format(idx) in k:\n", " if idx < 29:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif idx < 42:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += 
ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " else:\n", " while True:\n", " idx += 1\n", " if \"model.{}.\".format(idx) in k:\n", " break\n", " if idx < 29:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif idx < 42:\n", " kr = k.replace(\"model.{}.\".format(idx), \"model.{}.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv2.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv2.\".format(idx), \"model.{}.cv4.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.cv3.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.cv3.\".format(idx), \"model.{}.cv5.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", " elif \"model.{}.dfl.\".format(idx) in k:\n", " kr = k.replace(\"model.{}.dfl.\".format(idx), \"model.{}.dfl2.\".format(idx+7))\n", " model.state_dict()[k] -= model.state_dict()[k]\n", " model.state_dict()[k] += ckpt['model'].state_dict()[kr]\n", " print(k, \"perfectly matched!!\")\n", "_ = model.eval()" ] }, { "cell_type": "code", "execution_count": null, "id": "27bc1869", "metadata": {}, "outputs": [], "source": [ "m_ckpt = {'model': model.half(),\n", " 'optimizer': None,\n", " 'best_fitness': None,\n", " 'ema': None,\n", " 'updates': None,\n", " 'opt': None,\n", " 'git': None,\n", " 'date': None,\n", " 'epoch': -1}\n", "torch.save(m_ckpt, \"./yolov9-e-converted.pt\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 
(ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 } ================================================ FILE: train.py ================================================ import argparse import math import os import random import sys import time from copy import deepcopy from datetime import datetime from pathlib import Path import numpy as np import torch import torch.distributed as dist import torch.nn as nn import yaml from torch.optim import lr_scheduler from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative import val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.downloads import attempt_download, is_url from utils.general import (LOGGER, TQDM_BAR_FORMAT, check_amp, check_dataset, check_file, check_img_size, check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds, intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle, one_flat_cycle, print_args, print_mutation, strip_optimizer, yaml_save) from utils.loggers import Loggers from utils.loggers.comet.comet_utils import check_comet_resume from utils.loss_tal import ComputeLoss from utils.metrics import fitness from utils.plots import plot_evolve from utils.torch_utils import (EarlyStopping, ModelEMA, de_parallel, select_device, smart_DDP, smart_optimizer, smart_resume, 
def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
    """Run one full training session and return the final validation results.

    Args:
        hyp: Path to a hyperparameter YAML file, or an already-loaded hyperparameter dict.
            The dict is modified in place ('anchor_t', 'weight_decay' and 'label_smoothing'
            are overwritten/rescaled below).
        opt: Parsed options namespace (see ``parse_opt``). ``opt.hyp`` is replaced by a copy
            of the loaded hyperparameters.
        device: ``torch.device`` the model and batches are moved to.
        callbacks: Callback registry; logger methods are registered on it and hooks such as
            'on_train_start' / 'on_fit_epoch_end' are fired through ``callbacks.run``.

    Returns:
        The ``results`` value produced by the last ``validate.run`` call (initialised to a
        7-tuple of zeros: P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)).
        Returns ``None`` if ``callbacks.stop_training`` is set during a batch.

    Side effects:
        Writes hyp.yaml / opt.yaml (unless evolving) and checkpoints ``last.pt`` / ``best.pt``
        (plus stripped copies and optional ``epoch{N}.pt``) under ``opt.save_dir``.
    """
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
    callbacks.run('on_pretrain_routine_start')

    # Directories
    w = save_dir / 'weights'  # weights dir
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'
    last_striped, best_striped = w / 'last_striped.pt', w / 'best_striped.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
    hyp['anchor_t'] = 5.0  # always overrides whatever the hyp file / caller supplied
    opt.hyp = hyp.copy()  # for saving hyps to checkpoints

    # Save run settings
    if not evolve:
        yaml_save(save_dir / 'hyp.yaml', hyp)
        yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Loggers
    data_dict = None
    if RANK in {-1, 0}:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

        # Process custom dataset artifact link
        data_dict = loggers.remote_dataset
        if resume:  # If resuming runs from remote artifact
            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size

    # Config
    plots = not evolve and not opt.noplots  # create plots
    cuda = device.type != 'cpu'
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    #is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset
    is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt')  # COCO dataset

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    amp = check_amp(model)  # check AMP

    # Freeze
    # A single value N freezes layers 0..N-1; several values are taken as explicit layer indices.
    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
    for k, v in model.named_parameters():
        # v.requires_grad = True  # train all layers TODO: uncomment this line as in master
        # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            v.requires_grad = False

    # Image size
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # Batch size
    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
        batch_size = check_train_batch_size(model, imgsz, amp)
        loggers.on_params_update({"batch_size": batch_size})

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])

    # Scheduler
    if opt.cos_lr:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    elif opt.flat_cos_lr:
        lf = one_flat_cycle(1, hyp['lrf'], epochs)  # flat cosine 1->hyp['lrf']
    elif opt.fixed_lr:
        lf = lambda x: 1.0
    else:
        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # from utils.plots import plot_lr_scheduler; plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # Resume
    best_fitness, start_epoch = 0.0, 0
    if pretrained:
        if resume:
            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
        del ckpt, csd

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path,
                                              imgsz,
                                              batch_size // WORLD_SIZE,
                                              gs,
                                              single_cls,
                                              hyp=hyp,
                                              augment=True,
                                              cache=None if opt.cache == 'val' else opt.cache,
                                              rect=opt.rect,
                                              rank=LOCAL_RANK,
                                              workers=workers,
                                              image_weights=opt.image_weights,
                                              close_mosaic=opt.close_mosaic != 0,
                                              quad=opt.quad,
                                              prefix=colorstr('train: '),
                                              shuffle=True,
                                              min_items=opt.min_items)
    labels = np.concatenate(dataset.labels, 0)
    mlc = int(labels[:, 0].max())  # max label class
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in {-1, 0}:
        val_loader = create_dataloader(val_path,
                                       imgsz,
                                       batch_size // WORLD_SIZE * 2,
                                       gs,
                                       single_cls,
                                       hyp=hyp,
                                       cache=None if noval else opt.cache,
                                       rect=True,
                                       rank=-1,
                                       workers=workers * 2,
                                       pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            # if not opt.noautoanchor:
            #     check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)  # run AutoAnchor
            model.half().float()  # pre-reduce anchor precision

        callbacks.run('on_pretrain_routine_end', labels, names)

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Model attributes
    nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
    #hyp['box'] *= 3 / nl  # scale to layers
    #hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
    #hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nb = len(train_loader)  # number of batches
    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, max(3 epochs, 100 iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = torch.cuda.amp.GradScaler(enabled=amp)
    stopper, stop = EarlyStopping(patience=opt.patience), False
    compute_loss = ComputeLoss(model)  # init loss class
    callbacks.run('on_train_start')
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        callbacks.run('on_train_epoch_start')
        model.train()

        # Update image weights (optional, single-GPU only)
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
        # Switch mosaic augmentation off for the last `opt.close_mosaic` epochs
        if epoch == (epochs - opt.close_mosaic):
            LOGGER.info("Closing dataloader mosaic")
            dataset.mosaic = False

        # Update mosaic border (optional)
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%11s' * 7) % ('Epoch', 'GPU_mem', 'box_loss', 'cls_loss', 'dfl_loss', 'Instances', 'Size'))
        if RANK in {-1, 0}:
            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            callbacks.run('on_train_batch_start')
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # compute_loss.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # random.randrange() only accepts integers: float bounds are deprecated since
                # Python 3.10 and raise TypeError on 3.12+, so cast before sampling.
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with torch.cuda.amp.autocast(amp):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
            if ni - last_opt_step >= accumulate:
                scaler.unscale_(optimizer)  # unscale gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in {-1, 0}:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%11s' * 2 + '%11.4g' * 5) %
                                     (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
                if callbacks.stop_training:
                    return  # aborts immediately; the caller receives None
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in {-1, 0}:
            # mAP
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = validate.run(data_dict,
                                                batch_size=batch_size // WORLD_SIZE * 2,
                                                imgsz=imgsz,
                                                half=amp,
                                                model=ema.ema,
                                                single_cls=single_cls,
                                                dataloader=val_loader,
                                                save_dir=save_dir,
                                                plots=False,
                                                callbacks=callbacks,
                                                compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(de_parallel(model)).half(),
                    'ema': deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if opt.save_period > 0 and epoch % opt.save_period == 0:
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)

        # EarlyStopping
        if RANK != -1:  # if DDP training
            broadcast_list = [stop if RANK == 0 else None]
            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
            if RANK != 0:
                stop = broadcast_list[0]
        if stop:
            break  # must break all DDP ranks

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in {-1, 0}:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                # Stripped copies are written next to the originals; last.pt / best.pt stay untouched
                if f is last:
                    strip_optimizer(f, last_striped)  # strip optimizers
                else:
                    strip_optimizer(f, best_striped)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = validate.run(
                        data_dict,
                        batch_size=batch_size // WORLD_SIZE * 2,
                        imgsz=imgsz,
                        model=attempt_load(f, device).half(),
                        single_cls=single_cls,
                        dataloader=val_loader,
                        save_dir=save_dir,
                        save_json=is_coco,
                        verbose=True,
                        plots=plots,
                        callbacks=callbacks,
                        compute_loss=compute_loss)  # val best model with plots
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

        callbacks.run('on_train_end', last, best, epoch, results)

    torch.cuda.empty_cache()
    return results
def parse_opt(known=False):
    """Build the training command-line parser and return the parsed options.

    Args:
        known: When true, parse with ``parse_known_args`` and return only the recognised
            namespace (unrecognised arguments are ignored); otherwise use ``parse_args``.

    Returns:
        The ``argparse.Namespace`` holding all training options.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # local alias; every option below is registered on `parser`

    # Model, data and schedule
    # (previous defaults: --weights ROOT / 'yolo.pt', --cfg '')
    add('--weights', type=str, default='', help='initial weights path')
    add('--cfg', type=str, default='yolo.yaml', help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=100, help='total training epochs')
    add('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')

    # Saving, validation and evolution switches
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable AutoAnchor')
    add('--noplots', action='store_true', help='save no plot files')
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')

    # Data loading and hardware
    add('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
    add('--image-weights', action='store_true', help='use weighted image selection for training')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', action='store_true', help='train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer')
    add('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')

    # Output location
    add('--project', default=ROOT / 'runs/train', help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')

    # Training behaviour
    add('--quad', action='store_true', help='quad dataloader')
    add('--cos-lr', action='store_true', help='cosine LR scheduler')
    add('--flat-cos-lr', action='store_true', help='flat cosine LR scheduler')
    add('--fixed-lr', action='store_true', help='fixed LR scheduler')
    add('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    add('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    add('--seed', type=int, default=0, help='Global training seed')
    add('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    add('--min-items', type=int, default=0, help='Experimental')
    add('--close-mosaic', type=int, default=0, help='Experimental')

    # Logger arguments
    add('--entity', default=None, help='Entity')
    add('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
    add('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')

    if known:
        namespace, _unrecognised = parser.parse_known_args()
        return namespace
    return parser.parse_args()
def main(opt, callbacks=None):
    """Validate options, set up resume/DDP state, then train once or evolve hyperparameters.

    Args:
        opt: Options namespace from ``parse_opt``. It is modified in place, or replaced by the
            options stored with a previous run when resuming.
        callbacks: Callback registry handed to ``train``. When omitted, a fresh ``Callbacks()``
            is created for this call. (The previous ``callbacks=Callbacks()`` default was
            evaluated once at definition time, so every call that omitted the argument shared
            the same instance.)

    Raises:
        AssertionError: if neither --cfg nor --weights is given, or if options incompatible
            with DDP training are combined with ``LOCAL_RANK != -1``.
    """
    if callbacks is None:
        callbacks = Callbacks()

    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))

    # Resume (from specified or most recent last.pt)
    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
        opt_data = opt.data  # original dataset
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:
            d = torch.load(last, map_location='cpu')['opt']  # fall back to options stored in the checkpoint
        opt = argparse.Namespace(**d)  # replace
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        if is_url(opt_data):
            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
                opt.project = str(ROOT / 'runs/evolve')
            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
        if opt.name == 'cfg':
            opt.name = Path(opt.cfg).stem  # use model.yaml as name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        msg = 'is not compatible with YOLO Multi-GPU DDP training'
        assert not opt.image_weights, f'--image-weights {msg}'
        assert not opt.evolve, f'--evolve {msg}'
        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device, callbacks)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        with open(opt.hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        if opt.noautoanchor:
            del hyp['anchors'], meta['anchors']
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            # NOTE(review): opt.bucket is interpolated into a shell command unescaped; only pass trusted values.
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                # NOTE(review): gains are looked up per hyp key (meta[k] raises KeyError for a hyp key
                # missing from meta) while ng counts meta entries, so this code relies on the hyp file
                # and meta having the same keys — confirm against the hyp YAMLs in use.
                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate (hyp columns start at index 7, after the 7 result keys)

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, callbacks)
            callbacks = Callbacks()  # start the next generation with a clean callback registry
            # Write mutation results
            keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
                    'val/obj_loss', 'val/cls_loss')
            print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
                    f"Results saved to {colorstr('bold', save_dir)}\n"
                    f'Usage example: $ python train.py --hyp {evolve_yaml}')
def run(**kwargs):
    """Launch training programmatically with keyword overrides.

    Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolo.pt')

    Options are first parsed with ``parse_opt(True)`` (unknown CLI arguments are
    tolerated), then every keyword argument is set as an attribute on the
    resulting namespace before ``main`` is invoked.

    Returns:
        The options namespace that was passed to ``main``.
    """
    options = parse_opt(True)
    for name in kwargs:
        setattr(options, name, kwargs[name])
    main(options)
    return options


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
def train(hyp, opt, device, callbacks):
    """Run one full training session and return the final validation results.

    Args:
        hyp: Path to a hyperparameter YAML file, or an already-loaded hyperparameter dict.
        opt: Options namespace (as produced by ``parse_opt``); ``opt.hyp`` is overwritten
            with a copy of the resolved hyperparameter dict.
        device: ``torch.device`` to train on.
        callbacks: Callback registry; logger methods are registered on it on rank -1/0.

    Returns:
        The last ``results`` sequence (P, R, mAP@.5, mAP@.5-.95, val losses), or ``None``
        if a callback requested ``stop_training`` in the middle of an epoch.
    """
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
    callbacks.run('on_pretrain_routine_start')

    # Directories
    w = save_dir / 'weights'  # weights dir
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
    hyp['anchor_t'] = 5.0  # always overridden, regardless of the value in the hyp file
    opt.hyp = hyp.copy()  # for saving hyps to checkpoints

    # Save run settings
    if not evolve:
        yaml_save(save_dir / 'hyp.yaml', hyp)
        yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Loggers
    data_dict = None
    if RANK in {-1, 0}:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

        # Process custom dataset artifact link
        data_dict = loggers.remote_dataset
        if resume:  # If resuming runs from remote artifact
            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size

    # Config
    plots = not evolve and not opt.noplots  # create plots
    cuda = device.type != 'cpu'
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt')  # COCO dataset

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    amp = check_amp(model)  # check AMP

    # Freeze: a single value N freezes layers 0..N-1, several values freeze exactly those layer indices
    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
    for k, v in model.named_parameters():
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            v.requires_grad = False

    # Image size
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # Batch size
    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
        batch_size = check_train_batch_size(model, imgsz, amp)
        loggers.on_params_update({"batch_size": batch_size})

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])

    # Scheduler (the lambdas read `epochs` lazily, so a value updated by smart_resume below is honoured)
    if opt.cos_lr:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    elif opt.flat_cos_lr:
        lf = one_flat_cycle(1, hyp['lrf'], epochs)  # flat cosine 1->hyp['lrf']
    elif opt.fixed_lr:
        lf = lambda x: 1.0
    else:
        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # Resume
    best_fitness, start_epoch = 0.0, 0
    if pretrained:
        if resume:
            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
        del ckpt, csd

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path,
                                              imgsz,
                                              batch_size // WORLD_SIZE,
                                              gs,
                                              single_cls,
                                              hyp=hyp,
                                              augment=True,
                                              cache=None if opt.cache == 'val' else opt.cache,
                                              rect=opt.rect,
                                              rank=LOCAL_RANK,
                                              workers=workers,
                                              image_weights=opt.image_weights,
                                              close_mosaic=opt.close_mosaic != 0,
                                              quad=opt.quad,
                                              prefix=colorstr('train: '),
                                              shuffle=True,
                                              min_items=opt.min_items)
    labels = np.concatenate(dataset.labels, 0)
    mlc = int(labels[:, 0].max())  # max label class
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in {-1, 0}:
        val_loader = create_dataloader(val_path,
                                       imgsz,
                                       batch_size // WORLD_SIZE * 2,
                                       gs,
                                       single_cls,
                                       hyp=hyp,
                                       cache=None if noval else opt.cache,
                                       rect=True,
                                       rank=-1,
                                       workers=workers * 2,
                                       pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            # NOTE: the AutoAnchor check is disabled in this script; only the precision reduction is kept
            model.half().float()  # pre-reduce anchor precision

        callbacks.run('on_pretrain_routine_end', labels, names)

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Model attributes
    nl = de_parallel(model).model[-1].nl  # number of detection layers (hyp scaling by nl is disabled here)
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nb = len(train_loader)  # number of batches
    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, at least 100
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = torch.cuda.amp.GradScaler(enabled=amp)
    stopper, stop = EarlyStopping(patience=opt.patience), False
    compute_loss = ComputeLoss(model)  # init loss class
    callbacks.run('on_train_start')
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        callbacks.run('on_train_epoch_start')
        model.train()

        # Update image weights (optional, single-GPU only)
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
        if epoch == (epochs - opt.close_mosaic):
            LOGGER.info("Closing dataloader mosaic")
            dataset.mosaic = False

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%11s' * 7) % ('Epoch', 'GPU_mem', 'box_loss', 'cls_loss', 'dfl_loss', 'Instances', 'Size'))
        if RANK in {-1, 0}:
            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            callbacks.run('on_train_batch_start')
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange() requires integer bounds (float bounds raise TypeError on Python 3.12+)
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with torch.cuda.amp.autocast(amp):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
            if ni - last_opt_step >= accumulate:
                scaler.unscale_(optimizer)  # unscale gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in {-1, 0}:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%11s' * 2 + '%11.4g' * 5) %
                                     (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
                if callbacks.stop_training:
                    return
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in {-1, 0}:
            # mAP
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = validate.run(data_dict,
                                                batch_size=batch_size // WORLD_SIZE * 2,
                                                imgsz=imgsz,
                                                half=amp,
                                                model=ema.ema,
                                                single_cls=single_cls,
                                                dataloader=val_loader,
                                                save_dir=save_dir,
                                                plots=False,
                                                callbacks=callbacks,
                                                compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(de_parallel(model)).half(),
                    'ema': deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if opt.save_period > 0 and epoch % opt.save_period == 0:
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)

        # EarlyStopping
        if RANK != -1:  # if DDP training
            broadcast_list = [stop if RANK == 0 else None]
            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
            if RANK != 0:
                stop = broadcast_list[0]
        if stop:
            break  # must break all DDP ranks

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in {-1, 0}:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = validate.run(
                        data_dict,
                        batch_size=batch_size // WORLD_SIZE * 2,
                        imgsz=imgsz,
                        model=attempt_load(f, device).half(),
                        single_cls=single_cls,
                        dataloader=val_loader,
                        save_dir=save_dir,
                        save_json=is_coco,
                        verbose=True,
                        plots=plots,
                        callbacks=callbacks,
                        compute_loss=compute_loss)  # val best model with plots
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

        callbacks.run('on_train_end', last, best, epoch, results)

    torch.cuda.empty_cache()
    return results
def parse_opt(known=False):
    """Build the training command-line parser and parse the process arguments.

    Args:
        known: When true, unrecognised arguments are ignored (``parse_known_args``);
            otherwise they are an error (``parse_args``).

    Returns:
        ``argparse.Namespace`` holding the parsed options.
    """
    ap = argparse.ArgumentParser()
    add = ap.add_argument

    def flag(name, description):
        # Boolean switch: False unless present on the command line
        add(name, action='store_true', help=description)

    # Model, data and schedule
    add('--weights', type=str, default='', help='initial weights path')
    add('--cfg', type=str, default='yolo.yaml', help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-high.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=100, help='total training epochs')
    add('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    flag('--rect', 'rectangular training')
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    flag('--nosave', 'only save final checkpoint')
    flag('--noval', 'only validate final epoch')
    flag('--noautoanchor', 'disable AutoAnchor')
    flag('--noplots', 'save no plot files')
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
    flag('--image-weights', 'use weighted image selection for training')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    flag('--multi-scale', 'vary img-size +/- 50%%')
    flag('--single-cls', 'train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer')
    flag('--sync-bn', 'use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    add('--project', default=ROOT / 'runs/train', help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    flag('--exist-ok', 'existing project/name ok, do not increment')
    flag('--quad', 'quad dataloader')
    flag('--cos-lr', 'cosine LR scheduler')
    flag('--flat-cos-lr', 'flat cosine LR scheduler')
    flag('--fixed-lr', 'fixed LR scheduler')
    add('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    add('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    add('--seed', type=int, default=0, help='Global training seed')
    add('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    add('--min-items', type=int, default=0, help='Experimental')
    add('--close-mosaic', type=int, default=0, help='Experimental')

    # Logger arguments
    add('--entity', default=None, help='Entity')
    add('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
    add('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')

    if known:
        return ap.parse_known_args()[0]
    return ap.parse_args()
def main(opt, callbacks=None):
    """Validate options, set up the device/DDP, then train once or evolve hyperparameters.

    Args:
        opt: Options namespace from ``parse_opt``. When resuming, it is replaced by the
            options stored with the run being resumed.
        callbacks: Optional callback registry. A fresh ``Callbacks()`` is created per call
            when omitted, so hooks registered by one call never leak into the next.
    """
    if callbacks is None:
        callbacks = Callbacks()

    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))

    # Resume (from specified or most recent last.pt)
    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
        opt_data = opt.data  # original dataset
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:
            d = torch.load(last, map_location='cpu')['opt']
        opt = argparse.Namespace(**d)  # replace
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        if is_url(opt_data):
            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
                opt.project = str(ROOT / 'runs/evolve')
            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
        if opt.name == 'cfg':
            opt.name = Path(opt.cfg).stem  # use model.yaml as name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        msg = 'is not compatible with YOLO Multi-GPU DDP training'
        assert not opt.image_weights, f'--image-weights {msg}'
        assert not opt.evolve, f'--evolve {msg}'
        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device, callbacks)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        with open(opt.hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        if opt.noautoanchor:
            del hyp['anchors'], meta['anchors']
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                # Gains follow the hyp key order; a key without metadata gets gain 0 (never mutated)
                # instead of raising KeyError when the hyp file and `meta` disagree.
                g = np.array([meta[k][0] if k in meta else 0 for k in hyp])  # gains 0-1
                ng = len(g)  # one multiplier per hyp key so that v[i] below is always valid
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp):
                    hyp[k] = float(x[i + 7] * v[i])  # mutate (the first 7 csv columns are the result metrics)

            # Constrain to limits
            for k, v in meta.items():
                if k not in hyp:  # metadata for a hyperparameter the hyp file does not define
                    continue
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, callbacks)
            callbacks = Callbacks()  # fresh registry so logger hooks are not registered twice next generation
            # Write mutation results
            keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
                    'val/obj_loss', 'val/cls_loss')
            print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
                    f"Results saved to {colorstr('bold', save_dir)}\n"
                    f'Usage example: $ python train_dual.py --hyp {evolve_yaml}')
def run(**kwargs):
    """Launch training programmatically with keyword overrides.

    Usage: import train_dual; train_dual.run(data='coco128.yaml', imgsz=320, weights='yolo.pt')

    Options are first parsed with ``parse_opt(True)`` (unknown CLI arguments are
    tolerated), then every keyword argument is set as an attribute on the
    resulting namespace before ``main`` is invoked.

    Returns:
        The options namespace that was passed to ``main``.
    """
    options = parse_opt(True)
    for name in kwargs:
        setattr(options, name, kwargs[name])
    main(options)
    return options


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
    """Run one full training session and return the final validation results.

    Args:
        hyp: path to a hyperparameter YAML file (str) or an already-loaded dict.
        opt: options namespace as produced by ``parse_opt``.
        device: torch device the model is trained on.
        callbacks: callback registry; logger methods are registered on it here.

    Returns:
        The ``results`` sequence from the last validation run
        (P, R, mAP@.5, mAP@.5-.95, val losses), or ``None`` when a callback
        sets ``callbacks.stop_training`` in the middle of an epoch.
    """
    save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data, opt.cfg, \
        opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze
    callbacks.run('on_pretrain_routine_start')

    # Directories
    w = save_dir / 'weights'  # weights dir
    (w.parent if evolve else w).mkdir(parents=True, exist_ok=True)  # make dir
    last, best = w / 'last.pt', w / 'best.pt'

    # Hyperparameters
    if isinstance(hyp, str):
        with open(hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
    LOGGER.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
    hyp['anchor_t'] = 5.0
    opt.hyp = hyp.copy()  # for saving hyps to checkpoints

    # Save run settings
    if not evolve:
        yaml_save(save_dir / 'hyp.yaml', hyp)
        yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Loggers
    data_dict = None
    if RANK in {-1, 0}:
        loggers = Loggers(save_dir, weights, opt, hyp, LOGGER)  # loggers instance

        # Register actions
        for k in methods(loggers):
            callbacks.register_action(k, callback=getattr(loggers, k))

        # Process custom dataset artifact link
        data_dict = loggers.remote_dataset
        if resume:  # If resuming runs from remote artifact
            weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size

    # Config
    plots = not evolve and not opt.noplots  # create plots
    cuda = device.type != 'cpu'
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    with torch_distributed_zero_first(LOCAL_RANK):
        data_dict = data_dict or check_dataset(data)  # check if None
    train_path, val_path = data_dict['train'], data_dict['val']
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = {0: 'item'} if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    is_coco = isinstance(val_path, str) and val_path.endswith('val2017.txt')  # COCO dataset

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(LOCAL_RANK):
            weights = attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location='cpu')  # load checkpoint to CPU to avoid CUDA memory leak
        model = Model(cfg or ckpt['model'].yaml, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(csd, strict=False)  # load
        LOGGER.info(f'Transferred {len(csd)}/{len(model.state_dict())} items from {weights}')  # report
    else:
        model = Model(cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # create
    amp = check_amp(model)  # check AMP

    # Freeze
    freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))]  # layers to freeze
    for k, v in model.named_parameters():
        # v.requires_grad = True  # train all layers TODO: uncomment this line as in master
        if any(x in k for x in freeze):
            LOGGER.info(f'freezing {k}')
            v.requires_grad = False

    # Image size
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

    # Batch size
    if RANK == -1 and batch_size == -1:  # single-GPU only, estimate best batch size
        batch_size = check_train_batch_size(model, imgsz, amp)
        loggers.on_params_update({"batch_size": batch_size})

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    optimizer = smart_optimizer(model, opt.optimizer, hyp['lr0'], hyp['momentum'], hyp['weight_decay'])

    # Scheduler
    if opt.cos_lr:
        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
    else:
        lf = lambda x: (1 - x / epochs) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # Resume
    best_fitness, start_epoch = 0.0, 0
    if pretrained:
        if resume:
            best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
        del ckpt, csd

    # DP mode
    if cuda and RANK == -1 and torch.cuda.device_count() > 1:
        LOGGER.warning('WARNING ⚠️ DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.')
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and RANK != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        LOGGER.info('Using SyncBatchNorm()')

    # Trainloader
    train_loader, dataset = create_dataloader(train_path,
                                              imgsz,
                                              batch_size // WORLD_SIZE,
                                              gs,
                                              single_cls,
                                              hyp=hyp,
                                              augment=True,
                                              cache=None if opt.cache == 'val' else opt.cache,
                                              rect=opt.rect,
                                              rank=LOCAL_RANK,
                                              workers=workers,
                                              image_weights=opt.image_weights,
                                              close_mosaic=opt.close_mosaic != 0,
                                              quad=opt.quad,
                                              prefix=colorstr('train: '),
                                              shuffle=True,
                                              min_items=opt.min_items)
    labels = np.concatenate(dataset.labels, 0)
    mlc = int(labels[:, 0].max())  # max label class
    assert mlc < nc, f'Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}'

    # Process 0
    if RANK in {-1, 0}:
        val_loader = create_dataloader(val_path,
                                       imgsz,
                                       batch_size // WORLD_SIZE * 2,
                                       gs,
                                       single_cls,
                                       hyp=hyp,
                                       cache=None if noval else opt.cache,
                                       rect=True,
                                       rank=-1,
                                       workers=workers * 2,
                                       pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not resume:
            # AutoAnchor is disabled in this script (the check_anchors call is commented out upstream)
            model.half().float()  # pre-reduce anchor precision

        callbacks.run('on_pretrain_routine_end', labels, names)

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Model attributes
    nl = de_parallel(model).model[-1].nl  # number of detection layers (hyp scaling by nl is disabled upstream)
    hyp['label_smoothing'] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nb = len(train_loader)  # number of batches
    nw = max(round(hyp['warmup_epochs'] * nb), 100)  # number of warmup iterations, at least 100
    last_opt_step = -1
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = torch.cuda.amp.GradScaler(enabled=amp)
    stopper, stop = EarlyStopping(patience=opt.patience), False
    compute_loss = ComputeLoss(model)  # init loss class
    callbacks.run('on_train_start')
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} val\n'
                f'Using {train_loader.num_workers * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting training for {epochs} epochs...')
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        callbacks.run('on_train_epoch_start')
        model.train()

        # Update image weights (optional, single-GPU only)
        if opt.image_weights:
            cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
            dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
        if epoch == (epochs - opt.close_mosaic):
            LOGGER.info("Closing dataloader mosaic")
            dataset.mosaic = False

        mloss = torch.zeros(3, device=device)  # mean losses
        if RANK != -1:
            train_loader.sampler.set_epoch(epoch)
        pbar = enumerate(train_loader)
        LOGGER.info(('\n' + '%11s' * 7) % ('Epoch', 'GPU_mem', 'box_loss', 'cls_loss', 'dfl_loss', 'Instances', 'Size'))
        if RANK in {-1, 0}:
            pbar = tqdm(pbar, total=nb, bar_format=TQDM_BAR_FORMAT)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            callbacks.run('on_train_batch_start')
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                # randrange() needs integer bounds: float arguments are deprecated since
                # Python 3.10 and raise TypeError on 3.12.
                sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.functional.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with torch.cuda.amp.autocast(amp):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                if RANK != -1:
                    loss *= WORLD_SIZE  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.

            # Backward
            scaler.scale(loss).backward()

            # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
            if ni - last_opt_step >= accumulate:
                scaler.unscale_(optimizer)  # unscale gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)
                last_opt_step = ni

            # Log
            if RANK in {-1, 0}:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G'  # (GB)
                pbar.set_description(('%11s' * 2 + '%11.4g' * 5) %
                                     (f'{epoch}/{epochs - 1}', mem, *mloss, targets.shape[0], imgs.shape[-1]))
                callbacks.run('on_train_batch_end', model, ni, imgs, targets, paths, list(mloss))
                if callbacks.stop_training:
                    return
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

        if RANK in {-1, 0}:
            # mAP
            callbacks.run('on_train_epoch_end', epoch=epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
            if not noval or final_epoch:  # Calculate mAP
                results, maps, _ = validate.run(data_dict,
                                                batch_size=batch_size // WORLD_SIZE * 2,
                                                imgsz=imgsz,
                                                half=amp,
                                                model=ema.ema,
                                                single_cls=single_cls,
                                                dataloader=val_loader,
                                                save_dir=save_dir,
                                                plots=False,
                                                callbacks=callbacks,
                                                compute_loss=compute_loss)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            stop = stopper(epoch=epoch, fitness=fi)  # early stop check
            if fi > best_fitness:
                best_fitness = fi
            log_vals = list(mloss) + list(results) + lr
            callbacks.run('on_fit_epoch_end', log_vals, epoch, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(de_parallel(model)).half(),
                    'ema': deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if opt.save_period > 0 and epoch % opt.save_period == 0:
                    torch.save(ckpt, w / f'epoch{epoch}.pt')
                del ckpt
                callbacks.run('on_model_save', last, epoch, final_epoch, best_fitness, fi)

        # EarlyStopping
        if RANK != -1:  # if DDP training
            broadcast_list = [stop if RANK == 0 else None]
            dist.broadcast_object_list(broadcast_list, 0)  # broadcast 'stop' to all ranks
            if RANK != 0:
                stop = broadcast_list[0]
        if stop:
            break  # must break all DDP ranks

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
    if RANK in {-1, 0}:
        LOGGER.info(f'\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.')
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = validate.run(
                        data_dict,
                        batch_size=batch_size // WORLD_SIZE * 2,
                        imgsz=imgsz,
                        model=attempt_load(f, device).half(),
                        single_cls=single_cls,
                        dataloader=val_loader,
                        save_dir=save_dir,
                        save_json=is_coco,
                        verbose=True,
                        plots=plots,
                        callbacks=callbacks,
                        compute_loss=compute_loss)  # val best model with plots
                    if is_coco:
                        callbacks.run('on_fit_epoch_end', list(mloss) + list(results) + lr, epoch, best_fitness, fi)

        callbacks.run('on_train_end', last, best, epoch, results)

    torch.cuda.empty_cache()
    return results
def parse_opt(known=False):
    """Build the training command-line parser and parse the arguments.

    Args:
        known: when true, unrecognised arguments are ignored
            (``parse_known_args``); otherwise they are an error.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # short alias, every option goes through it

    # Model, data and schedule
    add('--weights', type=str, default='', help='initial weights path')
    add('--cfg', type=str, default='yolo.yaml', help='model.yaml path')
    add('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path')
    add('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-high.yaml', help='hyperparameters path')
    add('--epochs', type=int, default=100, help='total training epochs')
    add('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
    add('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    add('--rect', action='store_true', help='rectangular training')
    add('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    add('--nosave', action='store_true', help='only save final checkpoint')
    add('--noval', action='store_true', help='only validate final epoch')
    add('--noautoanchor', action='store_true', help='disable AutoAnchor')
    add('--noplots', action='store_true', help='save no plot files')
    add('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    add('--bucket', type=str, default='', help='gsutil bucket')
    add('--cache', type=str, nargs='?', const='ram', help='image --cache ram/disk')
    add('--image-weights', action='store_true', help='use weighted image selection for training')
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
    add('--single-cls', action='store_true', help='train multi-class data as single-class')
    add('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW', 'LION'], default='SGD', help='optimizer')
    add('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    add('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    add('--project', default=ROOT / 'runs/train', help='save to project/name')
    add('--name', default='exp', help='save to project/name')
    add('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    add('--quad', action='store_true', help='quad dataloader')
    add('--cos-lr', action='store_true', help='cosine LR scheduler')
    add('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
    add('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)')
    add('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2')
    add('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)')
    add('--seed', type=int, default=0, help='Global training seed')
    add('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    add('--min-items', type=int, default=0, help='Experimental')
    add('--close-mosaic', type=int, default=0, help='Experimental')

    # Logger arguments
    add('--entity', default=None, help='Entity')
    add('--upload_dataset', nargs='?', const=True, default=False, help='Upload data, "val" option')
    add('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval')
    add('--artifact_alias', type=str, default='latest', help='Version of dataset artifact to use')

    if known:
        return parser.parse_known_args()[0]
    return parser.parse_args()
def main(opt, callbacks=None):
    """Validate options, set up the device / DDP, then train or evolve hyperparameters.

    Args:
        opt: options namespace from ``parse_opt``. When resuming it is replaced
            by the options stored with the run being resumed.
        callbacks: callback registry handed to ``train``. A fresh ``Callbacks()``
            is created per call when omitted, so handlers registered during one
            call are not carried over to the next.
    """
    if callbacks is None:
        callbacks = Callbacks()

    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))
        # check_git_status()
        # check_requirements()

    # Resume (from specified or most recent last.pt)
    if opt.resume and not check_comet_resume(opt) and not opt.evolve:
        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
        opt_data = opt.data  # original dataset
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:
            d = torch.load(last, map_location='cpu')['opt']
        opt = argparse.Namespace(**d)  # replace
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        if is_url(opt_data):
            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
                opt.project = str(ROOT / 'runs/evolve')
            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
        if opt.name == 'cfg':
            opt.name = Path(opt.cfg).stem  # use model.yaml as name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        msg = 'is not compatible with YOLO Multi-GPU DDP training'
        assert not opt.image_weights, f'--image-weights {msg}'
        assert not opt.evolve, f'--evolve {msg}'
        assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")

    # Train
    if not opt.evolve:
        train(opt.hyp, opt, device, callbacks)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {
            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
            'box': (1, 0.02, 0.2),  # box loss gain
            'cls': (1, 0.2, 4.0),  # cls loss gain
            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
            'copy_paste': (1, 0.0, 1.0)}  # segment copy-paste (probability)

        with open(opt.hyp, errors='ignore') as f:
            hyp = yaml.safe_load(f)  # load hyps dict
            if 'anchors' not in hyp:  # anchors commented in hyp.yaml
                hyp['anchors'] = 3
        if opt.noautoanchor:
            del hyp['anchors'], meta['anchors']
        opt.noval, opt.nosave, save_dir = True, True, Path(opt.save_dir)  # only val/save final epoch
        evolve_yaml, evolve_csv = save_dir / 'hyp_evolve.yaml', save_dir / 'evolve.csv'
        if opt.bucket:
            os.system(f'gsutil cp gs://{opt.bucket}/evolve.csv {evolve_csv}')  # download evolve.csv if exists

        for _ in range(opt.evolve):  # generations to evolve
            if evolve_csv.exists():  # if evolve.csv exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt(evolve_csv, ndmin=2, delimiter=',', skiprows=1)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights (sum > 0)
                if parent == 'single' or len(x) == 1:
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([meta[k][0] for k in hyp.keys()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):
                    hyp[k] = float(x[i + 7] * v[i])  # mutate (hyps start at csv column 7, after the 7 result keys)

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, callbacks)
            callbacks = Callbacks()  # fresh registry for the next generation
            # Write mutation results
            keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
                    'val/obj_loss', 'val/cls_loss')
            print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolve(evolve_csv)
        LOGGER.info(f'Hyperparameter evolution finished {opt.evolve} generations\n'
                    f"Results saved to {colorstr('bold', save_dir)}\n"
                    f'Usage example: $ python train.py --hyp {evolve_yaml}')


def run(**kwargs):
    """Programmatic entry point: parse default options, override them, then train.

    Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolo.pt')

    Args:
        **kwargs: option names and values that overwrite the parsed defaults.

    Returns:
        The options namespace that was handed to ``main``.
    """
    opt = parse_opt(True)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
import contextlib
import platform
import threading


def emojis(str=''):
    """Return a platform-dependent emoji-safe version of the string (non-ASCII stripped on Windows)."""
    if platform.system() == 'Windows':
        return str.encode().decode('ascii', 'ignore')
    return str


class TryExcept(contextlib.ContextDecorator):
    """Print-and-suppress error handler.

    Usage: @TryExcept() decorator or 'with TryExcept():' context manager
    """

    def __init__(self, msg=''):
        # Optional prefix printed in front of the exception text
        self.msg = msg

    def __enter__(self):
        pass

    def __exit__(self, exc_type, value, traceback):
        if value:
            print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
        return True  # always suppress the exception


def threaded(func):
    """Decorator that runs the target function in a daemon thread and returns the thread.

    Usage: @threaded decorator
    """

    def wrapper(*args, **kwargs):
        worker = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        worker.start()
        return worker

    return wrapper


def join_threads(verbose=False):
    """Join every live thread except the calling one, i.e. atexit.register(lambda: join_threads())."""
    caller = threading.current_thread()
    for worker in threading.enumerate():
        if worker is caller:
            continue
        if verbose:
            print(f'Joining thread {worker.name}')
        worker.join()


def notebook_init(verbose=True):
    """Check system software and hardware for a notebook session.

    Args:
        verbose: when true, clear the cell output and report CPU, RAM and disk.

    Returns:
        The ``IPython.display`` module.
    """
    print('Checking setup...')

    import os
    import shutil

    from utils.general import check_font, check_requirements, is_colab
    from utils.torch_utils import select_device  # imports

    check_font()

    import psutil
    from IPython import display  # to display images and clear console output

    if is_colab():
        shutil.rmtree('/content/sample_data', ignore_errors=True)  # remove colab /sample_data directory

    # System info
    if verbose:
        gib = 1 << 30  # bytes to GiB (1024 ** 3)
        ram = psutil.virtual_memory().total
        total, _, free = shutil.disk_usage("/")
        display.clear_output()
        summary = f'({os.cpu_count()} CPUs, {ram / gib:.1f} GB RAM, {(total - free) / gib:.1f}/{total / gib:.1f} GB disk)'
    else:
        summary = ''

    select_device(newline=False)
    print(emojis(f'Setup complete ✅ {summary}'))
    return display
class FReLU(nn.Module):
    # FReLU activation https://arxiv.org/abs/2007.11824
    # Computes max(x, BN(depthwise_conv(x))) element-wise, so the conv branch must keep x's spatial size
    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        # Padding follows the kernel (k // 2) so any odd k preserves height/width; the previous fixed
        # padding of 1 was only correct for k=3 and made forward() fail on a shape mismatch otherwise
        self.conv = nn.Conv2d(c1, c1, k, 1, k // 2, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        # Element-wise maximum of the input and its normalized depthwise-convolved version
        return torch.max(x, self.bn(self.conv(x)))
""" def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r super().__init__() c2 = max(r, c1 // r) self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) # self.bn1 = nn.BatchNorm2d(c2) # self.bn2 = nn.BatchNorm2d(c1) def forward(self, x): y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed dpx = (self.p1 - self.p2) * x return dpx * torch.sigmoid(beta * dpx) + self.p2 * x ================================================ FILE: utils/augmentations.py ================================================ import math import random import cv2 import numpy as np import torch import torchvision.transforms as T import torchvision.transforms.functional as TF from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy from utils.metrics import bbox_ioa IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) def __init__(self, size=640): self.transform = None prefix = colorstr('albumentations: ') try: import albumentations as A check_version(A.__version__, '1.0.3', hard=True) # version requirement T = [ A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0), A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, p=0.0)] # transforms self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) LOGGER.info(prefix + ', 
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
    # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean
    # Returns a new tensor/array and leaves the input untouched. The previous version wrote the result
    # back into x, so callers had to remember to pass a clone to keep their normalized batch intact
    x = x.clone() if isinstance(x, torch.Tensor) else x.copy()  # never write into the caller's buffer
    for i in range(3):  # channels 0..2 along dim 1
        x[:, i] = x[:, i] * std[i] + mean[i]
    return x
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize im to fit new_shape with a single scale factor, then pad the borders with a constant color
    # Returns (image, (width ratio, height ratio), (per-side width padding, per-side height padding))
    src_h, src_w = im.shape[:2]  # current height, width
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale factor (new / old), limited by the tighter of the two dimensions
    gain = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # shrink only, never enlarge
        gain = min(gain, 1.0)

    # Size after scaling and the total padding still missing in each dimension
    ratio = (gain, gain)  # width, height ratios
    new_unpad = (int(round(src_w * gain)), int(round(src_h * gain)))  # (width, height)
    pad_w = dst_w - new_unpad[0]
    pad_h = dst_h - new_unpad[1]
    if auto:  # minimum rectangle: keep only the remainder modulo stride
        pad_w, pad_h = np.mod(pad_w, stride), np.mod(pad_h, stride)
    elif scaleFill:  # stretch to the full target, no padding
        pad_w, pad_h = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)  # width, height ratios

    # Split the padding between the two opposite sides
    pad_w, pad_h = pad_w / 2, pad_h / 2

    if (src_w, src_h) != new_unpad:  # resize only when the size actually changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd pixel of padding to the bottom/right side
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (pad_w, pad_h)
perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 # Center C = np.eye(3) C[0, 2] = -im.shape[1] / 2 # x translation (pixels) C[1, 2] = -im.shape[0] / 2 # y translation (pixels) # Perspective P = np.eye(3) P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - scale, 1 + scale) # s = 2 ** random.uniform(-scale, scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() # ax[0].imshow(im[:, :, ::-1]) # base # ax[1].imshow(im2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) if n: use_segments = any(x.any() for x in segments) new = np.zeros((n, 4)) if use_segments: # warp segments segments = 
resample_segments(segments) # upsample for i, segment in enumerate(segments): xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) else: # warp boxes xy = np.ones((n * 4, 3)) xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) targets = targets[i] targets[:, 1:5] = new[i] return im, targets def copy_paste(im, labels, segments, p=0.5): # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) n = len(segments) if p and n: h, w, c = im.shape # height, width, channels im_new = np.zeros(im.shape, np.uint8) # calculate ioa first then select indexes randomly boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) n = len(indexes) for j in random.sample(list(indexes), k=round(p * n)): l, box, s = labels[j], boxes[j], segments[j] labels = np.concatenate((labels, [[l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) result = cv2.flip(im, 1) # augment segments (flip left-right) i = cv2.flip(im_new, 1).astype(bool) im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug 
def cutout(im, labels, p=0.5):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    # With probability p, paints 31 random solid-color rectangles into im (modified in place) and drops
    # labels that a large mask covers by 60% or more. Returns the remaining labels.
    # NOTE(review): labels[:, 1:5] is passed to xywhn2xyxy, so normalized xywh is presumably expected - confirm
    if random.random() < p:
        h, w = im.shape[:2]
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            # Random mask size; max(1, ...) keeps randint's range valid when the image side is so small
            # that int(side * s) becomes 0 (previously a ValueError for sides under 32 px)
            mask_h = random.randint(1, max(1, int(h * s)))
            mask_w = random.randint(1, max(1, int(w * s)))

            # box, clipped to the image
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels (only the larger masks are checked)
            if len(labels) and s > 0.03:
                box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0]  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
class LetterBox:
    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, size=(640, 640), auto=False, stride=32):
        # size: int (square) or (height, width) of the target canvas
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

    def __call__(self, im):  # im = np.array HWC
        # Resize im to fit the target with one scale factor and centre it on a canvas filled with 114
        imh, imw = im.shape[:2]
        r = min(self.h / imh, self.w / imw)  # ratio of new/old
        h, w = round(imh * r), round(imw * r)  # resized image
        # Canvas size: smallest stride multiples that hold the resized image when auto, else the fixed
        # target. The parentheses around (self.h, self.w) are required: without them the conditional
        # binds tighter than the comma and auto=True unpacks a generator object into hs.
        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
        # Allocate the canvas with the size computed above (identical to (self.h, self.w) when auto is off)
        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)
        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
        return im_out
def check_anchor_order(m):
    # Compare anchor ordering with stride ordering on module m and flip the anchors in place if they disagree
    layer_area = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
    area_delta = layer_area[-1] - layer_area[0]  # last layer minus first layer
    stride_delta = m.stride[-1] - m.stride[0]  # last stride minus first stride
    if not area_delta:  # first and last layer have equal mean area, nothing to decide
        return
    if area_delta.sign() == stride_delta.sign():  # already in the same order
        return
    LOGGER.info(f'{PREFIX}Reversing anchor order')
    m.anchors[:] = m.anchors.flip(0)
stride # current anchors bpr, aat = metric(anchors.cpu().view(-1, 2)) s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). ' if bpr > 0.98: # threshold to recompute LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') else: LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') na = m.anchors.numel() // 2 # number of anchors anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) new_bpr = metric(anchors)[0] if new_bpr > bpr: # replace anchors anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) m.anchors[:] = anchors.clone().view_as(m.anchors) check_anchor_order(m) # must be in pixel-space (not grid-space) m.anchors /= stride s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' else: s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' LOGGER.info(s) def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): """ Creates kmeans-evolved anchors from training dataset Arguments: dataset: path to data.yaml, or a loaded dataset n: number of anchors img_size: image size used for training thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 gen: generations to evolve anchors using genetic algorithm verbose: print all results Return: k: kmeans evolved anchors Usage: from utils.autoanchor import *; _ = kmean_anchors() """ from scipy.cluster.vq import kmeans npr = np.random thr = 1 / thr def metric(k, wh): # compute metrics r = wh[:, None] / k[None] x = torch.min(r, 1 / r).min(2)[0] # ratio metric # x = wh_iou(wh, torch.tensor(k)) # iou metric return x, x.max(1)[0] # x, best_x def anchor_fitness(k): # mutation fitness _, best = metric(torch.tensor(k, dtype=torch.float32), wh) return (best * (best > thr).float()).mean() # fitness def print_results(k, verbose=True): k = 
k[np.argsort(k.prod(1))] # sort small to large x, best = metric(k, wh0) bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ f'past_thr={x[x > thr].mean():.3f}-mean: ' for x in k: s += '%i,%i, ' % (round(x[0]), round(x[1])) if verbose: LOGGER.info(s[:-2]) return k if isinstance(dataset, str): # *.yaml file with open(dataset, errors='ignore') as f: data_dict = yaml.safe_load(f) # model dict from utils.dataloaders import LoadImagesAndLabels dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) # Get label wh shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh # Filter i = (wh0 < 3.0).any(1).sum() if i: LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size') wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 # Kmeans init try: LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...') assert n <= len(wh) # apply overdetermined constraint s = wh.std(0) # sigmas for whitening k = kmeans(wh / s, n, iter=30)[0] * s # points assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar except Exception: LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init') k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) k = print_results(k, verbose=False) # Plot # k, d = [None] * 20, [None] * 20 # for i in tqdm(range(1, 21)): # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance # fig, ax = 
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
    # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
    # Usage:
    #     import torch
    #     from utils.autobatch import autobatch
    #     model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
    #     print(autobatch(model))
    # Returns the estimated batch size, or the default `batch_size` whenever no estimate is possible
    # (CPU device, cudnn.benchmark enabled, profiling failure, estimate outside 1..1024)

    # Check device
    prefix = colorstr('AutoBatch: ')
    LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
    device = next(model.parameters()).device  # get model device
    if device.type == 'cpu':
        LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
        return batch_size
    if torch.backends.cudnn.benchmark:
        LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}')
        return batch_size

    # Inspect CUDA memory
    gb = 1 << 30  # bytes to GiB (1024 ** 3)
    d = str(device).upper()  # 'CUDA:0'
    properties = torch.cuda.get_device_properties(device)  # device properties
    t = properties.total_memory / gb  # GiB total
    r = torch.cuda.memory_reserved(device) / gb  # GiB reserved
    a = torch.cuda.memory_allocated(device) / gb  # GiB allocated
    f = t - (r + a)  # GiB free
    LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')

    # Profile batch sizes
    batch_sizes = [1, 2, 4, 8, 16]
    try:
        img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes]
        results = profile(img, model, n=3, device=device)
    except Exception as e:
        # Without profiling results there is nothing to fit; previously execution continued and
        # failed on the unbound `results` name right after this warning
        LOGGER.warning(f'{prefix}{e}')
        LOGGER.warning(f'{prefix}WARNING ⚠️ profiling failed, using default batch-size {batch_size}')
        return batch_size

    # Fit a solution
    y = [x[2] for x in results if x]  # memory [2]
    if len(y) < 2:  # a first-degree fit needs at least two measured points
        LOGGER.warning(f'{prefix}WARNING ⚠️ too few successful profiles, using default batch-size {batch_size}')
        return batch_size
    p = np.polyfit(batch_sizes[:len(y)], y, deg=1)  # first degree polynomial fit
    b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
    if None in results:  # some sizes failed
        i = results.index(None)  # first fail index
        if b >= batch_sizes[i]:  # y intercept above failure point
            b = batch_sizes[max(i - 1, 0)]  # select prior safe point
    if b < 1 or b > 1024:  # b outside of safe range
        b = batch_size
        LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.')

    fraction = (np.polyval(p, b) + r + a) / t  # actual fraction predicted
    LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅')
    return b
[], 'on_train_start': [], 'on_train_epoch_start': [], 'on_train_batch_start': [], 'optimizer_step': [], 'on_before_zero_grad': [], 'on_train_batch_end': [], 'on_train_epoch_end': [], 'on_val_start': [], 'on_val_batch_start': [], 'on_val_image_end': [], 'on_val_batch_end': [], 'on_val_end': [], 'on_fit_epoch_end': [], # fit = train + val 'on_model_save': [], 'on_train_end': [], 'on_params_update': [], 'teardown': [],} self.stop_training = False # set True to interrupt training def register_action(self, hook, name='', callback=None): """ Register a new action to a callback hook Args: hook: The callback hook name to register the action to name: The name of the action for later reference callback: The callback to fire """ assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" assert callable(callback), f"callback '{callback}' is not callable" self._callbacks[hook].append({'name': name, 'callback': callback}) def get_registered_actions(self, hook=None): """" Returns all the registered actions by callback hook Args: hook: The name of the hook to check, defaults to all """ return self._callbacks[hook] if hook else self._callbacks def run(self, hook, *args, thread=False, **kwargs): """ Loop through the registered actions and fire all callbacks on main thread Args: hook: The name of the hook to check, defaults to all args: Arguments to receive from YOLOv5 thread: (boolean) Run callbacks in daemon thread kwargs: Keyword Arguments to receive from YOLOv5 """ assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" for logger in self._callbacks[hook]: if thread: threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start() else: logger['callback'](*args, **kwargs) ================================================ FILE: utils/coco_utils.py ================================================ import cv2 from pycocotools.coco import COCO from pycocotools import mask as maskUtils # coco id: 
def getCocoIds(name = 'semantic'):
    # Return the category id list for the requested annotation flavour:
    # 'instances' and 'stuff' return the module-level lists themselves,
    # 'panoptic' returns instances + panoptic stuff, anything else (semantic) returns instances + all stuff
    if name == 'instances':
        return all_instances_ids
    if name == 'stuff':
        return all_stuff_ids
    stuff_ids = panoptic_stuff_ids if name == 'panoptic' else all_stuff_ids
    return all_instances_ids + stuff_ids
# convert mask to polygons
def convert_to_polys(mask):
    # Returns a list of flattened contours [x0, y0, x1, y1, ...] found in mask;
    # contours with 4 or fewer coordinate values (at most two points) are discarded
    # The dtype is given by name because this module does not import numpy (the previous `np.uint8`
    # raised NameError). findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
    # (image, contours, hierarchy) in 3.x, so contours is taken as the second-to-last element.
    contours = cv2.findContours(mask.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    segmentation = []
    for contour in contours:
        contour = contour.flatten().tolist()
        if 4 < len(contour):
            segmentation.append(contour)
    return segmentation
def get_hash(paths):
    """Return a single MD5 hex digest for a collection of paths (files or dirs).

    The digest covers the summed size of the paths that exist plus the
    concatenation of all path strings, in the order given.

    Args:
        paths: iterable of path strings or path-like objects. It may be a
            one-shot iterator; it is consumed exactly once.

    Returns:
        Hexadecimal MD5 digest string.
    """
    # Materialize once: the paths are needed twice below (sizes, then names), so a
    # generator would otherwise contribute an empty name string to the hash.
    paths = [str(p) for p in paths]
    size = sum(os.path.getsize(p) for p in paths if os.path.exists(p))  # sizes
    h = hashlib.md5(str(size).encode())  # hash sizes
    h.update(''.join(paths).encode())  # hash paths
    return h.hexdigest()  # return hash
""" exif = image.getexif() orientation = exif.get(0x0112, 1) # default 1 if orientation > 1: method = { 2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90}.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] image.info["exif"] = exif.tobytes() return image def seed_worker(worker_id): # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader worker_seed = torch.initial_seed() % 2 ** 32 np.random.seed(worker_seed) random.seed(worker_seed) def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, workers=8, image_weights=False, close_mosaic=False, quad=False, min_items=0, prefix='', shuffle=False): if rect and shuffle: LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') shuffle = False with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP dataset = LoadImagesAndLabels( path, imgsz, batch_size, augment=augment, # augmentation hyp=hyp, # hyperparameters rect=rect, # rectangular batches cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, min_items=min_items, prefix=prefix) batch_size = min(batch_size, len(dataset)) nd = torch.cuda.device_count() # number of CUDA devices nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) #loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader generator = torch.Generator() generator.manual_seed(6148914691236517205 + RANK) return loader(dataset, batch_size=batch_size, shuffle=shuffle and 
class InfiniteDataLoader(dataloader.DataLoader):
    """DataLoader variant that keeps one underlying iterator alive and reuses it.

    Accepts the same arguments as the vanilla DataLoader. The batch sampler is
    wrapped so it never runs dry, and every pass over the loader pulls exactly
    ``len(self)`` batches from the single iterator created at construction time.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        endless_sampler = _RepeatSampler(self.batch_sampler)
        # Assigned through object.__setattr__ rather than normal attribute assignment.
        object.__setattr__(self, 'batch_sampler', endless_sampler)
        self.iterator = super().__iter__()

    def __len__(self):
        # Length of the wrapped (finite) batch sampler, i.e. batches per pass.
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        remaining = len(self)
        while remaining > 0:
            yield next(self.iterator)
            remaining -= 1


class _RepeatSampler:
    """Sampler wrapper that restarts the wrapped sampler forever.

    Args:
        sampler (Sampler): sampler to cycle through endlessly.
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            for item in self.sampler:
                yield item
"height": self.height} def __iter__(self): return self def __next__(self): # mss screen capture: get raw pixels from the screen as np array im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " if self.transforms: im = self.transforms(im0) # transforms else: im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB im = np.ascontiguousarray(im) # contiguous self.frame += 1 return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s class LoadImages: # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): files = [] for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: p = str(Path(p).resolve()) if '*' in p: files.extend(sorted(glob.glob(p, recursive=True))) # glob elif os.path.isdir(p): files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir elif os.path.isfile(p): files.append(p) # files else: raise FileNotFoundError(f'{p} does not exist') images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] ni, nv = len(images), len(videos) self.img_size = img_size self.stride = stride self.files = images + videos self.nf = ni + nv # number of files self.video_flag = [False] * ni + [True] * nv self.mode = 'image' self.auto = auto self.transforms = transforms # optional self.vid_stride = vid_stride # video frame-rate stride if any(videos): self._new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. 
def __next__(self):
    """Return the next image or video frame together with its preprocessed version.

    Returns:
        Tuple ``(path, im, im0, self.cap, s)``: the source file path, the
        preprocessed image, the original image as read by OpenCV, the current
        video capture object (``self.cap``) and a progress string for logging.

    Raises:
        StopIteration: when every file has been consumed.
    """
    if self.count == self.nf:
        raise StopIteration
    path = self.files[self.count]

    if self.video_flag[self.count]:
        # Read video
        self.mode = 'video'
        # Grab vid_stride frames but decode only the last one.
        for _ in range(self.vid_stride):
            self.cap.grab()
        ret_val, im0 = self.cap.retrieve()
        # A failed retrieve is treated as the end of the current video: move on to
        # the next file until a frame is read or the file list is exhausted.
        while not ret_val:
            self.count += 1
            self.cap.release()
            if self.count == self.nf:  # last video
                raise StopIteration
            path = self.files[self.count]
            self._new_video(path)
            ret_val, im0 = self.cap.read()

        self.frame += 1
        # im0 = self._cv2_rotate(im0)  # for use if cv2 autorotation is False
        s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '

    else:
        # Read image
        self.count += 1
        im0 = cv2.imread(path)  # BGR
        assert im0 is not None, f'Image Not Found {path}'
        s = f'image {self.count}/{self.nf} {path}: '

    if self.transforms:
        # User-supplied transforms replace the default letterbox pipeline entirely.
        im = self.transforms(im0)  # transforms
    else:
        im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0]  # padded resize
        im = im.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        im = np.ascontiguousarray(im)  # contiguous

    return path, im, im0, self.cap, s
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
    """Open every stream source and start one daemon reader thread per source.

    Args:
        sources: path of a text file listing whitespace-separated sources, or a
            single source string (URL, file path, or a decimal webcam index).
        img_size: target size passed to letterbox().
        stride: stride passed to letterbox().
        auto: letterbox() `auto` flag; only kept enabled when all streams
            letterbox to the same shape.
        transforms: optional callable stored on the instance (applied per frame
            instead of letterbox elsewhere in the class).
        vid_stride: video frame-rate stride stored for the reader threads.

    Raises:
        AssertionError: if a stream cannot be opened, its first frame cannot be
            read, or a local webcam is requested on Colab/Kaggle.
    """
    torch.backends.cudnn.benchmark = True  # faster for fixed-size inference
    self.mode = 'stream'
    self.img_size = img_size
    self.stride = stride
    self.vid_stride = vid_stride  # video frame-rate stride
    sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources]
    n = len(sources)
    self.sources = [clean_str(x) for x in sources]  # clean source names for later
    self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
    for i, s in enumerate(sources):  # index, source
        # Start thread to read frames from video stream
        st = f'{i + 1}/{n}: {s}... '
        if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'):  # if source is YouTube video
            # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc'
            check_requirements(('pafy', 'youtube_dl==2020.12.2'))
            import pafy
            s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
        # int() instead of eval(): no code evaluation, and zero-padded indices like '01' work
        s = int(s) if s.isdecimal() else s  # i.e. s = '0' local webcam
        if s == 0:
            assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
            assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
        cap = cv2.VideoCapture(s)
        assert cap.isOpened(), f'{st}Failed to open {s}'
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
        self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
        self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback

        success, self.imgs[i] = cap.read()  # guarantee first frame
        # Fail here with a clear message instead of later inside letterbox() on a None image.
        assert success and self.imgs[i] is not None, f'{st}Failed to read images from {s}'
        self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
        LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
        self.threads[i].start()
    LOGGER.info('')  # newline

    # check for common shapes
    s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs])
    self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
    self.auto = auto and self.rect
    self.transforms = transforms  # optional
    if not self.rect:
        LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.')
min_items=0, prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path self.albumentations = Albumentations(size=img_size) if augment else None try: f = [] # image files for p in path if isinstance(path, list) else [path]: p = Path(p) # os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / '**' / '*.*'), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib) else: raise FileNotFoundError(f'{prefix}{p} does not exist') self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert self.im_files, f'{prefix}No images found' except Exception as e: raise Exception(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e # Check cache self.label_files = img2label_paths(self.im_files) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') try: cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict assert cache['version'] == self.cache_version # matches current version assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash except Exception: cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total if exists and LOCAL_RANK in {-1, 0}: d = f"Scanning 
{cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results if cache['msgs']: LOGGER.info('\n'.join(cache['msgs'])) # display warnings assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items labels, shapes, self.segments = zip(*cache.values()) nl = len(np.concatenate(labels, 0)) # number of labels assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}' self.labels = list(labels) self.shapes = np.array(shapes) self.im_files = list(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update # Filter images if min_items: include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int) LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset') self.im_files = [self.im_files[i] for i in include] self.label_files = [self.label_files[i] for i in include] self.labels = [self.labels[i] for i in include] self.segments = [self.segments[i] for i in include] self.shapes = self.shapes[include] # wh # Create indices n = len(self.shapes) # number of images bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n self.indices = range(n) # Update labels include_class = [] # filter labels to include only these classes (optional) include_class_array = np.array(include_class).reshape(1, -1) for i, (label, segment) in enumerate(zip(self.labels, self.segments)): if include_class: j = (label[:, 0:1] == include_class_array).any(1) self.labels[i] = label[j] if segment: self.segments[i] = segment[j] if single_cls: # single-class training, merge all classes into 0 self.labels[i][:, 0] = 0 # Rectangular Training if self.rect: # Sort by aspect ratio s = 
def check_cache_ram(self, safety_margin=0.1, prefix=''):
    """Estimate whether the resized dataset fits into currently available RAM.

    Up to 30 randomly chosen images are read, their size after scaling the longer
    side to `self.img_size` is estimated, and the mean is extrapolated to
    `self.n` images.

    Args:
        safety_margin: fractional headroom added to the estimate before comparing
            it with the available memory.
        prefix: string prepended to the log message.

    Returns:
        True if the estimate (plus margin) is below the available memory,
        otherwise False (a message is logged in that case).
    """
    b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
    n = min(self.n, 30)  # extrapolate from 30 random images
    sampled = 0  # images actually measured
    for _ in range(n):
        im = cv2.imread(random.choice(self.im_files))  # sample image
        if im is None:  # unreadable file: skip it rather than crash on im.shape
            continue
        ratio = self.img_size / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
        b += im.nbytes * ratio ** 2
        sampled += 1
    # Bytes required to cache the dataset into RAM; 0 when nothing could be sampled
    # (empty dataset or no readable sample), which avoids a division by zero.
    mem_required = b * self.n / sampled if sampled else 0
    mem = psutil.virtual_memory()
    cache = mem_required * (1 + safety_margin) < mem.available  # to cache or not to cache, that is the question
    if not cache:
        LOGGER.info(f"{prefix}{mem_required / gb:.1f}GB RAM required, "
                    f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, "
                    f"not caching images ⚠️")
    return cache
{HELP_URL}') x['hash'] = get_hash(self.label_files + self.im_files) x['results'] = nf, nm, ne, nc, len(self.im_files) x['msgs'] = msgs # warnings x['version'] = self.cache_version # cache version try: np.save(path, x) # save cache for next time path.with_suffix('.cache.npy').rename(path) # remove .npy suffix LOGGER.info(f'{prefix}New cache created: {path}') except Exception as e: LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable return x def __len__(self): return len(self.im_files) # def __iter__(self): # self.count = -1 # print('ran dataset iter') # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) # return self def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights hyp = self.hyp mosaic = self.mosaic and random.random() < hyp['mosaic'] if mosaic: # Load mosaic img, labels = self.load_mosaic(index) shapes = None # MixUp augmentation if random.random() < hyp['mixup']: img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image img, (h0, w0), (h, w) = self.load_image(index) # Letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: img, labels = random_perspective(img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], perspective=hyp['perspective']) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) if self.augment: # Albumentations img, labels = 
def load_image(self, i):
    """Load one dataset image, returning (im, original hw, resized hw).

    Lookup order: the in-RAM cache `self.ims[i]`, then the `*.npy` file
    `self.npy_files[i]` if it exists, then the image file `self.im_files[i]`.
    Images not taken from RAM are resized so their longer side equals
    `self.img_size`.

    Args:
        i: dataset index.

    Returns:
        Tuple ``(im, (h0, w0), (h, w))``.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
    if im is not None:  # cached in RAM
        return self.ims[i], self.im_hw0[i], self.im_hw[i]  # im, hw_original, hw_resized

    if fn.exists():  # load npy
        im = np.load(fn)
    else:  # read image
        im = cv2.imread(f)  # BGR
        assert im is not None, f'Image Not Found {f}'
    h0, w0 = im.shape[:2]  # orig hw
    r = self.img_size / max(h0, w0)  # ratio
    if r != 1:  # if sizes are not equal
        interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
        # Clamp to >= 1 px: for extreme aspect ratios int() can truncate the short
        # side to 0, which cv2.resize rejects.
        new_w, new_h = max(int(w0 * r), 1), max(int(h0 * r), 1)
        im = cv2.resize(im, (new_w, new_h), interpolation=interp)
    return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices random.shuffle(indices) for i, index in enumerate(indices): # Load image img, _, (h, w) = self.load_image(index) # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) # Concat/clip labels labels4 = np.concatenate(labels4, 0) for x in (labels4[:, 1:], *segments4): np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() # img4, labels4 = replicate(img4, labels4) # replicate # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) img4, labels4 = random_perspective(img4, labels4, segments4, 
degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border) # border to remove return img4, labels4 def load_mosaic9(self, index): # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic labels9, segments9 = [], [] s = self.img_size indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices random.shuffle(indices) hp, wp = -1, -1 # height, width previous for i, index in enumerate(indices): # Load image img, _, (h, w) = self.load_image(index) # place img in img9 if i == 0: # center img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top c = s, s - h, s + w, s elif i == 2: # top right c = s + wp, s - h, s + wp + w, s elif i == 3: # right c = s + w0, s, s + w0 + w, s + h elif i == 4: # bottom right c = s + w0, s + hp, s + w0 + w, s + hp + h elif i == 5: # bottom c = s + w0 - w, s + h0, s + w0, s + h0 + h elif i == 6: # bottom left c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h elif i == 7: # left c = s - w, s + h0 - h, s, s + h0 elif i == 8: # top left c = s - w, s + h0 - hp - h, s, s + h0 - hp padx, pady = c[:2] x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] # Concat/clip labels labels9 = 
@staticmethod
def collate_fn(batch):
    """Merge per-sample tuples (im, label, path, shapes) into one batch.

    Column 0 of every label tensor is overwritten in place with the sample's
    position in the batch (the target image index used by build_targets()).

    Returns:
        Tuple ``(stacked images, concatenated labels, paths, shapes)``; paths and
        shapes are tuples with one entry per sample.
    """
    images, targets, paths, shapes = zip(*batch)  # transposed
    for image_index, target in enumerate(targets):
        target[:, 0] = image_index  # add target image index for build_targets()
    stacked_images = torch.stack(images, 0)
    merged_targets = torch.cat(targets, 0)
    return stacked_images, merged_targets, paths, shapes
def extract_boxes(path=DATASETS_DIR / 'coco128'):  # from utils.dataloaders import *; extract_boxes()
    """Convert a detection dataset into a classification dataset, one directory per class.

    Every labelled box of every image under `path` is padded (x1.2 + 3 px),
    clipped to the image and written to
    ``path/classifier/<class>/<path.stem>_<image stem>_<box index>.jpg``.

    Args:
        path: dataset root that is searched recursively for images.

    Raises:
        AssertionError: if a crop cannot be written.
    """
    path = Path(path)  # images dir
    out_dir = path / 'classifier'  # crops are written here
    # Remove output left over from earlier runs. 'classification' is the directory the
    # previous implementation cleaned; 'classifier' is the one actually written to, so
    # without clearing it stale crops would survive and be re-scanned as input images.
    for stale in (path / 'classification', out_dir):
        if stale.is_dir():
            shutil.rmtree(stale)
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:].lower() not in IMG_FORMATS:  # not an image (case-insensitive suffix)
            continue
        im = cv2.imread(str(im_file))
        if im is None:  # unreadable or corrupt image
            continue
        # NOTE(review): channels are reversed here and the crop is later passed to
        # cv2.imwrite unchanged - confirm the saved channel order is intended.
        im = im[..., ::-1]  # BGR to RGB
        h, w = im.shape[:2]

        # labels
        lb_file = Path(img2label_paths([str(im_file)])[0])
        if not lb_file.exists():
            continue
        with open(lb_file) as label_fh:
            lb = np.array([x.split() for x in label_fh.read().strip().splitlines()], dtype=np.float32)  # labels

        for j, x in enumerate(lb):
            c = int(x[0])  # class
            crop_file = out_dir / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
            crop_file.parent.mkdir(parents=True, exist_ok=True)

            b = x[1:] * [w, h, w, h]  # box
            # b[2:] = b[2:].max()  # rectangle to square
            b[2:] = b[2:] * 1.2 + 3  # pad
            b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

            b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
            b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
            assert cv2.imwrite(str(crop_file), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {crop_file}'
(list, tuple) annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only n = len(files) # number of files random.seed(0) # for reproducibility indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files for x in txt: if (path.parent / x).exists(): (path.parent / x).unlink() # remove existing print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) for i, img in tqdm(zip(indices, files), total=n): if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label with open(path.parent / txt[i], 'a') as f: f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file def verify_image_label(args): # Verify one image-label pair im_file, lb_file, prefix = args nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments try: # verify images im = Image.open(im_file) im.verify() # PIL verify shape = exif_size(im) # image size assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' if im.format.lower() in ('jpg', 'jpeg'): with open(im_file, 'rb') as f: f.seek(-2, 2) if f.read() != b'\xff\xd9': # corrupt JPEG ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' # verify labels if os.path.isfile(lb_file): nf = 1 # label found with open(lb_file) as f: lb = [x.split() for x in f.read().strip().splitlines() if len(x)] if any(len(x) > 6 for x in lb): # is segment classes = np.array([x[0] for x in lb], dtype=np.float32) segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # 
class HUBDatasetStats():
    """ Class for generating HUB dataset JSON and `-hub` dataset directory

    Arguments
        path:           Path to data.yaml or data.zip (with data.yaml inside data.zip)
        autodownload:   Attempt to download dataset if not found locally

    Usage
        from utils.dataloaders import HUBDatasetStats
        stats = HUBDatasetStats('coco128.yaml', autodownload=True)  # usage 1
        stats = HUBDatasetStats('path/to/coco128.zip')  # usage 2
        stats.get_json(save=False)
        stats.process_images()
    """

    def __init__(self, path='coco128.yaml', autodownload=False):
        # Initialize class
        zipped, data_dir, yaml_path = self._unzip(Path(path))
        try:
            with open(check_yaml(yaml_path), errors='ignore') as f:
                data = yaml.safe_load(f)  # data dict
                if zipped:
                    data['path'] = data_dir
        except Exception as e:
            raise Exception("error/HUB/dataset_stats/yaml_load") from e

        check_dataset(data, autodownload)  # download dataset if missing
        # check_dataset() can store a pathlib.Path under data['path']; `Path + str` raises TypeError, so format
        # the value instead (works for both str and Path).
        self.hub_dir = Path(f"{data['path']}-hub")
        self.im_dir = self.hub_dir / 'images'
        self.im_dir.mkdir(parents=True, exist_ok=True)  # makes /images
        self.stats = {'nc': data['nc'], 'names': list(data['names'].values())}  # statistics dictionary
        self.data = data

    @staticmethod
    def _find_yaml(dir):
        # Return the single data.yaml file inside `dir`, preferring a *.yaml whose stem matches the dir name
        files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml'))  # try root level first and then recursive
        assert files, f'No *.yaml file found in {dir}'
        if len(files) > 1:
            files = [f for f in files if f.stem == dir.stem]  # prefer *.yaml files that match dir name
            assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed'
        assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}'
        return files[0]

    def _unzip(self, path):
        # Unzip data.zip. Returns (zipped, data_dir, yaml_path); data_dir is None when `path` is already a yaml
        if not str(path).endswith('.zip'):  # path is data.yaml
            return False, None, path
        assert Path(path).is_file(), f'Error unzipping {path}, file not found'
        unzip_file(path, path=path.parent)
        dir = path.with_suffix('')  # dataset directory == zip name
        assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/'
        return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f, max_dim=1920):
        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
        f_new = self.im_dir / Path(f).name  # dataset-hub image filename
        try:  # use PIL
            im = Image.open(f)
            r = max_dim / max(im.height, im.width)  # ratio
            if r < 1.0:  # image too large
                im = im.resize((int(im.width * r), int(im.height * r)))
            im.save(f_new, 'JPEG', quality=50, optimize=True)  # save
        except Exception as e:  # use OpenCV
            LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
            im = cv2.imread(str(f))
            im_height, im_width = im.shape[:2]
            r = max_dim / max(im_height, im_width)  # ratio
            if r < 1.0:  # image too large
                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
            cv2.imwrite(str(f_new), im)

    def get_json(self, save=False, verbose=False):
        # Return dataset JSON for Ultralytics HUB; optionally write it to <hub_dir>/stats.json and/or print it

        def _round(labels):
            # Update labels to integer class and 4 decimal place floats
            return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]

        for split in 'train', 'val', 'test':
            if self.data.get(split) is None:
                self.stats[split] = None  # i.e. no test set
                continue
            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
            x = np.array([
                np.bincount(label[:, 0].astype(int), minlength=self.data['nc'])
                for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')])  # shape(128x80)
            self.stats[split] = {
                'instance_stats': {
                    'total': int(x.sum()),
                    'per_class': x.sum(0).tolist()},
                'image_stats': {
                    'total': dataset.n,
                    'unlabelled': int(np.all(x == 0, 1).sum()),
                    'per_class': (x > 0).sum(0).tolist()},
                'labels': [{
                    str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]}

        # Save, print and return
        if save:
            stats_path = self.hub_dir / 'stats.json'
            print(f'Saving {stats_path.resolve()}...')
            with open(stats_path, 'w') as f:
                json.dump(self.stats, f)  # save stats.json
        if verbose:
            print(json.dumps(self.stats, indent=2, sort_keys=False))
        return self.stats

    def process_images(self):
        # Compress images for Ultralytics HUB; returns the directory the images were written to
        for split in 'train', 'val', 'test':
            if self.data.get(split) is None:
                continue
            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
            desc = f'{split} images'
            # Context manager releases the worker threads; the loop fully drains imap() before the pool exits
            with ThreadPool(NUM_THREADS) as pool:
                for _ in tqdm(pool.imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc):
                    pass
        print(f'Done. All images saved to {self.im_dir}')
        return self.im_dir


# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    YOLOv5 Classification Dataset.
    Arguments
        root:     Dataset path
        augment:  If truthy, samples go through classify_albumentations(augment, imgsz); otherwise through
                  classify_transforms(imgsz)
        imgsz:    Image size forwarded to the transform builders
        cache:    True or 'ram' keeps decoded images in memory, 'disk' stores them as *.npy next to the image,
                  anything else disables caching
    """

    def __init__(self, root, augment, imgsz, cache=False):
        super().__init__(root=root)
        self.torch_transforms = classify_transforms(imgsz)
        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
        self.cache_ram = cache is True or cache == 'ram'
        self.cache_disk = cache == 'disk'
        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im

    def __getitem__(self, i):
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram:
            # Only decode on the first access; later accesses must reuse the cached array rather than fall
            # through to the uncached branch and read the file again.
            if im is None:
                im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # create npy on first access
                np.save(fn.as_posix(), cv2.imread(f))
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        if self.album_transforms:
            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"]
        else:
            sample = self.torch_transforms(im)
        return sample, j


def create_classification_dataloader(path,
                                     imgsz=224,
                                     batch_size=16,
                                     augment=True,
                                     cache=False,
                                     rank=-1,
                                     workers=8,
                                     shuffle=True):
    # Returns Dataloader object to be used with YOLOv5 Classifier
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
    batch_size = min(batch_size, len(dataset))  # never ask for more samples than the dataset holds
    nd = torch.cuda.device_count()
    # Workers: bounded by CPUs per CUDA device, by the batch size (0 workers for batch size 1) and by `workers`
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)  # note: seeded from the global RANK, not the `rank` argument
    return InfiniteDataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=shuffle and sampler is None,
                              num_workers=nw,
                              sampler=sampler,
                              pin_memory=PIN_MEMORY,
                              worker_init_fn=seed_worker,
                              generator=generator)  # or DataLoader(persistent_workers=True)
def is_url(url, check=True):
    """Check if a string is a URL and, optionally, whether that URL exists online.

    Arguments
        url:    Value to test (converted with str())
        check:  If True, the URL is opened and must answer with HTTP status 200

    Returns
        True if `url` has both a scheme and a network location (and, with check=True, responds with 200),
        otherwise False. Network and protocol errors are reported as False instead of being raised.
    """
    # Import the submodules explicitly: a bare `import urllib` does not guarantee that they are loaded
    from urllib import parse, request

    url = str(url)
    try:
        result = parse.urlparse(url)
    except ValueError:  # e.g. malformed IPv6 netloc
        return False
    if not (result.scheme and result.netloc):  # explicit check, not an `assert` that -O would strip
        return False
    if not check:
        return True
    try:
        with request.urlopen(url) as response:  # close the connection once the status is read
            return response.getcode() == 200  # check if exists online
    except (OSError, ValueError):  # URLError and HTTPError are OSError subclasses; also covers timeouts
        return False


def gsutil_getsize(url=''):
    """Return the size in bytes reported by `gsutil du` for gs://bucket/file, or 0 if gsutil prints nothing.

    https://cloud.google.com/storage/docs/gsutil/commands/du
    """
    # Argument list instead of a shell string, so the URL is never interpreted by a shell
    s = subprocess.check_output(['gsutil', 'du'] + ([url] if url else [])).decode('utf-8')
    fields = s.split()
    return int(fields[0]) if fields else 0  # bytes; parsed with int() instead of eval() on external output


def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
    """Return downloadable file size in bytes, or -1 if the server sends no content-length header."""
    response = requests.head(url, allow_redirects=True)
    return int(response.headers.get('content-length', -1))
def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
    """Attempt file download from GitHub release assets if not found locally.

    Arguments
        file:     Local path or http(s) URL of the wanted file
        repo:     GitHub repository whose release assets are searched
        release:  Release to query, i.e. 'latest', 'v7.0', etc.

    Returns
        The local file name/path as a string.
    """
    from utils.general import LOGGER

    def github_assets(repository, version='latest'):
        # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
        if version != 'latest':
            version = f'tags/{version}'  # i.e. tags/v7.0
        response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json()  # github api
        return response['tag_name'], [x['name'] for x in response['assets']]  # tag, assets

    def resolve_release(fallback_assets):
        # Try the requested release, then the latest release, then the newest local git tag, then `release`
        for query in ((repo, release), (repo,)):
            try:
                return github_assets(*query)
            except Exception:
                continue
        try:
            found = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
        except Exception:
            found = release
        return found, fallback_assets

    file = Path(str(file).strip().replace("'", ''))
    if file.exists():  # nothing to download
        return str(file)

    name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.

    # URL specified
    if str(file).startswith(('http:/', 'https:/')):  # download
        url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
        local = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
        if Path(local).is_file():
            LOGGER.info(f'Found {url} locally at {local}')  # file already exists
        else:
            safe_download(file=local, url=url, min_bytes=1E5)
        return local

    # GitHub assets
    default_assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]
    tag, assets = resolve_release(default_assets)

    file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
    if name in assets:
        url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl'  # backup gdrive mirror
        safe_download(
            file,
            url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
            min_bytes=1E5,
            error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

    return str(file)
DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets'))  # global datasets directory
AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true'  # global verbose mode
TQDM_BAR_FORMAT = '{l_bar}{bar:10}| {n_fmt}/{total_fmt} {elapsed}'  # tqdm bar format
FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf

torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
pd.options.display.max_columns = 10
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS)  # NumExpr max threads
# platform.system() returns 'Darwin' (capitalised) on macOS, so compare against that exact spelling;
# a lowercase 'darwin' never matches and the single-thread OpenMP setting would never be applied.
os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'Darwin' else str(NUM_THREADS)  # OpenMP (PyTorch and SciPy)


def is_ascii(s=''):
    """Return True if str(s) is composed of ASCII characters only (lists, tuples, None, etc. are stringified)."""
    return str(s).isascii()  # str.isascii() exists since Python 3.7, the minimum enforced by check_python()


def is_chinese(s='人工智能'):
    """Return True if str(s) contains at least one character in the range U+4E00-U+9FFF."""
    return bool(re.search('[\u4e00-\u9fff]', str(s)))


def is_colab():
    """Return True if the google.colab module has been imported, i.e. the environment is a Google Colab instance."""
    return 'google.colab' in sys.modules


def is_notebook():
    """Return True if the active IPython shell type looks like a notebook kernel ('colab' or 'zmqshell').

    Verified on Colab, Jupyterlab, Kaggle, Paperspace.
    """
    ipython_type = str(type(IPython.get_ipython()))
    return 'colab' in ipython_type or 'zmqshell' in ipython_type
def is_docker() -> bool:
    """Check if the process runs inside a docker container."""
    if Path("/.dockerenv").exists():
        return True
    try:  # check if docker is in control groups
        with open("/proc/self/cgroup") as file:
            return any("docker" in line for line in file)
    except OSError:
        return False


def is_writeable(dir, test=False):
    """Return True if directory has write permissions.

    Arguments
        dir:   Directory to check
        test:  If True, actually create a temporary file inside `dir` instead of trusting os.access()
    """
    if not test:
        return os.access(dir, os.W_OK)  # possible issues on Windows

    import tempfile  # local import: only needed for the explicit write test

    try:
        # A uniquely named temporary file cannot clobber (or delete) a user file that happens to be called
        # 'tmp.txt', and it is removed automatically when closed.
        with tempfile.TemporaryFile(dir=dir):
            pass
        return True
    except OSError:
        return False


LOGGING_NAME = "yolov5"


def set_logging(name=LOGGING_NAME, verbose=True):
    """Set up logging for the given name.

    The level is INFO when `verbose` is set and the RANK environment variable is -1 or 0 (or unset),
    otherwise ERROR. Messages are written by a StreamHandler using a bare "%(message)s" format.
    """
    rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
    level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
    logging.config.dictConfig({
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            name: {
                "format": "%(message)s"}},
        "handlers": {
            name: {
                "class": "logging.StreamHandler",
                "formatter": name,
                "level": level,}},
        "loggers": {
            name: {
                "level": level,
                "handlers": [name],
                "propagate": False,}}})


set_logging(LOGGING_NAME)  # run before defining LOGGER
LOGGER = logging.getLogger(LOGGING_NAME)  # define globally (used in train.py, val.py, detect.py, etc.)
if platform.system() == 'Windows':
    for fn in LOGGER.info, LOGGER.warning:
        # Bind `fn` as a default argument: a plain closure would look `fn` up at call time, when it already
        # refers to the last loop item, so LOGGER.info() would end up calling LOGGER.warning().
        setattr(LOGGER, fn.__name__, lambda x, fn=fn: fn(emojis(x)))  # emoji safe logging
class Profile(contextlib.ContextDecorator):
    """YOLO Profile class. Usage: @Profile() decorator or 'with Profile():' context manager.

    Attributes set by the code:
        t:     accumulated time in seconds (starts at the `t` constructor argument)
        dt:    duration of the most recent timed section
        cuda:  whether torch.cuda.is_available() was true at construction; if so every timestamp is taken
               after torch.cuda.synchronize()
    """

    def __init__(self, t=0.0):
        self.cuda = torch.cuda.is_available()
        self.t = t

    def time(self):
        # Timestamp in seconds; synchronizes CUDA first so pending GPU work is included
        if self.cuda:
            torch.cuda.synchronize()
        return time.time()

    def __enter__(self):
        self.start = self.time()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        elapsed = self.time() - self.start
        self.dt = elapsed  # delta-time
        self.t += elapsed  # accumulate dt
class WorkingDirectory(contextlib.ContextDecorator):
    """Temporarily change the working directory.

    Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager

    The directory to return to is captured when the context is entered, not when the object is built. A
    decorator instance is created once at import time, so capturing at construction would send every later
    call back to whatever the working directory was during import.
    """

    def __init__(self, new_dir):
        self.dir = new_dir  # new dir
        self.cwd = Path.cwd().resolve()  # current dir (refreshed on every __enter__)
        self._previous = []  # stack of directories to restore, so nested/recursive use unwinds correctly

    def __enter__(self):
        self.cwd = Path.cwd().resolve()
        self._previous.append(self.cwd)
        os.chdir(self.dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        os.chdir(self._previous.pop())


def methods(instance):
    """Get class/instance methods: names of callable attributes that do not start with a double underscore."""
    return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]


def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
    """Log the arguments of the calling function.

    Arguments
        args:       Optional dict to print; when None the caller's own arguments are read from its frame
        show_file:  Prefix the output with the caller's file (relative to ROOT when possible, else its stem)
        show_func:  Prefix the output with the caller's function name
    """
    x = inspect.currentframe().f_back  # previous frame
    file, _, func, _, _ = inspect.getframeinfo(x)
    if args is None:  # get args automatically
        args, _, _, frm = inspect.getargvalues(x)
        args = {k: v for k, v in frm.items() if k in args}
    try:
        file = Path(file).resolve().relative_to(ROOT).with_suffix('')
    except ValueError:  # caller lives outside ROOT
        file = Path(file).stem
    s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '')
    LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items()))
def intersect_dicts(da, db, exclude=()):
    """Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values.

    A key of `da` is kept when it also exists in `db`, contains none of the `exclude` substrings, and both
    values report the same `.shape`.
    """
    matched = {}
    for key, value in da.items():
        if key not in db:
            continue
        if any(token in key for token in exclude):
            continue
        if value.shape == db[key].shape:
            matched[key] = value
    return matched


def get_default_args(func):
    """Get func() default arguments as a {parameter name: default value} dict."""
    defaults = {}
    for name, parameter in inspect.signature(func).parameters.items():
        if parameter.default is not inspect.Parameter.empty:
            defaults[name] = parameter.default
    return defaults


def get_latest_run(search_dir='.'):
    """Return path to most recent 'last*.pt' under search_dir (i.e. to --resume from), or '' if there is none."""
    candidates = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    if not candidates:
        return ''
    return max(candidates, key=os.path.getctime)


def file_age(path=__file__):
    """Return whole days since last file update."""
    modified = datetime.fromtimestamp(Path(path).stat().st_mtime)
    delta = datetime.now() - modified
    return delta.days  # + dt.seconds / 86400  # fractional days
def file_size(path):
    """Return file/dir size (MB); 0.0 if `path` is neither a file nor a directory."""
    mb = 1 << 20  # bytes to MiB (1024 ** 2)
    path = Path(path)
    if path.is_file():
        return path.stat().st_size / mb
    elif path.is_dir():
        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
    else:
        return 0.0


def check_online():
    """Check internet connectivity by opening a TCP connection to 1.1.1.1:443 (5 s timeout, two attempts)."""
    import socket

    def run_once():
        # Check once
        try:
            # `with` closes the probe socket instead of leaking it until garbage collection
            with socket.create_connection(("1.1.1.1", 443), 5):  # check host accessibility
                return True
        except OSError:
            return False

    return run_once() or run_once()  # check twice to increase robustness to intermittent connectivity issues


def git_describe(path=ROOT):  # path must be a directory
    """Return human-readable git description, i.e. v5.0-5-g3e25f1e, or '' if unavailable.

    https://git-scm.com/docs/git-describe
    """
    try:
        if not (Path(path) / '.git').is_dir():
            return ''
        # Argument list (no shell): a repository path containing spaces or shell metacharacters is passed
        # to git verbatim instead of being split or interpreted.
        cmd = ['git', '-C', str(path), 'describe', '--tags', '--long', '--always']
        return check_output(cmd).decode().strip()
    except Exception:  # best effort: git missing, not a repository, etc.
        return ''
@WorkingDirectory(ROOT)
def check_git_info(path='.'):
    """YOLO git info check, return {remote, branch, commit}.

    All three values are None when `path` is not a git repository; `branch` alone is None in a
    'detached HEAD' state.
    """
    check_requirements('gitpython')
    import git
    try:
        repo = git.Repo(path)
        remote = repo.remotes.origin.url
        # Strip only a trailing '.git'. str.replace() would remove every occurrence and corrupt URLs that
        # contain '.git' elsewhere, e.g. '.../user.github.io.git'.
        if remote.endswith('.git'):
            remote = remote[:-len('.git')]  # i.e. 'https://github.com/WongKinYiu/yolov9'
        commit = repo.head.commit.hexsha  # i.e. '3134699c73af83aac2a481435550b968d5792c0d'
        try:
            branch = repo.active_branch.name  # i.e. 'main'
        except TypeError:  # not on any branch
            branch = None  # i.e. 'detached HEAD' state
        return {'remote': remote, 'branch': branch, 'commit': commit}
    except git.exc.InvalidGitRepositoryError:  # path is not a git dir
        return {'remote': None, 'branch': None, 'commit': None}


def check_python(minimum='3.7.0'):
    """Check current python version vs. required python version; fails hard (assert) if it is too old."""
    check_version(platform.python_version(), minimum, name='Python ', hard=True)
@TryExcept()
def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''):
    """Check installed dependencies meet YOLO requirements and optionally pip-install the missing ones.

    Arguments
        requirements:  *.txt file (Path), list of package specifiers, or a single package string
        exclude:       Package names to skip when reading a requirements file
        install:       Attempt `pip install` of unmet requirements (also requires AUTOINSTALL)
        cmds:          Extra text appended to the pip command line
    """
    prefix = colorstr('red', 'bold', 'requirements:')
    check_python()  # check python version

    req_file = None  # set only when the requirements came from a file
    if isinstance(requirements, Path):  # requirements.txt file
        req_file = requirements.resolve()
        assert req_file.exists(), f"{prefix} {req_file} not found, check failed."
        with req_file.open() as handle:
            requirements = [
                f'{x.name}{x.specifier}' for x in pkg.parse_requirements(handle) if x.name not in exclude]
    elif isinstance(requirements, str):
        requirements = [requirements]

    unmet = []
    for requirement in requirements:
        try:
            pkg.require(requirement)
        except (pkg.VersionConflict, pkg.DistributionNotFound):  # exception if requirements not met
            unmet.append(requirement)
    n = len(unmet)
    s = ''.join(f'"{r}" ' for r in unmet)  # quoted, space separated, ready for the pip command line

    if not (s and install and AUTOINSTALL):  # check environment variable
        return

    LOGGER.info(f"{prefix} YOLO requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...")
    try:
        # assert check_online(), "AutoUpdate skipped (offline)"
        LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode())
        source = requirements if req_file is None else req_file
        s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
            f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
        LOGGER.info(s)
    except Exception as e:
        LOGGER.warning(f'{prefix} ❌ {e}')
def check_imshow(warn=False):
    """Check if environment supports image displays.

    Returns False inside notebooks and docker containers, or when opening a 1x1 test window with OpenCV
    raises; with warn=True the reason is logged.
    """
    try:
        assert not is_notebook()
        assert not is_docker()
        blank = np.zeros((1, 1, 3))
        cv2.imshow('test', blank)
        cv2.waitKey(1)
        cv2.destroyAllWindows()
        cv2.waitKey(1)
    except Exception as e:
        if warn:
            LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}')
        return False
    return True


def check_suffix(file='yolo.pt', suffix=('.pt',), msg=''):
    """Check file(s) for acceptable suffix.

    Arguments
        file:    File name or list/tuple of file names; names without a suffix are accepted
        suffix:  Acceptable suffix or collection of suffixes (compared against the lower-cased file suffix)
        msg:     Text prepended to the assertion message
    """
    if not (file and suffix):
        return
    if isinstance(suffix, str):
        suffix = [suffix]
    candidates = file if isinstance(file, (list, tuple)) else [file]
    for candidate in candidates:
        extension = Path(candidate).suffix.lower()  # file suffix
        if extension:
            assert extension in suffix, f"{msg}{candidate} acceptable suffix is {suffix}"


def check_yaml(file, suffix=('.yaml', '.yml')):
    """Search/download YAML file (if necessary) and return path, checking suffix."""
    located = check_file(file, suffix)
    return located
def check_font(font=FONT, progress=False):
    """Download font to CONFIG_DIR if necessary.

    Nothing is downloaded when `font` exists as given or a file with the same name already exists in
    CONFIG_DIR. The function returns None in every case.
    """
    # Download font to CONFIG_DIR if necessary
    font = Path(font)
    file = CONFIG_DIR / font.name
    if not font.exists() and not file.exists():
        url = f'https://ultralytics.com/assets/{font.name}'
        LOGGER.info(f'Downloading {url} to {file}...')
        torch.hub.download_url_to_file(url, str(file), progress=progress)


def check_dataset(data, autodownload=True):
    """Download, check and/or unzip dataset if not found locally.

    Arguments
        data:          Dataset dict, path to a dataset yaml, or path to a zip/tar archive containing one
        autodownload:  Run the yaml's 'download' entry when the 'val' paths are missing

    Returns
        The dataset dict. A dict passed in is modified in place: 'names' is converted to a dict, 'nc' is
        set, and 'path'/'train'/'val'/'test' are rewritten as resolved paths.

    Raises
        AssertionError: a required field is missing or the 'names' keys are not integers
        Exception: the 'val' paths do not exist and no download is possible/allowed
    """
    # Download, check and/or unzip dataset if not found locally

    # Download (optional)
    extract_dir = ''
    if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
        download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1)
        data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))  # first yaml found in the extracted tree
        extract_dir, autodownload = data.parent, False

    # Read yaml (optional)
    if isinstance(data, (str, Path)):
        data = yaml_load(data)  # dictionary

    # Checks
    for k in 'train', 'val', 'names':
        assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
    if isinstance(data['names'], (list, tuple)):  # old array format
        data['names'] = dict(enumerate(data['names']))  # convert to dict
    assert all(isinstance(k, int) for k in data['names'].keys()), 'data.yaml names keys must be integers, i.e. 2: car'
    data['nc'] = len(data['names'])

    # Resolve paths
    path = Path(extract_dir or data.get('path') or '')  # optional 'path' default to '.'
    if not path.is_absolute():
        path = (ROOT / path).resolve()
        data['path'] = path  # download scripts (note: stored as a pathlib.Path, not a str)
    for k in 'train', 'val', 'test':
        if data.get(k):  # prepend path
            if isinstance(data[k], str):
                x = (path / data[k]).resolve()
                if not x.exists() and data[k].startswith('../'):
                    x = (path / data[k][3:]).resolve()  # retry without the leading '../'
                data[k] = str(x)
            else:
                data[k] = [str((path / x).resolve()) for x in data[k]]

    # Parse yaml
    train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()])
            if not s or not autodownload:
                raise Exception('Dataset not found ❌')
            t = time.time()
            if s.startswith('http') and s.endswith('.zip'):  # URL
                f = Path(s).name  # filename
                LOGGER.info(f'Downloading {s} to {f}...')
                torch.hub.download_url_to_file(s, f)
                Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True)  # create root
                unzip_file(f, path=DATASETS_DIR)  # unzip
                Path(f).unlink()  # remove zip
                r = None  # success
            # NOTE(review): the two branches below execute text taken from the dataset yaml, as a shell
            # command and as Python source respectively - only use dataset yaml files from trusted sources.
            elif s.startswith('bash '):  # bash script
                LOGGER.info(f'Running {s} ...')
                r = os.system(s)
            else:  # python script
                r = exec(s, {'yaml': data})  # return None
            dt = f'({round(time.time() - t, 1)}s)'
            s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌"
            LOGGER.info(f"Dataset download {s}")
    check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True)  # download fonts
    return data  # dictionary
def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
    """Extract a *.zip archive, skipping members whose name contains any string in `exclude`.

    Args:
        file: path of the zip archive.
        path: destination directory; defaults to the directory that contains `file`.
        exclude: substrings that disqualify an archive member from extraction.
    """
    destination = Path(file).parent if path is None else path
    with ZipFile(file) as archive:
        for member in archive.namelist():
            if any(token in member for token in exclude):
                continue  # unwanted metadata entry
            archive.extract(member, path=destination)
def make_divisible(x, divisor):
    """Round `x` up to the next multiple of `divisor`.

    Args:
        x: value to round.
        divisor: step size; when it is a torch.Tensor its maximum element (as int) is used.

    Returns:
        The smallest multiple of the divisor that is >= x.
    """
    step = int(divisor.max()) if isinstance(divisor, torch.Tensor) else divisor
    n_steps = math.ceil(x / step)
    return n_steps * step
def colorstr(*input):
    """Wrap a string in ANSI escape codes, i.e. colorstr('blue', 'hello world').

    The last positional argument is the text; every argument before it names a colour or style.
    When a single argument is given it is rendered blue and bold.
    See https://en.wikipedia.org/wiki/ANSI_escape_code

    Raises:
        KeyError: a style name is not in the table below.
    """
    codes = {
        # basic colors
        'black': '\033[30m', 'red': '\033[31m', 'green': '\033[32m', 'yellow': '\033[33m',
        'blue': '\033[34m', 'magenta': '\033[35m', 'cyan': '\033[36m', 'white': '\033[37m',
        # bright colors
        'bright_black': '\033[90m', 'bright_red': '\033[91m', 'bright_green': '\033[92m',
        'bright_yellow': '\033[93m', 'bright_blue': '\033[94m', 'bright_magenta': '\033[95m',
        'bright_cyan': '\033[96m', 'bright_white': '\033[97m',
        # misc
        'end': '\033[0m', 'bold': '\033[1m', 'underline': '\033[4m'}

    if len(input) > 1:
        *styles, text = input
    else:
        styles, text = ['blue', 'bold'], input[0]  # default styling

    prefix = ''.join(codes[name] for name in styles)
    return prefix + f'{text}' + codes['end']
def labels_to_image_weights(labels, nc=80, class_weights=None):
    """Produce one sampling weight per image from its class contents.

    Usage: index = random.choices(range(n), weights=image_weights, k=1)  # weighted image sample

    Args:
        labels: sequence with one array per image; column 0 of each array holds the class index.
        nc: number of classes.
        class_weights: array of `nc` per-class weights. Defaults to all ones for the given `nc`
            (the previous fixed 80-element default could not be reshaped when nc != 80).

    Returns:
        Array with one value per image: the sum over classes of class weight * class count.
    """
    if class_weights is None:
        class_weights = np.ones(nc)
    # reshape keeps the (n_images, nc) layout even when `labels` is empty
    class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels]).reshape(-1, nc)
    return (class_weights.reshape(1, nc) * class_counts).sum(1)
def segment2box(segment, width=640, height=640):
    """Convert 1 segment label to 1 box label, keeping only points inside the image.

    Args:
        segment: (n, 2) array of xy points.
        width: image width; points with x outside [0, width] are discarded.
        height: image height; points with y outside [0, height] are discarded.

    Returns:
        Array [x_min, y_min, x_max, y_max] over the retained points, or np.zeros((1, 4)) when no
        point lies inside the image.
    """
    x, y = segment.T  # segment xy
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x, y = x[inside], y[inside]
    # Test for remaining points, not for truthy coordinates: points lying on x == 0 are valid
    if len(x):
        return np.array([x.min(), y.min(), x.max(), y.max()])  # xyxy
    return np.zeros((1, 4))
def resample_segments(segments, n=1000):
    """Resample every (m, 2) segment in `segments` to `n` points, in place.

    Each segment is closed by appending its first point, then x and y are linearly interpolated
    at `n` evenly spaced positions along the point index.

    Returns:
        The same list object, with each entry replaced by an (n, 2) array.
    """
    for idx, seg in enumerate(segments):
        closed = np.concatenate((seg, seg[0:1, :]), axis=0)  # repeat first point at the end
        knots = np.arange(len(closed))
        samples = np.linspace(0, len(closed) - 1, n)
        xs = np.interp(samples, knots, closed[:, 0])
        ys = np.interp(samples, knots, closed[:, 1])
        segments[idx] = np.concatenate([xs, ys]).reshape(2, -1).T  # segment xy
    return segments
def non_max_suppression(
        prediction,
        conf_thres=0.25,
        iou_thres=0.45,
        classes=None,
        agnostic=False,
        multi_label=False,
        labels=(),
        max_det=300,
        nm=0,  # number of masks
):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections

    Args:
        prediction: tensor indexed as (batch, 4 + nc + nm, candidates) holding xywh boxes, per-class
            scores and mask coefficients; a list/tuple is accepted and its first element is used.
        conf_thres: minimum class score for a candidate to be kept, in [0, 1].
        iou_thres: IoU threshold passed to torchvision.ops.nms, in [0, 1].
        classes: optional list of class indices to keep.
        agnostic: when True boxes of different classes suppress each other.
        multi_label: allow several labels per box (only effective when nc > 1).
        labels: optional per-image apriori labels with columns [cls, x, y, w, h] that are appended
            to the candidates (autolabelling).
        max_det: maximum number of detections kept per image.
        nm: number of mask coefficients per candidate.

    Returns:
         list of detections, one (n, 6 + nm) tensor per image [xyxy, conf, cls, masks]

    Raises:
        AssertionError: conf_thres or iou_thres is outside [0, 1].
    """

    if isinstance(prediction, (list, tuple)):  # YOLO model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[1] - nm - 4  # number of classes
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 2.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x.T[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            # Rows must be exactly as wide as the predictions (4 box + nc classes + nm masks, no
            # objectness column); a wider tensor makes the torch.cat below fail.
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)
        box = xywh2xyxy(box)  # center_x, center_y, width, height) to (x1, y1, x2, y2)
        if multi_label:
            i, j = (cls > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence
        else:
            x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

        # Batched NMS
        # Offsetting boxes by class index * max_wh keeps classes from suppressing each other
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)
        if (time.time() - t) > time_limit:
            LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded

    return output
def increment_path(path, exist_ok=False, sep='', mkdir=False):
    """Return `path`, or the first free numbered variant of it when it already exists.

    i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. For an existing file the number is
    inserted before the suffix (result.txt --> result2.txt).

    Args:
        path: file or directory path.
        exist_ok: when True an existing path is returned unchanged.
        sep: separator placed between the original name and the number.
        mkdir: when True the returned path is created as a directory (with parents).

    Returns:
        pathlib.Path
    """
    path = Path(path)  # os-agnostic
    if not exist_ok and path.exists():
        if path.is_file():
            base, suffix = path.with_suffix(''), path.suffix
        else:
            base, suffix = path, ''

        # Probe 2, 3, ... and stop at the first name that is not taken
        for n in range(2, 9999):
            candidate = f'{base}{sep}{n}{suffix}'
            if not os.path.exists(candidate):
                break
        path = Path(candidate)

    if mkdir:
        path.mkdir(parents=True, exist_ok=True)  # make directory
    return path
@torch.no_grad()
def step(self, closure=None):
    """Performs a single optimization step.

    For every parameter with a gradient: apply decoupled weight decay, move the parameter by
    `lr` against the sign of the interpolation between the momentum and the gradient, then
    update the momentum as an exponential moving average of the gradient.

    Args:
        closure (callable, optional): A closure that reevaluates the model and returns the loss.

    Returns:
        the loss returned by `closure`, or None when no closure is given.
    """
    loss = None
    if closure is not None:
        with torch.enable_grad():
            loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            grad = p.grad
            if grad is None:
                continue

            lr = group['lr']
            # Decoupled (step) weight decay
            p.data.mul_(1 - lr * group['weight_decay'])

            state = self.state[p]
            if len(state) == 0:
                # Lazily create the exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p)
            momentum = state['exp_avg']
            beta1, beta2 = group['betas']

            # Weight update: only the sign of the interpolated direction is used
            direction = momentum * beta1 + grad * (1 - beta1)
            p.add_(torch.sign(direction), alpha=-lr)

            # Decay the momentum running average coefficient
            momentum.mul_(beta2).add_(grad, alpha=1 - beta2)

    return loss
# Names of all supported logging backends: *.csv, TensorBoard, Weights & Biases, ClearML, Comet
LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet')
# Process rank read from the RANK environment variable; -1 when the variable is not set
RANK = int(os.getenv('RANK', -1))

# Each block below leaves its module name bound to None when the backend cannot be used, so the
# rest of the file can test availability with a plain truthiness check.
try:
    import wandb

    assert hasattr(wandb, '__version__')  # verify package import not local dir
    # Only ranks 0 and -1 attempt a login, and only for wandb >= 0.12.2
    if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}:
        try:
            wandb_login_success = wandb.login(timeout=30)
        except wandb.errors.UsageError:  # known non-TTY terminal issue
            wandb_login_success = False
        if not wandb_login_success:
            wandb = None  # a failed login disables W&B logging
except (ImportError, AssertionError):
    wandb = None

try:
    import clearml

    assert hasattr(clearml, '__version__')  # verify package import not local dir
except (ImportError, AssertionError):
    clearml = None

try:
    if RANK not in [0, -1]:
        comet_ml = None  # Comet is disabled on all ranks other than 0 and -1
    else:
        import comet_ml

        assert hasattr(comet_ml, '__version__')  # verify package import not local dir
        from utils.loggers.comet import CometLogger

except (ModuleNotFoundError, ImportError, AssertionError):
    comet_ml = None
prefix = colorstr('Weights & Biases: ') # s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLO 🚀 runs in Weights & Biases" # self.logger.info(s) if not clearml: prefix = colorstr('ClearML: ') s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLO 🚀 in ClearML" self.logger.info(s) if not comet_ml: prefix = colorstr('Comet: ') s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLO 🚀 runs in Comet" self.logger.info(s) # TensorBoard s = self.save_dir if 'tb' in self.include and not self.opt.evolve: prefix = colorstr('TensorBoard: ') self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") self.tb = SummaryWriter(str(s)) # W&B if wandb and 'wandb' in self.include: wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None self.opt.hyp = self.hyp # add hyperparameters self.wandb = WandbLogger(self.opt, run_id) # temp warn. because nested artifacts not supported after 0.12.10 # if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.11'): # s = "YOLO temporarily requires wandb version 0.12.10 or below. Some features may not work as expected." 
def on_pretrain_routine_end(self, labels, names):
    """Callback runs on pre-train routine end: plot the label statistics and hand the images to the loggers.

    Nothing is done when plotting is disabled (`self.plots` is falsy).

    Args:
        labels: training labels, forwarded to plot_labels.
        names: class names, forwarded to plot_labels.
    """
    if not self.plots:
        return
    plot_labels(labels, names, self.save_dir)
    # Path.glob yields a one-shot generator; materialize it so that the W&B branch does not
    # exhaust it and leave Comet with nothing to upload.
    paths = list(self.save_dir.glob('*labels*.jpg'))  # training labels
    if self.wandb:
        self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
    # if self.clearml:
    #    pass  # ClearML saves these images automatically using hooks
    if self.comet_logger:
        self.comet_logger.on_pretrain_routine_end(paths)
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
    """Callback runs on val end: publish the saved val*.jpg images and forward the metrics to Comet."""
    wandb_logger, clearml_logger = self.wandb, self.clearml
    if wandb_logger or clearml_logger:
        files = sorted(self.save_dir.glob('val*.jpg'))
        if wandb_logger:
            wandb_logger.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})
        if clearml_logger:
            clearml_logger.log_debug_samples(files, title='Validation')

    comet = self.comet_logger
    if comet:
        comet.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
    """Callback runs on model save event.

    W&B and ClearML receive the checkpoint every `opt.save_period` epochs, except on the final
    epoch. A period below 1 disables periodic uploads; the period is checked before the modulo so
    that a period of 0 no longer raises ZeroDivisionError. Comet is notified on every call.

    Args:
        last: path of the most recent checkpoint.
        epoch: zero-based index of the epoch that just finished.
        final_epoch: True when this is the last epoch of the run.
        best_fitness: best fitness seen so far.
        fi: fitness of the current epoch.
    """
    period = self.opt.save_period
    if period > 0 and (epoch + 1) % period == 0 and not final_epoch:
        if self.wandb:
            self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
        if self.clearml:
            self.clearml.task.update_output_model(model_path=str(last),
                                                  model_name='Latest Model',
                                                  auto_delete_file=False)

    if self.comet_logger:
        self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
class GenericLogger:
    """
    YOLO general-purpose logger for non-task-specific logging (CSV, TensorBoard, W&B).

    Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)

    Arguments
        opt:             Run arguments (reads save_dir; project and name when W&B is enabled)
        console_logger:  Console logger
        include:         loggers to include
    """

    def __init__(self, opt, console_logger, include=('tb', 'wandb')):
        # init default loggers
        self.save_dir = Path(opt.save_dir)
        self.include = include
        self.console_logger = console_logger
        self.csv = self.save_dir / 'results.csv'  # CSV logger

        # Always define self.tb: the log_* methods test `if self.tb`, which would
        # raise AttributeError if TensorBoard was not requested.
        self.tb = None
        if 'tb' in self.include:
            prefix = colorstr('TensorBoard: ')
            self.console_logger.info(
                f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/")
            self.tb = SummaryWriter(str(self.save_dir))

        self.wandb = None
        if 'wandb' in self.include and wandb:
            self.wandb = wandb.init(project=web_project_name(str(opt.project)),
                                    name=None if opt.name == "exp" else opt.name,
                                    config=opt)

    def log_metrics(self, metrics, epoch):
        """Log a metrics dictionary to all enabled loggers (CSV row, TensorBoard scalars, W&B)."""
        if self.csv:
            keys, vals = list(metrics.keys()), list(metrics.values())
            n = len(metrics) + 1  # number of cols
            # Write the header only when the CSV file does not exist yet
            s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n')
            with open(self.csv, 'a') as f:
                f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')

        if self.tb:
            for k, v in metrics.items():
                self.tb.add_scalar(k, v, epoch)

        if self.wandb:
            self.wandb.log(metrics, step=epoch)

    def log_images(self, files, name='Images', epoch=0):
        """Log one image file or a list/tuple of image files to all enabled loggers; missing files are skipped."""
        files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])]  # to Path
        files = [f for f in files if f.exists()]  # filter by exists

        if self.tb:
            for f in files:
                # reverse the channel axis of the array returned by cv2.imread
                self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')

        if self.wandb:
            self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch)

    def log_graph(self, model, imgsz=(640, 640)):
        """Log the model graph to TensorBoard when it is enabled."""
        if self.tb:
            log_tensorboard_graph(self.tb, model, imgsz)

    def log_model(self, model_path, epoch=0, metadata=None):
        """Log the model file as a W&B artifact when W&B is enabled.

        `metadata` defaults to an empty dict; a None default is used so that no
        mutable object is shared between calls.
        """
        metadata = {} if metadata is None else metadata
        if self.wandb:
            art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata)
            art.add_file(str(model_path))
            wandb.log_artifact(art)

    def update_params(self, params):
        """Update the parameters logged in the W&B run config."""
        if self.wandb:
            wandb.run.config.update(params, allow_val_change=True)
'-Segment' if project.endswith('-seg') else '' return f'YOLO{suffix}' ================================================ FILE: utils/loggers/clearml/__init__.py ================================================ # init ================================================ FILE: utils/loggers/clearml/clearml_utils.py ================================================ """Main Logger class for ClearML experiment tracking.""" import glob import re from pathlib import Path import numpy as np import yaml from utils.plots import Annotator, colors try: import clearml from clearml import Dataset, Task assert hasattr(clearml, '__version__') # verify package import not local dir except (ImportError, AssertionError): clearml = None def construct_dataset(clearml_info_string): """Load in a clearml dataset and fill the internal data_dict with its contents. """ dataset_id = clearml_info_string.replace('clearml://', '') dataset = Dataset.get(dataset_id=dataset_id) dataset_root_path = Path(dataset.get_local_copy()) # We'll search for the yaml file definition in the dataset yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml"))) if len(yaml_filenames) > 1: raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains ' 'the dataset definition this way.') elif len(yaml_filenames) == 0: raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file ' 'inside the dataset root path.') with open(yaml_filenames[0]) as f: dataset_definition = yaml.safe_load(f) assert set(dataset_definition.keys()).issuperset( {'train', 'test', 'val', 'nc', 'names'} ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')" data_dict = dict() data_dict['train'] = str( (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None data_dict['test'] = str( 
class ClearmlLogger:
    """Log training runs, datasets, models, and predictions to ClearML.

    This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
    this information includes hyperparameters, system configuration and metrics, model metrics, code information and
    basic data metrics and analyses.

    By providing additional command line arguments to train.py, datasets,
    models and predictions can also be logged.
    """

    def __init__(self, opt, hyp):
        """
        - Initialize ClearML Task, this object will capture the experiment
        - Resolve a ClearML dataset into a data dict when opt.data starts with 'clearml://'

        arguments:
        opt (namespace) -- Commandline arguments for this run
        hyp (dict) -- Hyperparameters for this run
        """
        self.current_epoch = 0
        # Keep track of the images already logged this epoch to enforce a limit
        self.current_epoch_logged_images = set()
        # Maximum number of images to log to ClearML per epoch
        self.max_imgs_to_log_per_epoch = 16
        # Get the interval of epochs when bounding box images should be logged
        self.bbox_interval = opt.bbox_interval
        self.clearml = clearml
        self.task = None
        self.data_dict = None
        if self.clearml:
            self.task = Task.init(
                project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
                task_name=opt.name if opt.name != 'exp' else 'Training',
                tags=['YOLOv5'],
                output_uri=True,
                # We disconnect pytorch auto-detection, because we added manual model save points in the code
                auto_connect_frameworks={'pytorch': False},
            )
            # ClearML's hooks will already grab all general parameters
            # Only the hyperparameters coming from the yaml config file
            # will have to be added manually!
            self.task.connect(hyp, name='Hyperparameters')

            # Get ClearML Dataset Version if requested
            if opt.data.startswith('clearml://'):
                # data_dict should have the following keys:
                # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
                self.data_dict = construct_dataset(opt.data)
                # Set data to data_dict because wandb will crash without this information and opt is the best way
                # to give it to them
                opt.data = self.data_dict

    def log_debug_samples(self, files, title='Debug Samples'):
        """
        Log files (images) as debug samples in the ClearML task.

        The iteration is parsed from a ``_batch<N>`` fragment in the file name and that
        fragment is removed from the series name. Files without such a fragment are
        reported at iteration 0 under their unmodified name. Non-existent files are skipped.

        arguments:
        files (List(PosixPath)) a list of file paths in PosixPath format
        title (str) A title that groups together images with the same values
        """
        for f in files:
            if not f.exists():
                continue
            match = re.search(r'_batch(\d+)', f.name)
            if match:
                iteration = int(match.group(1))
                series = f.name.replace(match.group(), '')
            else:
                # No batch index in the name: calling match.group() here would raise AttributeError
                iteration = 0
                series = f.name
            self.task.get_logger().report_image(title=title,
                                                series=series,
                                                local_path=str(f),
                                                iteration=iteration)

    def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
        """
        Draw the bounding boxes on a single image and report the result as a ClearML debug sample.

        Nothing is logged once the per-epoch image limit is reached, when the current epoch
        is not a multiple of bbox_interval, or when the image was already logged this epoch.

        arguments:
        image_path (PosixPath) the path the original image file
        boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
        class_names (dict): dict containing mapping of class int to class name
        image (Tensor): A torch tensor containing the actual image data
        conf_threshold (float): only boxes with a confidence above this value are drawn
        """
        if len(self.current_epoch_logged_images) >= self.max_imgs_to_log_per_epoch or self.current_epoch < 0:
            return
        # Log every bbox_interval epochs and deduplicate for any intermittent extra eval runs
        if self.current_epoch % self.bbox_interval != 0 or image_path in self.current_epoch_logged_images:
            return

        # Scale to 0-255 bytes and move axis 0 to the end before handing the array to the Annotator
        im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
        annotator = Annotator(im=im, pil=True)
        for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
            if conf > conf_threshold:
                color = colors(i)
                class_name = class_names[int(class_nr)]
                confidence_percentage = round(float(conf) * 100, 2)
                label = f"{class_name}: {confidence_percentage}%"
                annotator.rectangle(box.cpu().numpy(), outline=color)
                annotator.box_label(box.cpu().numpy(), label=label, color=color)

        annotated_image = annotator.result()
        self.task.get_logger().report_image(title='Bounding Boxes',
                                            series=image_path.name,
                                            iteration=self.current_epoch,
                                            image=annotated_image)
        self.current_epoch_logged_images.add(image_path)
# here we define the hyper-parameters to optimize # Notice: The parameter name should exactly match what you see in the UI: / # For Example, here we see in the base experiment a section Named: "General" # under it a parameter named "batch_size", this becomes "General/batch_size" # If you have `argparse` for example, then arguments will appear under the "Args" section, # and you should instead pass "Args/batch_size" hyper_parameters=[ UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0), UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7), UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0), UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0), UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1), UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9), UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9), UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0), 
UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9), UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9), UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0), UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001), UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0), UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0), UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0), UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0), UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)], # this is the objective metric we want to maximize/minimize objective_metric_title='metrics', objective_metric_series='mAP_0.5', # now we decide if we want to maximize it or minimize it (accuracy we maximize) objective_metric_sign='max', # let us limit the number of concurrent experiments, # this in turn will make sure we do dont bombard the scheduler with experiments. 
# if we have an auto-scaler connected, this, by proxy, will limit the number of machine max_number_of_concurrent_tasks=1, # this is the optimizer class (actually doing the optimization) # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band) optimizer_class=OptimizerOptuna, # If specified only the top K performing Tasks will be kept, the others will be automatically archived save_top_k_tasks_only=5, # 5, compute_time_limit=None, total_max_jobs=20, min_iteration_per_job=None, max_iteration_per_job=None, ) # report every 10 seconds, this is way too often, but we are testing here optimizer.set_report_period(10 / 60) # You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent # an_optimizer.start_locally(job_complete_callback=job_complete_callback) # set the time limit for the optimization process (2 hours) optimizer.set_time_limit(in_minutes=120.0) # Start the optimization process in the local environment optimizer.start_locally() # wait until process is done (notice we are controlling the optimization process in the background) optimizer.wait() # make sure background optimization stopped optimizer.stop() print('We are done, good bye') ================================================ FILE: utils/loggers/comet/__init__.py ================================================ import glob import json import logging import os import sys from pathlib import Path logger = logging.getLogger(__name__) FILE = Path(__file__).resolve() ROOT = FILE.parents[3] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH try: import comet_ml # Project Configuration config = comet_ml.config.get_config() COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") except (ModuleNotFoundError, ImportError): comet_ml = None COMET_PROJECT_NAME = None import PIL import torch import 
class CometLogger:
    """Log metrics, parameters, source code, models and much more with Comet."""

    def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None:
        """Create (or resume) the Comet experiment and log the run configuration.

        Args:
            opt: run options namespace (reads save_period, upload_dataset, resume, data,
                hyp, save_dir, bbox_interval, epochs and optional conf_thres / iou_thres /
                comet_optimizer_* attributes).
            hyp: hyperparameters of the run.
            run_id: id of an existing experiment to continue, or None for a new one.
            job_type: stored on the instance as ``self.job_type``.
            **experiment_kwargs: extra keyword arguments forwarded to the Comet experiment.
        """
        self.job_type = job_type
        self.opt = opt
        self.hyp = hyp

        # Comet Flags
        self.comet_mode = COMET_MODE

        self.save_model = opt.save_period > -1
        self.model_name = COMET_MODEL_NAME

        # Batch Logging Settings
        self.log_batch_metrics = COMET_LOG_BATCH_METRICS
        # The module constant is a str when it comes from the environment; it is used
        # as a modulo divisor in on_train_batch_end, so coerce it to int here.
        self.comet_log_batch_interval = int(COMET_BATCH_LOGGING_INTERVAL)

        # Dataset Artifact Settings
        self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET
        self.resume = self.opt.resume

        # Default parameters to pass to Experiment objects
        self.default_experiment_kwargs = {
            "log_code": False,
            "log_env_gpu": True,
            "log_env_cpu": True,
            "project_name": COMET_PROJECT_NAME,
        }
        self.default_experiment_kwargs.update(experiment_kwargs)
        self.experiment = self._get_experiment(self.comet_mode, run_id)

        self.data_dict = self.check_dataset(self.opt.data)
        self.class_names = self.data_dict["names"]
        self.num_classes = self.data_dict["nc"]

        self.logged_images_count = 0
        self.max_images = COMET_MAX_IMAGE_UPLOADS

        if run_id is None:
            self.experiment.log_other("Created from", "YOLOv5")
            if not isinstance(self.experiment, comet_ml.OfflineExperiment):
                workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:]
                self.experiment.log_other(
                    "Run Path",
                    f"{workspace}/{project_name}/{experiment_id}",
                )
            self.log_parameters(vars(opt))
            self.log_parameters(self.opt.hyp)
            self.log_asset_data(
                self.opt.hyp,
                name="hyperparameters.json",
                metadata={"type": "hyp-config-file"},
            )
            self.log_asset(
                f"{self.opt.save_dir}/opt.yaml",
                metadata={"type": "opt-config-file"},
            )

        self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX

        # Prefer thresholds supplied on opt, fall back to the module defaults
        self.conf_thres = self.opt.conf_thres if hasattr(self.opt, "conf_thres") else CONF_THRES
        self.iou_thres = self.opt.iou_thres if hasattr(self.opt, "iou_thres") else IOU_THRES

        self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres})

        self.comet_log_predictions = COMET_LOG_PREDICTIONS
        if self.opt.bbox_interval == -1:
            self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10
        else:
            self.comet_log_prediction_interval = self.opt.bbox_interval

        if self.comet_log_predictions:
            self.metadata_dict = {}
            self.logged_image_names = []

        self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS

        self.experiment.log_others({
            "comet_mode": COMET_MODE,
            "comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS,
            "comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS,
            "comet_log_batch_metrics": COMET_LOG_BATCH_METRICS,
            "comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX,
            "comet_model_name": COMET_MODEL_NAME,
        })

        # Check if running the Experiment with the Comet Optimizer
        if hasattr(self.opt, "comet_optimizer_id"):
            self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id)
            self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective)
            self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric)
            self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))

    def _get_experiment(self, mode, experiment_id=None):
        """Return a new or existing Comet experiment for `mode`.

        In any mode other than "offline", a ValueError from the online constructors
        triggers a warning and a fallback to an offline experiment.
        """
        if mode == "offline":
            if experiment_id is not None:
                return comet_ml.ExistingOfflineExperiment(
                    previous_experiment=experiment_id,
                    **self.default_experiment_kwargs,
                )
            return comet_ml.OfflineExperiment(**self.default_experiment_kwargs)

        try:
            if experiment_id is not None:
                return comet_ml.ExistingExperiment(
                    previous_experiment=experiment_id,
                    **self.default_experiment_kwargs,
                )
            return comet_ml.Experiment(**self.default_experiment_kwargs)
        except ValueError:
            logger.warning("COMET WARNING: "
                           "Comet credentials have not been set. "
                           "Comet will default to offline logging. "
                           "Please set your credentials to enable online logging.")
            return self._get_experiment("offline", experiment_id)

    def log_metrics(self, log_dict, **kwargs):
        """Forward a metrics dict to the experiment."""
        self.experiment.log_metrics(log_dict, **kwargs)

    def log_parameters(self, log_dict, **kwargs):
        """Forward a parameters dict to the experiment."""
        self.experiment.log_parameters(log_dict, **kwargs)

    def log_asset(self, asset_path, **kwargs):
        """Forward an asset path to the experiment."""
        self.experiment.log_asset(asset_path, **kwargs)

    def log_asset_data(self, asset, **kwargs):
        """Forward in-memory asset data to the experiment."""
        self.experiment.log_asset_data(asset, **kwargs)

    def log_image(self, img, **kwargs):
        """Forward an image to the experiment."""
        self.experiment.log_image(img, **kwargs)

    def log_model(self, path, opt, epoch, fitness_score, best_model=False):
        """Upload every ``*.pt`` file found in `path` as the experiment model; no-op unless saving is enabled."""
        if not self.save_model:
            return

        model_metadata = {
            "fitness_score": fitness_score[-1],
            "epochs_trained": epoch + 1,
            "save_period": opt.save_period,
            "total_epochs": opt.epochs,
        }

        for model_path in glob.glob(f"{path}/*.pt"):
            self.experiment.log_model(
                self.model_name,
                file_or_folder=model_path,
                file_name=Path(model_path).name,
                metadata=model_metadata,
                overwrite=True,
            )

    def check_dataset(self, data_file):
        """Load the dataset definition, downloading it from Comet when its `path` starts with the Comet prefix.

        Otherwise the data config file is logged as an asset and the module-level
        `check_dataset` helper is used.
        """
        with open(data_file) as f:
            data_config = yaml.safe_load(f)

        # Use .get so that a yaml file without a 'path' key does not raise KeyError
        dataset_path = str(data_config.get('path') or '')
        if dataset_path.startswith(COMET_PREFIX):
            return self.download_dataset_artifact(dataset_path.replace(COMET_PREFIX, ""))

        self.log_asset(self.opt.data, metadata={"type": "data-config-file"})
        return check_dataset(data_file)

    def log_predictions(self, image, labelsn, path, shape, predn):
        """Record labels and detections that overlap above the IoU threshold for one image.

        The image itself is logged once per (image, epoch) name; the box metadata is
        stored in ``self.metadata_dict``. Nothing happens once ``self.max_images`` is reached
        or when no label/detection pair exceeds the IoU threshold.
        """
        if self.logged_images_count >= self.max_images:
            return
        detections = predn[predn[:, 4] > self.conf_thres]
        iou = box_iou(labelsn[:, 1:], detections[:, :4])
        # Labels are the first argument to box_iou and detections the second, so dim 0
        # of `iou` is treated as the label index and dim 1 as the detection index.
        # Each tensor must be indexed with its own set of indices.
        label_idx, det_idx = torch.where(iou > self.iou_thres)
        if len(label_idx) == 0:
            return

        # unique(): one entry per matched box even if it matched several partners
        filtered_detections = detections[torch.unique(det_idx)]
        filtered_labels = labelsn[torch.unique(label_idx)]

        image_id = path.split("/")[-1].split(".")[0]
        image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}"
        if image_name not in self.logged_image_names:
            native_scale_image = PIL.Image.open(path)
            self.log_image(native_scale_image, name=image_name)
            self.logged_image_names.append(image_name)

        metadata = []
        for cls, *xyxy in filtered_labels.tolist():
            metadata.append({
                "label": f"{self.class_names[int(cls)]}-gt",
                "score": 100,
                "box": {
                    "x": xyxy[0],
                    "y": xyxy[1],
                    "x2": xyxy[2],
                    "y2": xyxy[3]},
            })
        for *xyxy, conf, cls in filtered_detections.tolist():
            metadata.append({
                "label": f"{self.class_names[int(cls)]}",
                "score": conf * 100,
                "box": {
                    "x": xyxy[0],
                    "y": xyxy[1],
                    "x2": xyxy[2],
                    "y2": xyxy[3]},
            })

        self.metadata_dict[image_name] = metadata
        self.logged_images_count += 1

    def preprocess_prediction(self, image, labels, shape, pred):
        """Rescale predictions and labels for one image with `scale_boxes`.

        Returns:
            (predn, labelsn): rescaled copy of `pred`, and the rescaled labels
            (class column followed by xyxy boxes) or None when there are no labels.
        """
        nl = labels.shape[0]

        # Predictions
        if self.opt.single_cls:
            pred[:, 5] = 0

        predn = pred.clone()
        # Rescale the prediction boxes exactly once; a second call would rescale the already scaled boxes.
        scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1])  # native-space pred

        labelsn = None
        if nl:
            tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
            scale_boxes(image.shape[1:], tbox, shape[0], shape[1])  # native-space labels
            labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels

        return predn, labelsn

    def add_assets_to_artifact(self, artifact, path, asset_path, split):
        """Add every file under `asset_path`, plus its label file, to `artifact`; files that fail are skipped."""
        img_paths = sorted(glob.glob(f"{asset_path}/*"))
        label_paths = img2label_paths(img_paths)

        for image_file, label_file in zip(img_paths, label_paths):
            image_logical_path = os.path.relpath(image_file, path)
            label_logical_path = os.path.relpath(label_file, path)

            try:
                artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split})
                artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split})
            except ValueError as e:
                logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.')
                logger.error(f"COMET ERROR: {e}")
                continue

        return artifact

    def upload_dataset_artifact(self):
        """Build a Comet dataset artifact from ``self.data_dict`` and log it to the experiment."""
        dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
        path = str((ROOT / Path(self.data_dict["path"])).resolve())

        metadata = self.data_dict.copy()
        for key in ["train", "val", "test"]:
            split_path = metadata.get(key)
            if split_path is not None:
                # store split paths relative to the dataset root
                metadata[key] = split_path.replace(path, "")

        artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata)
        for key in metadata:
            if key not in ["train", "val", "test"]:
                continue
            # When upload_dataset names a single split, only that split is uploaded
            if isinstance(self.upload_dataset, str) and (key != self.upload_dataset):
                continue

            asset_path = self.data_dict.get(key)
            if asset_path is not None:
                artifact = self.add_assets_to_artifact(artifact, path, asset_path, key)

        self.experiment.log_artifact(artifact)

    def download_dataset_artifact(self, artifact_path):
        """Download a dataset artifact into the run's save_dir and return its data dict.

        Raises:
            ValueError: when the artifact metadata's 'names' entry is neither a list nor a dict.
        """
        logged_artifact = self.experiment.get_artifact(artifact_path)
        artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
        logged_artifact.download(artifact_save_dir)

        metadata = logged_artifact.metadata
        data_dict = metadata.copy()
        data_dict["path"] = artifact_save_dir

        metadata_names = metadata.get("names")
        if isinstance(metadata_names, dict):
            data_dict["names"] = {int(k): v for k, v in metadata_names.items()}
        elif isinstance(metadata_names, list):
            data_dict["names"] = dict(enumerate(metadata_names))
        else:
            # Raising a bare string is itself a TypeError in Python 3; raise a real exception
            raise ValueError("Invalid 'names' field in dataset yaml file. Please use a list or dictionary")

        return self.update_data_paths(data_dict)

    def update_data_paths(self, data_dict):
        """Prefix the train/val/test entries (str or list of str) with the dataset `path`, in place."""
        path = data_dict.get("path", "")

        for split in ["train", "val", "test"]:
            split_path = data_dict.get(split)
            if not split_path:
                continue
            # Decide on the value's type (not the key's) so list-valued splits are expanded per entry
            if isinstance(split_path, str):
                data_dict[split] = f"{path}/{split_path}"
            else:
                data_dict[split] = [f"{path}/{x}" for x in split_path]

        return data_dict

    def on_pretrain_routine_end(self, paths):
        """Log the given files as assets and optionally upload the dataset; skipped entirely when resuming."""
        if self.opt.resume:
            return

        for path in paths:
            self.log_asset(str(path))

        if self.upload_dataset and not self.resume:
            self.upload_dataset_artifact()

    def on_train_start(self):
        self.log_parameters(self.hyp)

    def on_train_epoch_start(self):
        return

    def on_train_epoch_end(self, epoch):
        self.experiment.curr_epoch = epoch

    def on_train_batch_start(self):
        return

    def on_train_batch_end(self, log_dict, step):
        """Record the current step and log batch metrics every `comet_log_batch_interval` steps when enabled."""
        self.experiment.curr_step = step
        if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
            self.log_metrics(log_dict, step=step)

    def on_train_end(self, files, save_dir, last, best, epoch, results):
        """Log final assets and the final model, then end the experiment."""
        if self.comet_log_predictions:
            curr_epoch = self.experiment.curr_epoch
            self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)

        for f in files:
            self.log_asset(f, metadata={"epoch": epoch})
        self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch})

        if not self.opt.evolve:
            model_path = str(best if best.exists() else last)
            name = Path(model_path).name
            if self.save_model:
                self.experiment.log_model(
                    self.model_name,
                    file_or_folder=model_path,
                    file_name=name,
                    overwrite=True,
                )

        # Check if running Experiment with Comet Optimizer
        if hasattr(self.opt, 'comet_optimizer_id'):
            metric = results.get(self.opt.comet_optimizer_metric)
            self.experiment.log_other('optimizer_metric_value', metric)

        self.finish_run()

    def on_val_start(self):
        return

    def on_val_batch_start(self):
        return

    def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
        """Log predictions for every image of the batch on each `comet_log_prediction_interval`-th batch."""
        if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
            return

        for si, pred in enumerate(outputs):
            if len(pred) == 0:
                continue

            image = images[si]
            labels = targets[targets[:, 0] == si, 1:]  # rows of `targets` whose first column equals si
            shape = shapes[si]
            path = paths[si]
            predn, labelsn = self.preprocess_prediction(image, labels, shape, pred)
            if labelsn is not None:
                self.log_predictions(image, labelsn, path, shape, predn)

    def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
        """Log per-class metrics and the confusion matrix when the respective flags are enabled."""
        if self.comet_log_per_class_metrics and self.num_classes > 1:
            for i, c in enumerate(ap_class):
                class_name = self.class_names[c]
                self.experiment.log_metrics(
                    {
                        'mAP@.5': ap50[i],
                        'mAP@.5:.95': ap[i],
                        'precision': p[i],
                        'recall': r[i],
                        'f1': f1[i],
                        'true_positives': tp[i],
                        'false_positives': fp[i],
                        'support': nt[c]},
                    prefix=class_name)

        if self.comet_log_confusion_matrix:
            epoch = self.experiment.curr_epoch
            class_names = list(self.class_names.values())
            class_names.append("background")
            num_classes = len(class_names)

            self.experiment.log_confusion_matrix(
                matrix=confusion_matrix.matrix,
                max_categories=num_classes,
                labels=class_names,
                epoch=epoch,
                column_label='Actual Category',
                row_label='Predicted Category',
                file_name=f"confusion-matrix-epoch-{epoch}.json",
            )

    def on_fit_epoch_end(self, result, epoch):
        self.log_metrics(result, epoch=epoch)

    def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
        """Upload the checkpoints every `save_period` epochs, except on the final epoch."""
        period = self.opt.save_period
        # period < 1 disables periodic saving; testing it first avoids a modulo by zero
        if period < 1 or final_epoch or (epoch + 1) % period != 0:
            return
        self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)

    def on_params_update(self, params):
        self.log_parameters(params)

    def finish_run(self):
        self.experiment.end()
def download_model_checkpoint(opt, experiment):
    """Download a checkpoint logged to a Comet experiment and point ``opt.weights`` at it.

    The checkpoint file name is taken from the query part of ``opt.weights``; when
    the query is empty, the default checkpoint file name is used. The file is written
    to ``<opt.project>/<experiment.name>/``. Missing checkpoints are reported through
    the logger and leave ``opt.weights`` untouched; download errors are logged, not raised.

    Args:
        opt: options namespace (reads ``project`` and ``weights``, may rewrite ``weights``).
        experiment: Comet API experiment providing ``name``, ``get_model_asset_list``
            and ``get_asset``.
    """
    target_dir = f"{opt.project}/{experiment.name}"
    os.makedirs(target_dir, exist_ok=True)

    model_name = COMET_MODEL_NAME
    assets = experiment.get_model_asset_list(model_name)
    if len(assets) == 0:
        logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}")
        return

    # Order by step, highest first, then index the asset ids by file name
    ordered_assets = sorted(assets, key=lambda x: x["step"], reverse=True)
    id_by_filename = {entry["fileName"]: entry["assetId"] for entry in ordered_assets}

    # An empty query means "use the default checkpoint file name"
    checkpoint_filename = urlparse(opt.weights).query or COMET_DEFAULT_CHECKPOINT_FILENAME
    asset_id = id_by_filename.get(checkpoint_filename)
    if asset_id is None:
        logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment")
        return

    try:
        logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}")
        payload = experiment.get_asset(asset_id, return_type="binary", stream=False)
        destination = f"{target_dir}/{checkpoint_filename}"
        with open(destination, "wb") as fh:
            fh.write(payload)
        opt.weights = destination
    except Exception as e:
        logger.warning("COMET WARNING: Unable to download checkpoint from Comet")
        logger.exception(e)
asset["fileName"] == "opt.yaml": asset_id = asset["assetId"] asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) opt_dict = yaml.safe_load(asset_binary) for key, value in opt_dict.items(): setattr(opt, key, value) opt.resume = resume_string # Save hyperparameters to YAML file # Necessary to pass checks in training script save_dir = f"{opt.project}/{experiment.name}" os.makedirs(save_dir, exist_ok=True) hyp_yaml_path = f"{save_dir}/hyp.yaml" with open(hyp_yaml_path, "w") as f: yaml.dump(opt.hyp, f) opt.hyp = hyp_yaml_path def check_comet_weights(opt): """Downloads model weights from Comet and updates the weights path to point to saved weights location Args: opt (argparse.Namespace): Command Line arguments passed to YOLOv5 training script Returns: None/bool: Return True if weights are successfully downloaded else return None """ if comet_ml is None: return if isinstance(opt.weights, str): if opt.weights.startswith(COMET_PREFIX): api = comet_ml.API() resource = urlparse(opt.weights) experiment_path = f"{resource.netloc}{resource.path}" experiment = api.get(experiment_path) download_model_checkpoint(opt, experiment) return True return None def check_comet_resume(opt): """Restores run parameters to its original state based on the model checkpoint and logged Experiment parameters. 
Args: opt (argparse.Namespace): Command Line arguments passed to YOLOv5 training script Returns: None/bool: Return True if the run is restored successfully else return None """ if comet_ml is None: return if isinstance(opt.resume, str): if opt.resume.startswith(COMET_PREFIX): api = comet_ml.API() resource = urlparse(opt.resume) experiment_path = f"{resource.netloc}{resource.path}" experiment = api.get(experiment_path) set_opt_parameters(opt, experiment) download_model_checkpoint(opt, experiment) return True return None ================================================ FILE: utils/loggers/comet/hpo.py ================================================ import argparse import json import logging import os import sys from pathlib import Path import comet_ml logger = logging.getLogger(__name__) FILE = Path(__file__).resolve() ROOT = FILE.parents[3] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH from train import train from utils.callbacks import Callbacks from utils.general import increment_path from utils.torch_utils import select_device # Project Configuration config = comet_ml.config.get_config() COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") def get_args(known=False): parser = argparse.ArgumentParser() parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300, help='total training epochs') parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, 
default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') parser.add_argument('--noval', action='store_true', help='only validate final epoch') parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') parser.add_argument('--noplots', action='store_true', help='save no plot files') parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') parser.add_argument('--seed', type=int, default=0, help='Global training seed') parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') # Weights & Biases arguments parser.add_argument('--entity', default=None, help='W&B: Entity') parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') parser.add_argument('--artifact_alias', type=str, 
def run(parameters, opt):
    """Train once with one parameter set suggested by the Comet Optimizer.

    ``epochs`` and ``batch_size`` are copied onto ``opt``; every other entry
    of ``parameters`` is passed to ``train`` as the hyperparameter dict.
    """
    non_hyp_keys = ("epochs", "batch_size")
    hyp_dict = {}
    for key, value in parameters.items():
        if key not in non_hyp_keys:
            hyp_dict[key] = value

    run_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)
    opt.save_dir = str(run_dir)
    opt.batch_size = parameters.get("batch_size")
    opt.epochs = parameters.get("epochs")

    device = select_device(opt.device, batch_size=opt.batch_size)
    train(hyp_dict, opt, device, callbacks=Callbacks())


if __name__ == "__main__":
    opt = get_args(known=True)

    # Path-like defaults are converted to plain strings.
    for attr in ("weights", "cfg", "data", "project"):
        setattr(opt, attr, str(getattr(opt, attr)))

    optimizer_id = os.getenv("COMET_OPTIMIZER_ID")
    if optimizer_id is not None:
        optimizer = comet_ml.Optimizer(optimizer_id)
    else:
        # No existing sweep id: create a sweep from the JSON config file.
        with open(opt.comet_optimizer_config) as f:
            optimizer_config = json.load(f)
        optimizer = comet_ml.Optimizer(optimizer_config)

    opt.comet_optimizer_id = optimizer.id
    spec = optimizer.status()["spec"]

    opt.comet_optimizer_objective = spec["objective"]
    opt.comet_optimizer_metric = spec["metric"]

    logger.info("COMET INFO: Starting Hyperparameter Sweep")
    for parameter in optimizer.get_parameters():
        run(parameter["parameters"], opt)
================================================ { "algorithm": "random", "parameters": { "anchor_t": { "type": "discrete", "values": [ 2, 8 ] }, "batch_size": { "type": "discrete", "values": [ 16, 32, 64 ] }, "box": { "type": "discrete", "values": [ 0.02, 0.2 ] }, "cls": { "type": "discrete", "values": [ 0.2 ] }, "cls_pw": { "type": "discrete", "values": [ 0.5 ] }, "copy_paste": { "type": "discrete", "values": [ 1 ] }, "degrees": { "type": "discrete", "values": [ 0, 45 ] }, "epochs": { "type": "discrete", "values": [ 5 ] }, "fl_gamma": { "type": "discrete", "values": [ 0 ] }, "fliplr": { "type": "discrete", "values": [ 0 ] }, "flipud": { "type": "discrete", "values": [ 0 ] }, "hsv_h": { "type": "discrete", "values": [ 0 ] }, "hsv_s": { "type": "discrete", "values": [ 0 ] }, "hsv_v": { "type": "discrete", "values": [ 0 ] }, "iou_t": { "type": "discrete", "values": [ 0.7 ] }, "lr0": { "type": "discrete", "values": [ 1e-05, 0.1 ] }, "lrf": { "type": "discrete", "values": [ 0.01, 1 ] }, "mixup": { "type": "discrete", "values": [ 1 ] }, "momentum": { "type": "discrete", "values": [ 0.6 ] }, "mosaic": { "type": "discrete", "values": [ 0 ] }, "obj": { "type": "discrete", "values": [ 0.2 ] }, "obj_pw": { "type": "discrete", "values": [ 0.5 ] }, "optimizer": { "type": "categorical", "values": [ "SGD", "Adam", "AdamW" ] }, "perspective": { "type": "discrete", "values": [ 0 ] }, "scale": { "type": "discrete", "values": [ 0 ] }, "shear": { "type": "discrete", "values": [ 0 ] }, "translate": { "type": "discrete", "values": [ 0 ] }, "warmup_bias_lr": { "type": "discrete", "values": [ 0, 0.2 ] }, "warmup_epochs": { "type": "discrete", "values": [ 5 ] }, "warmup_momentum": { "type": "discrete", "values": [ 0, 0.95 ] }, "weight_decay": { "type": "discrete", "values": [ 0, 0.001 ] } }, "spec": { "maxCombo": 0, "metric": "metrics/mAP_0.5", "objective": "maximize" }, "trials": 1 } ================================================ FILE: utils/loggers/wandb/__init__.py 
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def create_dataset_artifact(opt):
    """Create a WandbLogger with job type 'Dataset Creation' for ``opt``.

    Logs an install hint when the logger reports that wandb is unavailable.
    """
    # TODO: return value unused
    dataset_logger = WandbLogger(opt, None, job_type='Dataset Creation')
    if dataset_logger.wandb:
        return
    LOGGER.info("install wandb using `pip install wandb` to log the dataset")


if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
    cli.add_argument('--single-cls', action='store_true', help='train as single-class dataset')
    cli.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project')
    cli.add_argument('--entity', default=None, help='W&B entity')
    cli.add_argument('--name', type=str, default='log dataset', help='name of W&B run')

    opt = cli.parse_args()
    opt.resume = False  # Explicitly disallow resume check for dataset upload job

    create_dataset_artifact(opt)
def sweep():
    """Run one training job with the hyperparameters chosen by the W&B sweep agent.

    ``batch_size``, ``epochs`` and ``data`` are taken from the sweep
    configuration when present; otherwise the values parsed from the command
    line are kept instead of being overwritten with ``None``.
    """
    wandb.init()
    # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb.
    hyp_dict = vars(wandb.config).get("_items").copy()

    # Workaround: get necessary opt args
    opt = parse_opt(known=True)
    opt.batch_size = hyp_dict.get("batch_size", opt.batch_size)
    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
    opt.epochs = hyp_dict.get("epochs", opt.epochs)
    opt.nosave = True
    opt.data = hyp_dict.get("data", opt.data)
    opt.weights = str(opt.weights)
    opt.cfg = str(opt.cfg)
    opt.data = str(opt.data)
    opt.hyp = str(opt.hyp)
    opt.project = str(opt.project)
    device = select_device(opt.device, batch_size=opt.batch_size)

    # train
    train(hyp_dict, opt, device, callbacks=Callbacks())


if __name__ == "__main__":
    sweep()
RANK = int(os.getenv('RANK', -1))
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    """Return ``from_string`` without a leading ``prefix``.

    A string that does not start with ``prefix`` is returned unchanged rather
    than having its first ``len(prefix)`` characters cut off.
    """
    if from_string.startswith(prefix):
        return from_string[len(prefix):]
    return from_string


def check_wandb_config_file(data_config_file):
    """Return the ``*_wandb.<ext>`` sibling of ``data_config_file`` if that file exists.

    Otherwise the given path is returned unchanged.
    """
    wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1))  # updated data.yaml path
    if Path(wandb_config).is_file():
        return wandb_config
    return data_config_file


def check_wandb_dataset(data_file):
    """Load the dataset description, leaving W&B artifact links unresolved.

    A dict is returned as-is. A ``.yaml`` file whose ``train`` or ``val`` entry
    is a ``wandb-artifact://`` link is returned as the parsed dict; anything
    else goes through ``check_dataset``.
    """
    is_trainset_wandb_artifact = False
    is_valset_wandb_artifact = False
    if isinstance(data_file, dict):
        # In that case another dataset manager has already processed it and we don't have to
        return data_file
    if check_file(data_file) and data_file.endswith('.yaml'):
        with open(data_file, errors='ignore') as f:
            data_dict = yaml.safe_load(f)
        is_trainset_wandb_artifact = isinstance(data_dict['train'],
                                                str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX)
        is_valset_wandb_artifact = isinstance(data_dict['val'],
                                              str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX)
    if is_trainset_wandb_artifact or is_valset_wandb_artifact:
        return data_dict
    return check_dataset(data_file)


def get_run_info(run_path):
    """Split ``wandb-artifact://<entity>/<project>/<run_id>`` into its parts.

    Returns:
        (entity, project, run_id, model_artifact_name), where the artifact
        name is ``'run_<run_id>_model'``.
    """
    run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX))
    run_id = run_path.stem
    project = run_path.parent.stem
    entity = run_path.parent.parent.stem
    model_artifact_name = 'run_' + run_id + '_model'
    return entity, project, run_id, model_artifact_name


def check_wandb_resume(opt):
    """Report whether ``opt.resume`` is a W&B artifact link.

    On ranks other than -1 and 0 this first rewrites ``opt.data`` through
    ``process_wandb_config_ddp_mode`` and, when resuming from an artifact,
    downloads the latest model artifact and points ``opt.weights`` at its
    ``last.pt``.

    Returns:
        True when ``opt.resume`` is a ``wandb-artifact://`` string, else None.
    """
    is_worker_rank = RANK not in [-1, 0]
    if is_worker_rank:
        process_wandb_config_ddp_mode(opt)
    if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
        if is_worker_rank:  # For resuming DDP runs
            entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
            api = wandb.Api()
            artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest')
            modeldir = artifact.download()
            opt.weights = str(Path(modeldir) / "last.pt")
        return True
    return None


def process_wandb_config_ddp_mode(opt):
    """Download artifact-backed dataset splits and rewrite ``opt.data`` to a local YAML.

    When ``train`` and/or ``val`` in the dataset YAML are ``wandb-artifact://``
    links, the artifacts are downloaded, the entries are replaced by the local
    ``data/images/`` directories, and the updated dict is written to
    ``wandb_local_data.yaml`` next to the downloaded data.
    """
    with open(check_file(opt.data), errors='ignore') as f:
        data_dict = yaml.safe_load(f)  # data dict
    train_dir, val_dir = None, None
    if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
        train_dir = train_artifact.download()
        train_path = Path(train_dir) / 'data/images/'
        data_dict['train'] = str(train_path)

    if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
        api = wandb.Api()
        val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
        val_dir = val_artifact.download()
        val_path = Path(val_dir) / 'data/images/'
        data_dict['val'] = str(val_path)
    if train_dir or val_dir:
        # Fall back to the train directory when only the train split is an
        # artifact; Path(None) would raise TypeError.
        ddp_data_path = str(Path(val_dir or train_dir) / 'wandb_local_data.yaml')
        with open(ddp_data_path, 'w') as f:
            yaml.safe_dump(data_dict, f)
        opt.data = ddp_data_path
def __init__(self, opt, run_id=None, job_type='Training'):
    """
    Set up the logger state and start, resume or attach to a W&B run.

    - Initialize WandbLogger instance
    - Upload dataset if opt.upload_dataset is True (note: the flag is reset to
      False at the top of this method, so the 'Training' upload branch below
      is currently never taken)
    - Setup training processes if job_type is 'Training'

    arguments:
    opt (namespace) -- Commandline arguments for this run
    run_id (str) -- Run ID of W&B run to be resumed
    job_type (str) -- To set the job_type for this run

    """
    # Temporary-fix
    if opt.upload_dataset:
        opt.upload_dataset = False
        # LOGGER.info("Uploading Dataset functionality is not being supported temporarily due to a bug.")

    # Pre-training routine --
    self.job_type = job_type
    # self.wandb is the module (or None when the import failed); self.wandb_run
    # starts as the already-active run, if any.
    self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run
    self.val_artifact, self.train_artifact = None, None
    self.train_artifact_path, self.val_artifact_path = None, None
    self.result_artifact = None
    self.val_table, self.result_table = None, None
    self.bbox_media_panel_images = []
    self.val_table_path_map = None
    self.max_imgs_to_log = 16
    self.wandb_artifact_data_dict = None
    self.data_dict = None
    # It's more elegant to stick to 1 wandb.init call,
    #  but useful config data is overwritten in the WandbLogger's wandb.init call
    if isinstance(opt.resume, str):  # checks resume from artifact
        if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
            entity, project, run_id, model_artifact_name = get_run_info(opt.resume)
            model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name
            assert wandb, 'install wandb to resume wandb runs'
            # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config
            self.wandb_run = wandb.init(id=run_id,
                                        project=project,
                                        entity=entity,
                                        resume='allow',
                                        allow_val_change=True)
            # From here on opt.resume names the model artifact, not the run path.
            opt.resume = model_artifact_name
    elif self.wandb:
        # Reuse an active run when there is one; otherwise start a new run.
        self.wandb_run = wandb.init(config=opt,
                                    resume="allow",
                                    project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                                    entity=opt.entity,
                                    name=opt.name if opt.name != 'exp' else None,
                                    job_type=job_type,
                                    id=run_id,
                                    allow_val_change=True) if not wandb.run else wandb.run
    if self.wandb_run:
        if self.job_type == 'Training':
            if opt.upload_dataset:
                if not opt.resume:
                    self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt)

            if isinstance(opt.data, dict):
                # This means another dataset manager has already processed the dataset info (e.g. ClearML)
                # and they will have stored the already processed dict in opt.data
                self.data_dict = opt.data
            elif opt.resume:
                # resume from artifact
                if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
                    self.data_dict = dict(self.wandb_run.config.data_dict)
                else:  # local resume
                    self.data_dict = check_wandb_dataset(opt.data)
            else:
                self.data_dict = check_wandb_dataset(opt.data)
                self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict

                # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming.
                self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, allow_val_change=True)
            self.setup_training(opt)

        if self.job_type == 'Dataset Creation':
            self.wandb_run.config.update({"upload_dataset": True})
            self.data_dict = self.check_and_upload_dataset(opt)


def check_and_upload_dataset(self, opt):
    """
    Check if the dataset format is compatible and upload it as W&B artifact

    The dataset is logged through ``self.log_dataset_artifact``; the YAML file
    that call returns is then loaded and returned as a dict.

    arguments:
    opt (namespace)-- Commandline arguments for current run

    returns:
    Updated dataset info dictionary where local dataset paths are replaced by WANDB_ARTIFACT_PREFIX links.
    """
    assert wandb, 'Install wandb to upload dataset'
    config_path = self.log_dataset_artifact(opt.data, opt.single_cls,
                                            'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem)
    with open(config_path, errors='ignore') as f:
        wandb_data_dict = yaml.safe_load(f)
    return wandb_data_dict
def setup_training(self, opt):
    """
    Setup the necessary processes for training YOLO models:
      - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
      - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
      - Setup log_dict, initialize bbox_interval

    arguments:
    opt (namespace) -- commandline arguments for this run

    """
    self.log_dict, self.current_epoch = {}, 0
    self.bbox_interval = opt.bbox_interval
    if isinstance(opt.resume, str):
        modeldir, _ = self.download_model_artifact(opt)
        if modeldir:
            # Restore the resumed run's settings from the W&B run config.
            self.weights = Path(modeldir) / "last.pt"
            config = self.wandb_run.config
            opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str(
                self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\
                config.hyp, config.imgsz
    data_dict = self.data_dict
    if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
        self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
            data_dict.get('train'), opt.artifact_alias)
        self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
            data_dict.get('val'), opt.artifact_alias)

    # Point the splits at the downloaded artifact directories, when there are any.
    if self.train_artifact_path is not None:
        train_path = Path(self.train_artifact_path) / 'data/images/'
        data_dict['train'] = str(train_path)
    if self.val_artifact_path is not None:
        val_path = Path(self.val_artifact_path) / 'data/images/'
        data_dict['val'] = str(val_path)

    if self.val_artifact is not None:
        # A validation artifact is available: prepare the per-epoch results table.
        self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
        columns = ["epoch", "id", "ground truth", "prediction"]
        columns.extend(self.data_dict['names'])
        self.result_table = wandb.Table(columns)
        self.val_table = self.val_artifact.get("val")
        if self.val_table_path_map is None:
            self.map_val_table_path()
    if opt.bbox_interval == -1:
        # -1 means "choose automatically": every epochs // 10 epochs, or every epoch for short runs.
        self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1
        if opt.evolve or opt.noplots:
            self.bbox_interval = opt.bbox_interval = opt.epochs + 1  # disable bbox_interval
    train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
    # Update the data_dict to point to local artifacts dir
    if train_from_artifact:
        self.data_dict = data_dict
def download_dataset_artifact(self, path, alias):
    """
    download the dataset artifact if the path starts with WANDB_ARTIFACT_PREFIX

    arguments:
    path -- path of the dataset to be used for training
    alias (str)-- alias of the artifact to be download/used for training

    returns:
    (str, wandb.Artifact) -- path of the downloaded dataset and its corresponding artifact object if dataset
    is found otherwise returns (None, None)
    """
    if not (isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX)):
        return None, None
    artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
    # Artifact names always use forward slashes, also on Windows.
    dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/"))
    assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
    datadir = dataset_artifact.download()
    return datadir, dataset_artifact


def download_model_artifact(self, opt):
    """
    download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX

    arguments:
    opt (namespace) -- Commandline arguments for this run

    returns:
    (modeldir, model_artifact) for an artifact resume path, otherwise (None, None)
    """
    if not opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
        return None, None
    model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest")
    assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist'
    modeldir = model_artifact.download()
    # epochs_trained = model_artifact.metadata.get('epochs_trained')
    total_epochs = model_artifact.metadata.get('total_epochs')
    is_finished = total_epochs is None
    assert not is_finished, 'training is finished, can only resume incomplete runs.'
    return modeldir, model_artifact


def log_model(self, path, opt, epoch, fitness_score, best_model=False):
    """
    Log the model checkpoint as W&B artifact

    arguments:
    path (Path)   -- Path of directory containing the checkpoints
    opt (namespace) -- Command line arguments for this run
    epoch (int)  -- Current epoch number
    fitness_score (float) -- fitness score for current epoch
    best_model (boolean) -- Boolean representing if the current checkpoint is the best yet.
    """
    model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model',
                                    type='model',
                                    metadata={
                                        'original_url': str(path),
                                        'epochs_trained': epoch + 1,
                                        'save period': opt.save_period,
                                        'project': opt.project,
                                        'total_epochs': opt.epochs,
                                        'fitness_score': fitness_score})
    model_artifact.add_file(str(path / 'last.pt'), name='last.pt')
    aliases = ['latest', 'last', 'epoch ' + str(self.current_epoch)]
    if best_model:
        # Add 'best' only when it applies, instead of passing an empty-string
        # alias for every other checkpoint.
        aliases.append('best')
    wandb.log_artifact(model_artifact, aliases=aliases)
    LOGGER.info(f"Saving model artifact on epoch {epoch + 1}")
def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False):
    """
    Log the dataset as W&B artifact and return the new data file with W&B links

    arguments:
    data_file (str) -- the .yaml file with information about the dataset like - path, classes etc.
    single_cls (boolean)  -- train multi-class data as single-class
    project (str) -- project name. Used to construct the artifact path
    overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new
    file with _wandb postfix. Eg -> data_wandb.yaml

    returns:
    the new .yaml file with artifact links. it can be used to start training directly from artifacts.
    When only the validation set is logged (upload_dataset == 'val') no file is written and
    Path(data_file) is returned.
    """
    upload_dataset = self.wandb_run.config.upload_dataset
    log_val_only = isinstance(upload_dataset, str) and upload_dataset == 'val'
    self.data_dict = check_dataset(data_file)  # parse and check
    data = dict(self.data_dict)
    nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names'])
    # NOTE(review): enumerate() assumes `names` is a sequence of class names; if check_dataset
    # returns a dict here, this would enumerate its keys -- confirm against check_dataset.
    names = {k: v for k, v in enumerate(names)}  # to index dictionary

    # log train set
    if not log_val_only:
        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(data['train'], rect=True, batch_size=1),
                                                        names,
                                                        name='train') if data.get('train') else None
        if data.get('train'):
            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train')

    self.val_artifact = self.create_dataset_table(
        LoadImagesAndLabels(data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None
    if data.get('val'):
        data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val')

    path = Path(data_file)
    # create a _wandb.yaml file with artifacts links if both train and test set are logged
    if not log_val_only:
        path = (path.stem if overwrite_config else path.stem + '_wandb') + '.yaml'  # updated data.yaml path
        path = ROOT / 'data' / path
        # These keys are dropped from the written config.
        data.pop('download', None)
        data.pop('path', None)
        with open(path, 'w') as f:
            yaml.safe_dump(data, f)
            LOGGER.info(f"Created dataset config file {path}")

    if self.job_type == 'Training':  # builds correct artifact pipeline graph
        if not log_val_only:
            self.wandb_run.log_artifact(
                self.train_artifact)  # calling use_artifact downloads the dataset. NOT NEEDED!
        self.wandb_run.use_artifact(self.val_artifact)
        self.val_artifact.wait()
        self.val_table = self.val_artifact.get('val')
        self.map_val_table_path()
    else:
        self.wandb_run.log_artifact(self.train_artifact)
        self.wandb_run.log_artifact(self.val_artifact)
    return path


def map_val_table_path(self):
    """
    Map the validation dataset Table like name of file -> its id in the W&B Table.
    Useful for - referencing artifacts for evaluation.

    Fills self.val_table_path_map from self.val_table.data, using column 3 of each row as the
    key and column 0 as the value (the "name" and "id" columns written by create_dataset_table).
    """
    self.val_table_path_map = {}
    LOGGER.info("Mapping dataset")
    for i, data in enumerate(tqdm(self.val_table.data)):
        self.val_table_path_map[data[3]] = data[0]
def val_one_image(self, pred, predn, path, names, im):
    """
    Log validation data for one image.

    Updates the result Table if the validation dataset was uploaded as an artifact, and queues the image
    for the bounding-box media panel.

    arguments:
    pred -- scaled predictions, rows of [xmin, ymin, xmax, ymax, confidence, class]; must expose .tolist()
    predn -- predictions in the native space, same row layout; forwarded to log_training_progress
    path (str or Path) -- local path of the current evaluation image
    names (dict(int, str)) -- hash map that maps class ids to labels
    im -- image data handed to wandb.Image for the media panel
    """
    if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
        self.log_training_progress(predn, path, names)

    # Media panel: only while below the image budget, never on epoch 0, and only every bbox_interval epochs
    if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
        if self.current_epoch % self.bbox_interval == 0:
            box_data = [{
                "position": {
                    "minX": xyxy[0],
                    "minY": xyxy[1],
                    "maxX": xyxy[2],
                    "maxY": xyxy[3]},
                "class_id": int(cls),
                "box_caption": f"{names[int(cls)]} {conf:.3f}",
                "scores": {
                    "class_score": conf},
                "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
            boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
            # Path(path) accepts both str and Path objects; a bare path.name raised AttributeError
            # for the plain-string paths the docstring advertises.
            caption = Path(path).name
            self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=caption))
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """
    Context manager that silences logging for the duration of its body.

    source - https://gist.github.com/simon-weber/7853144

    :param highest_level: the maximum logging level in use.
      This would only need to be changed if a custom level greater than CRITICAL is defined.

    The threshold that was active on entry is put back on exit, including when the body raises.
    """
    saved_threshold = logging.root.manager.disable  # remember the current global disable level

    logging.disable(highest_level)
    try:
        yield
    finally:
        logging.disable(saved_threshold)
class FocalLoss(nn.Module):
    """
    Focal-loss wrapper around an existing element-wise criterion,
    i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5).

    The wrapped criterion's own reduction is remembered and then switched to 'none' so the focal
    weighting can be applied per element; the remembered reduction is applied in forward().
    """

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # reduction requested by the caller
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss of logits `pred` against targets `true`."""
        loss = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * prob + (1 - true) * (1 - prob)  # probability assigned to the true label
        alpha_weight = true * self.alpha + (1 - true) * (1 - self.alpha)
        focal_weight = (1.0 - p_t) ** self.gamma
        loss *= alpha_weight * focal_weight

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss  # 'none'
def __call__(self, p, targets):  # predictions, targets
    """
    Compute the box, objectness and classification losses for one batch.

    arguments:
    p -- list of per-layer prediction tensors; each is indexed as pi[b, :, gj, gi] and pi[:, 4], and its
         dims 0, 2, 3 size the objectness target (presumably (batch, channels, grid_y, grid_x) - confirm)
    targets -- label tensor forwarded unchanged to self.build_targets

    returns:
    (loss.sum() * bs, loss.detach()) where loss holds the gain-weighted [box, obj, cls] terms
    """
    bs = p[0].shape[0]  # batch size
    loss = torch.zeros(3, device=self.device)  # [box, obj, cls] losses
    tcls, tbox, indices = self.build_targets(p, targets)  # targets

    # Losses
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, gj, gi = indices[i]  # image, gridy, gridx
        tobj = torch.zeros((pi.shape[0], pi.shape[2], pi.shape[3]), dtype=pi.dtype, device=self.device)  # tgt obj

        n_labels = b.shape[0]  # number of labels
        if n_labels:
            # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1)  # faster, requires torch 1.8.0
            pxy, pwh, _, pcls = pi[b, :, gj, gi].split((2, 2, 1, self.nc), 1)  # target-subset of predictions

            # Regression
            # pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
            # pwh = (0.0 + (pwh - 1.09861).sigmoid() * 4) * anchors[i]
            # pwh = (0.33333 + (pwh - 1.09861).sigmoid() * 2.66667) * anchors[i]
            # pwh = (0.25 + (pwh - 1.38629).sigmoid() * 3.75) * anchors[i]
            # pwh = (0.20 + (pwh - 1.60944).sigmoid() * 4.8) * anchors[i]
            # pwh = (0.16667 + (pwh - 1.79175).sigmoid() * 5.83333) * anchors[i]
            pxy = pxy.sigmoid() * 1.6 - 0.3  # xy offset squashed into (-0.3, 1.3)
            pwh = (0.2 + pwh.sigmoid() * 4.8) * self.anchors[i]  # wh is 0.2x-5.0x of self.anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
            loss[0] += (1.0 - iou).mean()  # box loss

            # Objectness
            iou = iou.detach().clamp(0).type(tobj.dtype)  # no gradient through the objectness target
            if self.sort_obj_iou:
                # reorder so that, for duplicate cells, the last (highest-iou) write below wins
                j = iou.argsort()
                b, gj, gi, iou = b[j], gj[j], gi[j], iou[j]
            if self.gr < 1:
                iou = (1.0 - self.gr) + self.gr * iou  # blend between constant 1.0 and iou
            tobj[b, gj, gi] = iou  # iou ratio

            # Classification
            if self.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                t[range(n_labels), tcls[i]] = self.cp
                loss[2] += self.BCEcls(pcls, t)  # cls loss

        # Objectness loss is computed for every layer, also when the layer received no labels
        obji = self.BCEobj(pi[:, 4], tobj)
        loss[1] += obji * self.balance[i]  # obj loss
        if self.autobalance:
            # moving average of 1 / obji; NOTE(review): divides by zero if obji is exactly 0
            self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

    if self.autobalance:
        self.balance = [x / self.balance[self.ssi] for x in self.balance]  # normalise by the self.ssi entry
    loss[0] *= self.hyp['box']
    loss[1] *= self.hyp['obj']
    loss[2] *= self.hyp['cls']

    return loss.sum() * bs, loss.detach()  # [box, obj, cls] losses
def build_targets(self, p, targets):
    """
    Build per-layer training targets for compute_loss().

    arguments:
    p -- list of per-layer prediction tensors; only p[i].shape is read, with shape[2] / shape[3] used as
         grid height / width
    targets -- tensor with one row per label: (image, class, x, y, w, h); an empty (0, 6) tensor is valid
               and produces empty targets for every layer

    returns:
    (tcls, tbox, indices), three lists holding one entry per detection layer:
        tcls[i] -- class index of every assigned target
        tbox[i] -- (dx, dy, w, h) rows in grid units, xy relative to the assigned cell
        indices[i] -- tuple (image, grid_y, grid_x) of index tensors, clamped to the grid
    """
    nt = targets.shape[0]  # number of targets
    tcls, tbox, indices = [], [], []
    gain = torch.ones(6, device=self.device)  # normalized to gridspace gain

    g = 0.3  # bias
    off = torch.tensor(
        [
            [0, 0],
            [1, 0],
            [0, 1],
            [-1, 0],
            [0, -1],  # j,k,l,m
            # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
        ],
        device=self.device).float() * g  # offsets

    for i in range(self.nl):
        shape = p[i].shape
        gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]]  # xyxy gain

        # Match targets to anchors
        t = targets * gain  # labels scaled to grid units, one row per label
        if nt:
            # Matches
            r = t[..., 4:6] / self.anchors[i]  # wh ratio
            j = torch.max(r, 1 / r).max(1)[0] < self.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter

            # Offsets: also assign each target to neighbouring cells whose border it lies close to
            gxy = t[:, 2:4]  # grid xy
            gxi = gain[[2, 3]] - gxy  # inverse
            j, k = ((gxy % 1 < g) & (gxy > 1)).T
            l, m = ((gxi % 1 < g) & (gxi > 1)).T
            j = torch.stack((torch.ones_like(j), j, k, l, m))
            t = t.repeat((5, 1, 1))[j]
            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            # No labels in the batch: keep the empty scaled tensor. The previous `targets[0]` raised
            # IndexError here because an empty tensor has no row 0.
            offsets = 0

        # Define
        bc, gxy, gwh = t.chunk(3, 1)  # (image, class), grid xy, grid wh
        b, c = bc.long().T  # image, class
        gij = (gxy - offsets).long()
        gi, gj = gij.T  # grid indices (views of gij, so the in-place clamps below also update gij)

        # Append
        indices.append((b, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1)))  # image, grid_y, grid_x indices
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
        tcls.append(c)  # class

    return tcls, tbox, indices
def __call__(self, p, targets):  # predictions, targets
    """
    Compute the [box, obj, cls] losses, keeping only the lowest-loss candidates for every label.

    arguments:
    p -- list of per-layer prediction tensors, indexed as pi[b, :, gj, gi] and pi[:, 4]
    targets -- label tensor; targets.shape[0] is taken as the number of labels

    returns:
    (loss.sum() * bs, loss.detach()) with loss ordered as [box, obj, cls]

    NOTE(review): when targets is empty, all_loss stays empty, cat_loss becomes [] and cat_loss[0]
    raises IndexError - confirm callers never pass a batch without labels.
    """
    tcls, tbox, indices = self.build_targets(p, targets)  # targets
    bs = p[0].shape[0]  # batch size
    n_labels = targets.shape[0]  # number of labels
    loss = torch.zeros(3, device=self.device)  # [box, obj, cls] losses

    # Compute all losses
    all_loss = []
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, gj, gi = indices[i]  # image, gridy, gridx
        if n_labels:
            pxy, pwh, pobj, pcls = pi[b, :, gj, gi].split((2, 2, 1, self.nc), 2)  # target-subset of predictions

            # Regression
            pbox = torch.cat((pxy.sigmoid() * 1.6 - 0.3, (0.2 + pwh.sigmoid() * 4.8) * self.anchors[i]), 2)
            iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(predicted_box, target_box)
            obj_target = iou.detach().clamp(0).type(pi.dtype)  # objectness targets

            # Per-candidate entries, in order: box loss, obj loss, cls loss, objectness target, validity mask
            all_loss.append([(1.0 - iou) * self.hyp['box'],
                             self.BCE_base(pobj.squeeze(), torch.ones_like(obj_target)) * self.hyp['obj'],
                             self.BCE_base(pcls, F.one_hot(tcls[i], self.nc).float()).mean(2) * self.hyp['cls'],
                             obj_target,
                             tbox[i][..., 2] > 0.0])  # valid (box width > 0)

    # Lowest n_assign losses per label (candidates of all layers are concatenated along dim 1)
    n_assign = 4  # top n matches
    cat_loss = [torch.cat(x, 1) for x in zip(*all_loss)]
    ij = torch.zeros_like(cat_loss[0]).bool()  # selection mask
    sum_loss = cat_loss[0] + cat_loss[2]  # ranking uses box + cls loss only
    for col in torch.argsort(sum_loss, dim=1).T[:n_assign]:
        # ij[range(n_labels), col] = True
        ij[range(n_labels), col] = cat_loss[4][range(n_labels), col]  # select only if the candidate is valid
    loss[0] = cat_loss[0][ij].mean() * self.nl  # box loss
    loss[2] = cat_loss[2][ij].mean() * self.nl  # cls loss

    # Obj loss
    for i, (h, pi) in enumerate(zip(ij.chunk(self.nl, 1), p)):  # layer index, (layer mask, layer predictions)
        b, gj, gi = indices[i]  # image, gridy, gridx
        tobj = torch.zeros((pi.shape[0], pi.shape[2], pi.shape[3]), dtype=pi.dtype, device=self.device)  # obj
        if n_labels:  # if any labels
            tobj[b[h], gj[h], gi[h]] = all_loss[i][3][h]
        loss[1] += self.BCEobj(pi[:, 4], tobj) * (self.balance[i] * self.hyp['obj'])

    return loss.sum() * bs, loss.detach()  # [box, obj, cls] losses
class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367"""

    def __init__(self):
        super().__init__()

    def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """
        Return the summed, weighted BCE-with-logits loss of `pred_score` against `gt_score`.

        Elements with label == 0 are weighted by alpha * sigmoid(pred_score) ** gamma, elements with
        label == 1 by gt_score. The BCE itself is evaluated in float32 with autocast disabled.
        """
        negative_weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label)
        positive_weight = gt_score * label
        weight = negative_weight + positive_weight
        with torch.cuda.amp.autocast(enabled=False):
            bce = F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none")
            loss = (bce * weight).sum()
        return loss
class BboxLoss(nn.Module):
    """Box regression loss: a CIoU term plus an optional distribution focal loss (DFL) term."""

    def __init__(self, reg_max, use_dfl=False):
        super().__init__()
        self.reg_max = reg_max  # index of the last distribution bin (reg_max + 1 bins per box side)
        self.use_dfl = use_dfl

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """
        Return (loss_iou, loss_dfl, iou) computed over the entries selected by `fg_mask`.

        Both losses are weighted by the per-entry sum of `target_scores` and divided by
        `target_scores_sum`. `loss_dfl` is a zero tensor when DFL is disabled.
        """
        # iou loss
        box_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])  # (b, h*w, 4)
        fg_pred_boxes = torch.masked_select(pred_bboxes, box_mask).view(-1, 4)
        fg_target_boxes = torch.masked_select(target_bboxes, box_mask).view(-1, 4)
        fg_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)

        iou = bbox_iou(fg_pred_boxes, fg_target_boxes, xywh=False, CIoU=True)
        loss_iou = 1.0 - iou
        loss_iou *= fg_weight
        loss_iou = loss_iou.sum() / target_scores_sum

        # dfl loss
        if not self.use_dfl:
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)
            return loss_iou, loss_dfl, iou

        dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4])
        fg_pred_dist = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1)
        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        fg_target_ltrb = torch.masked_select(target_ltrb, box_mask).view(-1, 4)
        loss_dfl = self._df_loss(fg_pred_dist, fg_target_ltrb) * fg_weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
        return loss_iou, loss_dfl, iou

    def _df_loss(self, pred_dist, target):
        """Cross-entropy against the two integer bins around `target`, weighted by proximity, averaged over the last dim."""
        logits = pred_dist.view(-1, self.reg_max + 1)
        lower_bin = target.to(torch.long)
        upper_bin = lower_bin + 1
        lower_weight = upper_bin.to(torch.float) - target  # the closer bin gets the larger weight
        upper_weight = 1 - lower_weight

        def bin_loss(bin_index):
            # element-wise cross-entropy, reshaped back to the layout of `target`
            return F.cross_entropy(logits, bin_index.view(-1), reduction="none").view(lower_bin.shape)

        loss_lower = bin_loss(lower_bin) * lower_weight
        loss_upper = bin_loss(upper_bin) * upper_weight
        return (loss_lower + loss_upper).mean(-1, keepdim=True)
def bbox_decode(self, anchor_points, pred_dist):
    """
    Turn predicted distances into boxes via dist2bbox(..., xywh=False).

    With DFL enabled, the channel dim of `pred_dist` (batch, anchors, channels) is first split into
    4 groups, softmax-normalised per group and reduced to one value per group by a dot product with
    self.proj.
    """
    if self.use_dfl:
        batch, n_anchors, channels = pred_dist.shape  # batch, anchors, channels
        bins_per_side = channels // 4
        side_logits = pred_dist.view(batch, n_anchors, 4, bins_per_side)
        projection = self.proj.type(pred_dist.dtype)
        pred_dist = side_logits.softmax(3).matmul(projection)
    return dist2bbox(pred_dist, anchor_points, xywh=False)
def smooth_BCE(eps=0.1):
    """
    Return the (positive, negative) label-smoothing BCE targets for smoothing amount `eps`.

    https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    """
    negative = 0.5 * eps
    positive = 1.0 - 0.5 * eps
    return positive, negative
class BboxLoss(nn.Module):
    """Box regression loss: a CIoU term plus an optional distribution focal loss (DFL) term."""

    def __init__(self, reg_max, use_dfl=False):
        super().__init__()
        self.reg_max = reg_max  # index of the last distribution bin (reg_max + 1 bins per box side)
        self.use_dfl = use_dfl

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """
        Return (loss_iou, loss_dfl, iou) computed over the entries selected by `fg_mask`.

        Both losses are weighted by the per-entry sum of `target_scores` and divided by
        `target_scores_sum`. `loss_dfl` is a zero tensor when DFL is disabled.
        """
        # iou loss
        box_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])  # (b, h*w, 4)
        fg_pred_boxes = torch.masked_select(pred_bboxes, box_mask).view(-1, 4)
        fg_target_boxes = torch.masked_select(target_bboxes, box_mask).view(-1, 4)
        fg_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)

        iou = bbox_iou(fg_pred_boxes, fg_target_boxes, xywh=False, CIoU=True)
        loss_iou = 1.0 - iou
        loss_iou *= fg_weight
        loss_iou = loss_iou.sum() / target_scores_sum

        # dfl loss
        if not self.use_dfl:
            loss_dfl = torch.tensor(0.0).to(pred_dist.device)
            return loss_iou, loss_dfl, iou

        dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4])
        fg_pred_dist = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1)
        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        fg_target_ltrb = torch.masked_select(target_ltrb, box_mask).view(-1, 4)
        loss_dfl = self._df_loss(fg_pred_dist, fg_target_ltrb) * fg_weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
        return loss_iou, loss_dfl, iou

    def _df_loss(self, pred_dist, target):
        """Cross-entropy against the two integer bins around `target`, weighted by proximity, averaged over the last dim."""
        logits = pred_dist.view(-1, self.reg_max + 1)
        lower_bin = target.to(torch.long)
        upper_bin = lower_bin + 1
        lower_weight = upper_bin.to(torch.float) - target  # the closer bin gets the larger weight
        upper_weight = 1 - lower_weight

        def bin_loss(bin_index):
            # element-wise cross-entropy, reshaped back to the layout of `target`
            return F.cross_entropy(logits, bin_index.view(-1), reduction="none").view(lower_bin.shape)

        loss_lower = bin_loss(lower_bin) * lower_weight
        loss_upper = bin_loss(upper_bin) * upper_weight
        return (loss_lower + loss_upper).mean(-1, keepdim=True)
beta=float(os.getenv('YOLOB', 6.0))) self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) self.bbox_loss2 = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 self.use_dfl = use_dfl def preprocess(self, targets, batch_size, scale_tensor): if targets.shape[0] == 0: out = torch.zeros(batch_size, 0, 5, device=self.device) else: i = targets[:, 0] # image index _, counts = i.unique(return_counts=True) out = torch.zeros(batch_size, counts.max(), 5, device=self.device) for j in range(batch_size): matches = i == j n = matches.sum() if n: out[j, :n] = targets[matches, 1:] out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) return out def bbox_decode(self, anchor_points, pred_dist): if self.use_dfl: b, a, c = pred_dist.shape # batch, anchors, channels pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) return dist2bbox(pred_dist, anchor_points, xywh=False) def __call__(self, p, targets, img=None, epoch=0): loss = torch.zeros(3, device=self.device) # box, cls, dfl feats = p[1][0] if isinstance(p, tuple) else p[0] feats2 = p[1][1] if isinstance(p, tuple) else p[1] pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_distri2, pred_scores2 = torch.cat([xi.view(feats2[0].shape[0], self.no, -1) for xi in feats2], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() dtype = pred_scores.dtype batch_size, grid_size = 
pred_scores.shape[:2] imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) # targets targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) # pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) target_labels, target_bboxes, target_scores, fg_mask = self.assigner( pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_labels2, target_bboxes2, target_scores2, fg_mask2 = self.assigner2( pred_scores2.detach().sigmoid(), (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_bboxes /= stride_tensor target_scores_sum = max(target_scores.sum(), 1) target_bboxes2 /= stride_tensor target_scores_sum2 = max(target_scores2.sum(), 1) # cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[1] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[1] *= 0.25 loss[1] += self.BCEcls(pred_scores2, target_scores2.to(dtype)).sum() / target_scores_sum2 # BCE # bbox loss if fg_mask.sum(): loss[0], loss[2], iou = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask) loss[0] *= 0.25 loss[2] *= 0.25 if fg_mask2.sum(): loss0_, loss2_, iou2 = self.bbox_loss2(pred_distri2, pred_bboxes2, anchor_points, target_bboxes2, target_scores2, target_scores_sum2, fg_mask2) loss[0] += loss0_ loss[2] += loss2_ loss[0] *= 7.5 # box gain loss[1] *= 0.5 # cls gain loss[2] *= 1.5 # dfl gain return 
class ComputeLossLH:
    """Detection loss for a two-branch head supervised by a single assignment.

    ``__call__`` decodes two sets of predictions, runs the task-aligned
    assigner once on the *second* set, and applies the resulting targets to
    both sets. The first branch's box/cls/dfl terms are weighted by 0.25.
    NOTE(review): "LH" presumably stands for lead-head guided assignment —
    confirm against the training scripts.
    """

    def __init__(self, model, use_dfl=True):
        """Read hyperparameters and head geometry from ``model``.

        Args:
            model: Model exposing ``hyp`` (mapping with ``cls_pw``,
                ``fl_gamma`` and optionally ``label_smoothing``) and whose
                last layer provides ``stride``, ``nc``, ``nl``, ``no`` and
                ``reg_max``.
            use_dfl: When true, box distances are decoded from per-side
                distributions and a DFL term is computed.
        """
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none')

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h["fl_gamma"]  # focal loss gamma
        if g > 0:
            BCEcls = FocalLoss(BCEcls, g)

        m = de_parallel(model).model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
        self.BCEcls = BCEcls
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.nl = m.nl  # number of layers
        self.no = m.no
        self.reg_max = m.reg_max
        self.device = device

        # Assigner settings can be overridden through environment variables.
        self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)),
                                            num_classes=self.nc,
                                            alpha=float(os.getenv('YOLOA', 0.5)),
                                            beta=float(os.getenv('YOLOB', 6.0)))
        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device)
        self.proj = torch.arange(m.reg_max).float().to(device)  # / 120.0
        self.use_dfl = use_dfl

    def preprocess(self, targets, batch_size, scale_tensor):
        """Pack flat targets into a zero-padded ``(batch_size, max_n, 5)`` tensor.

        Column 0 of ``targets`` is used as the image index; the remaining
        five columns are copied, and columns 1:5 of the result are scaled by
        ``scale_tensor`` and converted with ``xywh2xyxy``.
        """
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 5, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        """Turn distance predictions into xyxy boxes around ``anchor_points``.

        With DFL enabled, each side's logits are softmax-normalised and
        reduced to their expected value under ``self.proj`` first.
        """
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def _flatten_branch(self, feats):
        """Concatenate one branch's feature maps into (distri, scores), each (b, anchors, channels)."""
        flat = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2)
        distri, scores = flat.split((self.reg_max * 4, self.nc), 1)
        return distri.permute(0, 2, 1).contiguous(), scores.permute(0, 2, 1).contiguous()

    def __call__(self, p, targets, img=None, epoch=0):
        """Compute the combined loss for both branches.

        Args:
            p: Either a tuple whose element ``[1]`` holds the two branches'
                feature-map lists, or a sequence holding them directly.
            targets: Flat targets consumed by :meth:`preprocess`.
            img: Unused.
            epoch: Unused.

        Returns:
            ``(loss.sum() * batch_size, loss.detach())`` where ``loss`` holds
            the weighted ``(box, cls, dfl)`` terms.
        """
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        feats = p[1][0] if isinstance(p, tuple) else p[0]
        feats2 = p[1][1] if isinstance(p, tuple) else p[1]

        pred_distri, pred_scores = self._flatten_branch(feats)
        pred_distri2, pred_scores2 = self._flatten_branch(feats2)

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2)  # xyxy, (b, h*w, 4)

        # A single assignment, driven by the second branch, supervises both branches.
        _, target_bboxes, target_scores, fg_mask = self.assigner(
            pred_scores2.detach().sigmoid(),
            (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt)

        target_bboxes /= stride_tensor
        # Clamp the normaliser so a batch without positive assignments gives a
        # finite loss instead of dividing by zero (same guard as ComputeLoss).
        target_scores_sum = max(target_scores.sum(), 1)

        # cls loss
        loss[1] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE
        loss[1] *= 0.25
        loss[1] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            loss[0], loss[2], _ = self.bbox_loss(pred_distri,
                                                 pred_bboxes,
                                                 anchor_points,
                                                 target_bboxes,
                                                 target_scores,
                                                 target_scores_sum,
                                                 fg_mask)
            loss[0] *= 0.25
            loss[2] *= 0.25
            loss0_, loss2_, _ = self.bbox_loss(pred_distri2,
                                               pred_bboxes2,
                                               anchor_points,
                                               target_bboxes,
                                               target_scores,
                                               target_scores_sum,
                                               fg_mask)
            loss[0] += loss0_
            loss[2] += loss2_

        loss[0] *= 7.5  # box gain
        loss[1] *= 0.5  # cls gain
        loss[2] *= 1.5  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)
class BboxLoss(nn.Module):
    """Box regression loss: a CIoU term plus an optional distribution focal term."""

    def __init__(self, reg_max, use_dfl=False):
        """Store the largest distance bin index and whether the DFL term is enabled."""
        super().__init__()
        self.reg_max = reg_max
        self.use_dfl = use_dfl

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum,
                fg_mask):
        """Compute the IoU and DFL losses over the foreground anchors.

        Args:
            pred_dist: Raw distance logits, selected with ``fg_mask`` and
                reshaped to ``(-1, 4, reg_max + 1)``.
            pred_bboxes: Decoded predicted boxes (xyxy).
            anchor_points: Anchor centres passed to ``bbox2dist``.
            target_bboxes: Assigned target boxes (xyxy).
            target_scores: Assigned scores; their sum over the last axis
                weights each foreground anchor.
            target_scores_sum: Normaliser for both loss terms.
            fg_mask: Boolean foreground mask over (batch, anchors).

        Returns:
            ``(loss_iou, loss_dfl, iou)``; ``loss_dfl`` is a zero tensor when
            DFL is disabled.
        """
        box_sel = fg_mask.unsqueeze(-1).repeat([1, 1, 4])  # (b, h*w, 4)

        def _pick_boxes(tensor):
            # Keep the foreground rows of a (b, h*w, 4) tensor as (n, 4).
            return torch.masked_select(tensor, box_sel).view(-1, 4)

        fg_pred = _pick_boxes(pred_bboxes)
        fg_target = _pick_boxes(target_bboxes)
        weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1)

        # iou loss
        iou = bbox_iou(fg_pred, fg_target, xywh=False, CIoU=True)
        weighted_iou = 1.0 - iou
        weighted_iou *= weight
        loss_iou = weighted_iou.sum() / target_scores_sum

        # dfl loss
        if not self.use_dfl:
            return loss_iou, torch.tensor(0.0).to(pred_dist.device), iou

        n_bins = self.reg_max + 1
        dist_sel = fg_mask.unsqueeze(-1).repeat([1, 1, n_bins * 4])
        fg_dist = torch.masked_select(pred_dist, dist_sel).view(-1, 4, n_bins)
        ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        fg_ltrb = _pick_boxes(ltrb)
        weighted_dfl = self._df_loss(fg_dist, fg_ltrb) * weight
        loss_dfl = weighted_dfl.sum() / target_scores_sum
        return loss_iou, loss_dfl, iou

    def _df_loss(self, pred_dist, target):
        """Cross-entropy against the two bins bracketing each target, linearly weighted."""
        n_bins = self.reg_max + 1
        left = target.to(torch.long)
        right = left + 1
        w_left = right.to(torch.float) - target
        w_right = 1 - w_left
        ce_left = F.cross_entropy(pred_dist.view(-1, n_bins), left.view(-1), reduction="none")
        ce_right = F.cross_entropy(pred_dist.view(-1, n_bins), right.view(-1), reduction="none")
        left_term = ce_left.view(left.shape) * w_left
        right_term = ce_right.view(left.shape) * w_right
        return (left_term + right_term).mean(-1, keepdim=True)
def bbox_decode(self, anchor_points, pred_dist):
    """Decode distance predictions into xyxy boxes around ``anchor_points``.

    With DFL enabled, ``pred_dist`` is viewed as four per-side
    distributions, softmax-normalised, and reduced to its expected value
    under ``self.proj``; otherwise it is passed to ``dist2bbox`` unchanged.
    """
    if not self.use_dfl:
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    batch, n_anchors, channels = pred_dist.shape
    bin_values = self.proj.type(pred_dist.dtype)
    side_probs = pred_dist.view(batch, n_anchors, 4, channels // 4).softmax(3)
    expected_dist = side_probs.matmul(bin_values)
    return dist2bbox(expected_dist, anchor_points, xywh=False)
torch.cat([xi.view(feats2[0].shape[0], self.no, -1) for xi in feats2], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() pred_distri3, pred_scores3 = torch.cat([xi.view(feats3[0].shape[0], self.no, -1) for xi in feats3], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores3 = pred_scores3.permute(0, 2, 1).contiguous() pred_distri3 = pred_distri3.permute(0, 2, 1).contiguous() dtype = pred_scores.dtype batch_size, grid_size = pred_scores.shape[:2] imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) # targets targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) # pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) pred_bboxes3 = self.bbox_decode(anchor_points, pred_distri3) # xyxy, (b, h*w, 4) target_labels, target_bboxes, target_scores, fg_mask = self.assigner( pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_labels2, target_bboxes2, target_scores2, fg_mask2 = self.assigner2( pred_scores2.detach().sigmoid(), (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_labels3, target_bboxes3, target_scores3, fg_mask3 = self.assigner3( pred_scores3.detach().sigmoid(), (pred_bboxes3.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_bboxes /= stride_tensor target_scores_sum = max(target_scores.sum(), 1) target_bboxes2 /= stride_tensor 
def smooth(y, f=0.05):
    """Smooth a 1-D array with a box filter spanning roughly fraction ``f`` of its length.

    The input is edge-padded with its first and last values so the result
    has the same length as ``y``.

    Args:
        y: Non-empty 1-D array of values.
        f: Filter width as a fraction of ``len(y)``.

    Returns:
        Smoothed array with ``len(y)`` elements.
    """
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements
    if nf % 2 == 0:
        # The padding below is nf // 2 per side, which only restores the input
        # length for an odd filter; an even nf would return len(y) + 1 values.
        nf += 1
    p = np.ones(nf // 2)  # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed
class ConfusionMatrix:
    """Accumulate a detection confusion matrix across batches.

    ``matrix[i, j]`` counts detections of class ``i`` matched to ground
    truths of class ``j``. Index ``nc`` is the extra background row/column.
    """

    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
    def __init__(self, nc, conf=0.25, iou_thres=0.45):
        """Create an empty ``(nc + 1, nc + 1)`` matrix.

        Args:
            nc: Number of classes.
            conf: Detections at or below this confidence are ignored.
            iou_thres: Minimum IoU for a detection to match a label.
        """
        self.matrix = np.zeros((nc + 1, nc + 1))
        self.nc = nc  # number of classes
        self.conf = conf
        self.iou_thres = iou_thres

    def process_batch(self, detections, labels):
        """Update the confusion matrix with one image's detections and labels.

        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        if detections is None:
            # NOTE(review): ``labels`` is iterated element-wise as class indices
            # here, so callers presumably pass a 1-D class vector — confirm.
            gt_classes = labels.int()
            for gc in gt_classes:
                self.matrix[self.nc, gc] += 1  # background FN
            return

        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            # Rows of (label index, detection index, iou).
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                # Keep the highest-IoU pair per detection, then per label.
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(int)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[detection_classes[m1[j]], gc] += 1  # correct
            else:
                self.matrix[self.nc, gc] += 1  # true background

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[dc, self.nc] += 1  # predicted background

    def matrix(self):
        # NOTE: on instances this method is shadowed by the ``self.matrix``
        # array assigned in __init__; kept only for interface compatibility.
        return self.matrix

    def tp_fp(self):
        """Return per-class (true positives, false positives), background excluded."""
        tp = self.matrix.diagonal()  # true positives
        fp = self.matrix.sum(1) - tp  # false positives
        # fn = self.matrix.sum(0) - tp  # false negatives (missed detections)
        return tp[:-1], fp[:-1]  # remove background class

    @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure')
    def plot(self, normalize=True, save_dir='', names=()):
        """Save the matrix as ``confusion_matrix.png`` in ``save_dir``.

        Args:
            normalize: Divide each column by its sum before plotting.
            save_dir: Output directory.
            names: Class names used as tick labels when there are fewer than
                99 of them and their count equals ``nc``; any sequence.
        """
        import seaborn as sn

        array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1)  # normalize columns
        array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

        fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True)
        nc, nn = self.nc, len(names)  # number of classes, names
        sn.set(font_scale=1.0 if nc < 50 else 0.8)  # for label size
        labels = (0 < nn < 99) and (nn == nc)  # apply names to ticklabels
        # list() so tuple (the default type) and list names both work.
        ticklabels = (list(names) + ['background']) if labels else "auto"
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
            sn.heatmap(array,
                       ax=ax,
                       annot=nc < 30,
                       annot_kws={
                           "size": 8},
                       cmap='Blues',
                       fmt='.2f',
                       square=True,
                       vmin=0.0,
                       xticklabels=ticklabels,
                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
        # Columns are ground-truth classes and rows are predicted classes.
        ax.set_xlabel('True')
        ax.set_ylabel('Predicted')
        ax.set_title('Confusion Matrix')
        fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
        plt.close(fig)

    def print(self):
        """Print the matrix one row per line, values separated by spaces."""
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))
def box_iou(box1, box2, eps=1e-7):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Compute the pairwise intersection-over-union (Jaccard index) of two box sets.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
        eps (float): added to the denominator to avoid division by zero
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    top_left1, bottom_right1 = box1.unsqueeze(1).chunk(2, 2)  # (N, 1, 2) each
    top_left2, bottom_right2 = box2.unsqueeze(0).chunk(2, 2)  # (1, M, 2) each

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    overlap_wh = (torch.min(bottom_right1, bottom_right2) - torch.max(top_left1, top_left2)).clamp(0)
    inter = overlap_wh.prod(2)

    # IoU = inter / (area1 + area2 - inter)
    area1 = (bottom_right1 - top_left1).prod(2)
    area2 = (bottom_right2 - top_left2).prod(2)
    return inter / (area1 + area2 - inter + eps)
@threaded
def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
    """Plot per-class metric-versus-confidence curves and their smoothed mean.

    Args:
        px: x values shared by all curves.
        py: 2-D array with one curve per row.
        save_dir: Output image path passed to ``fig.savefig``.
        names: Class names indexed by row; a per-class legend is drawn only
            when there are between 1 and 20 of them.
        xlabel: x-axis label.
        ylabel: y-axis label, also used in the title.
    """
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    show_class_legend = 0 < len(names) < 21  # display per-class legend if < 21 classes
    if not show_class_legend:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)
    else:
        for idx, curve in enumerate(py):
            ax.plot(px, curve, linewidth=1, label=f'{names[idx]}')  # plot(confidence, metric)

    mean_curve = smooth(py.mean(0), 0.05)
    peak = mean_curve.argmax()
    summary = f'all classes {mean_curve.max():.2f} at {px[peak]:.3f}'
    ax.plot(px, mean_curve, linewidth=3, color='blue', label=summary)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    ax.set_title(f'{ylabel}-Confidence Curve')
    fig.savefig(save_dir, dpi=250)
    plt.close(fig)
def mixup(im, labels, segments, seg_cls, semantic_masks, im2, labels2, segments2, seg_cls2, semantic_masks2):
    """Apply MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf to two samples.

    The images are blended with a ratio drawn from Beta(32, 32) and cast to
    uint8; every annotation array of the second sample is appended to the
    matching array of the first along axis 0.

    Returns:
        Tuple ``(im, labels, segments, seg_cls, semantic_masks)``.
    """
    ratio = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    blended = (im * ratio + im2 * (1 - ratio)).astype(np.uint8)
    annotation_pairs = (
        (labels, labels2),
        (segments, segments2),
        (seg_cls, seg_cls2),
        (semantic_masks, semantic_masks2),
    )
    merged = [np.concatenate(pair, 0) for pair in annotation_pairs]
    return (blended, *merged)
math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() # ax[0].imshow(im[:, :, ::-1]) # base # ax[1].imshow(im2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) new_segments = [] new_semantic_masks = [] if n: new = np.zeros((n, 4)) segments = resample_segments(segments) # upsample for i, segment in enumerate(segments): xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) new_segments.append(xy) semantic_masks = resample_segments(semantic_masks) for i, semantic_mask in enumerate(semantic_masks): #if i < n: # xy = np.ones((len(segments[i]), 3)) # xy[:, :2] = segments[i] # xy = xy @ M.T # transform # xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # new[i] = segment2box(xy, width, height) # new_segments.append(xy) xy_s = np.ones((len(semantic_mask), 3)) xy_s[:, :2] = semantic_mask xy_s = xy_s @ M.T # transform xy_s = (xy_s[:, :2] / xy_s[:, 2:3] if perspective else xy_s[:, :2]) # perspective rescale or affine new_semantic_masks.append(xy_s) # filter candidates i = 
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image to fit ``new_shape`` and pad the remainder with ``color``.

    Args:
        im: image array whose first two dimensions are (height, width).
        new_shape: target (height, width); an int is treated as a square.
        color: border fill value passed to ``cv2.copyMakeBorder``.
        auto: keep only the padding remainder modulo ``stride`` (minimum rectangle).
        scaleFill: when ``auto`` is false, stretch to ``new_shape`` with no padding.
        scaleup: when false, the scale factor is capped at 1.0 (never enlarge).
        stride: modulus used by ``auto``.

    Returns:
        ``(im, ratio, (dw, dh))`` where ``ratio`` is the (width, height) scale
        applied and ``(dw, dh)`` is the per-side padding before rounding.
    """
    src_h, src_w = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old), limited by the tighter dimension
    gain = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        gain = min(gain, 1.0)

    ratio = (gain, gain)  # width, height ratios
    resized_wh = (int(round(src_w * gain)), int(round(src_h * gain)))
    pad_w = dst_w - resized_wh[0]
    pad_h = dst_h - resized_wh[1]

    if auto:  # minimum rectangle
        pad_w, pad_h = np.mod(pad_w, stride), np.mod(pad_h, stride)
    elif scaleFill:  # stretch
        pad_w, pad_h = 0.0, 0.0
        resized_wh = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Split the padding between the two sides
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The +-0.1 nudge sends an odd pixel of padding to the bottom/right side
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (pad_w, pad_h)
axis=-1) # (n, 4) ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) n = len(indexes) for j in random.sample(list(indexes), k=round(p * n)): l, box, s = labels[j], boxes[j], segments[j] labels = np.concatenate((labels, [[l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) seg_cls.append(l[0].astype(int)) semantic_masks.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) result = cv2.flip(im, 1) # augment segments (flip left-right) i = cv2.flip(im_new, 1).astype(bool) im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug return im, labels, segments, seg_cls, semantic_masks ================================================ FILE: utils/panoptic/dataloaders.py ================================================ import os import random import pickle from pathlib import Path from itertools import repeat from multiprocessing.pool import Pool, ThreadPool import cv2 import numpy as np import torch from torch.utils.data import DataLoader, distributed from tqdm import tqdm from ..augmentations import augment_hsv from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker, get_hash, verify_image_label, HELP_URL, TQDM_BAR_FORMAT, LOCAL_RANK from ..general import NUM_THREADS, LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn from ..torch_utils import torch_distributed_zero_first from ..coco_utils import annToMask, getCocoIds from .augmentations import mixup, random_perspective, copy_paste, letterbox RANK = int(os.getenv('RANK', -1)) def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, workers=8, image_weights=False, close_mosaic=False, quad=False, prefix='', shuffle=False, mask_downsample_ratio=1, overlap_mask=False): if rect and shuffle: LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader 
def img2stuff_paths(img_paths):
    """Map image paths to their semantic ("stuff") label file paths.

    For each path the last ``<sep>images<sep>`` component is replaced with
    ``<sep>stuff<sep>`` and the file extension (text after the final ``.``)
    is replaced with ``.txt``.
    """
    images_part = f'{os.sep}images{os.sep}'
    stuff_part = f'{os.sep}stuff{os.sep}'  # /images/ -> /stuff/

    stuff_paths = []
    for img_path in img_paths:
        # rsplit(..., 1) touches only the right-most "/images/" occurrence
        relocated = stuff_part.join(img_path.rsplit(images_part, 1))
        without_ext = relocated.rsplit('.', 1)[0]
        stuff_paths.append(without_ext + '.txt')
    return stuff_paths
single_cls, stride, pad, min_items, prefix) self.downsample_ratio = downsample_ratio self.overlap = overlap # semantic segmentation self.coco_ids = getCocoIds() # Check cache self.seg_files = img2stuff_paths(self.im_files) # labels p = Path(path) cache_path = (p.with_suffix('') if p.is_file() else Path(self.seg_files[0]).parent) cache_path = Path(str(cache_path) + '_stuff').with_suffix('.cache') try: cache, exists = np.load(cache_path, allow_pickle = True).item(), True # load dict #assert cache['version'] == self.cache_version # matches current version #assert cache['hash'] == get_hash(self.seg_files + self.im_files) # identical hash except Exception: cache, exists = self.cache_seg_labels(cache_path, prefix), False # run cache ops # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total if exists and LOCAL_RANK in {-1, 0}: d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt" tqdm(None, desc = (prefix + d), total = n, initial = n, bar_format = TQDM_BAR_FORMAT) # display cache results if cache['msgs']: LOGGER.info('\n'.join(cache['msgs'])) # display warnings assert (0 < nf) or (not augment), f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items seg_labels, _, self.semantic_masks = zip(*cache.values()) nl = len(np.concatenate(seg_labels, 0)) # number of labels assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. 
{HELP_URL}' # Update labels self.seg_cls = [] include_class = [] # filter labels to include only these classes (optional) include_class_array = np.array(include_class).reshape(1, -1) for i, (label, semantic_masks) in enumerate(zip(seg_labels, self.semantic_masks)): self.seg_cls.append((label[:, 0].astype(int)).tolist()) if include_class: j = (label[:, 0:1] == include_class_array).any(1) if semantic_masks: self.semantic_masks[i] = semantic_masks[j] if single_cls: # single-class training, merge all classes into 0 if semantic_masks: self.semantic_masks[i][:, 0] = 0 def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights hyp = self.hyp mosaic = self.mosaic and random.random() < hyp['mosaic'] masks = [] if mosaic: # Load mosaic img, labels, segments, seg_cls, semantic_masks = self.load_mosaic(index) shapes = None # MixUp augmentation if random.random() < hyp["mixup"]: img, labels, segments, seg_cls, semantic_masks = mixup(img, labels, segments, seg_cls, semantic_masks, *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image img, (h0, w0), (h, w) = self.load_image(index) # Letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy segments = self.segments[index].copy() if len(segments): for i_s in range(len(segments)): segments[i_s] = xyn2xy( segments[i_s], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1], ) seg_cls = self.seg_cls[index].copy() semantic_masks = self.semantic_masks[index].copy() #semantic_masks = [xyn2xy(x, ratio[0] * w, ratio[1] * h, padw = pad[0], padh = pad[1]) for x in semantic_masks] if len(semantic_masks): for ss in range(len(semantic_masks)): semantic_masks[ss] = xyn2xy( semantic_masks[ss], ratio[0] * w, 
ratio[1] * h, padw = pad[0], padh = pad[1], ) if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: img, labels, segments, semantic_masks = random_perspective( img, labels, segments=segments, semantic_masks = semantic_masks, degrees=hyp["degrees"], translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"]) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.overlap: masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, downsample_ratio=self.downsample_ratio) masks = masks[None] # (640, 640) -> (1, 640, 640) labels = labels[sorted_idx] else: masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio)) semantic_masks = polygons2masks(img.shape[:2], semantic_masks, color = 1, downsample_ratio=self.downsample_ratio) #semantic_masks = polygons2masks(img.shape[:2], semantic_masks, color = 1, downsample_ratio=1) semantic_masks = torch.from_numpy(semantic_masks) # TODO: albumentations support if self.augment: # Albumentations # there are some augmentation that won't change boxes and masks, # so just be it for now. 
def load_mosaic(self, index):
    """Load one image plus three random images into a 2s x 2s mosaic.

    Args:
        index: dataset index of the first tile; three more indices are drawn
            with ``random.choices(self.indices, k=3)``.

    Returns:
        ``(img4, labels4, segments4, seg_cls, semantic_masks4)`` as produced
        after ``copy_paste`` and ``random_perspective`` have been applied.

    Every box, instance polygon and semantic polygon is clipped to
    ``[0, 2 * s]`` before augmentation. Semantic polygons are converted to
    pixel coordinates whenever present (not only when the tile has instance
    labels), consistent with the single-image path in ``__getitem__``.
    """
    labels4, segments4, seg_cls, semantic_masks4 = [], [], [], []
    s = self.img_size
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y

    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = self.load_image(index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        labels = self.labels[index].copy()
        segments = self.segments[index].copy()
        semantic_masks = self.semantic_masks[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        # xyn2xy yields fresh arrays, so the in-place clip below never touches
        # the polygons cached on the dataset.
        semantic_masks = [xyn2xy(x, w, h, padw, padh) for x in semantic_masks]

        labels4.append(labels)
        segments4.extend(segments)
        seg_cls.extend(self.seg_cls[index].copy())
        semantic_masks4.extend(semantic_masks)

    # Concat/clip labels: clip all boxes and every polygon, independently of
    # how many instance segments vs. semantic polygons there are.
    labels4 = np.concatenate(labels4, 0)
    np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:])
    for polygon in (*segments4, *semantic_masks4):
        np.clip(polygon, 0, 2 * s, out=polygon)

    # Augment
    img4, labels4, segments4, seg_cls, semantic_masks4 = copy_paste(img4,
                                                                    labels4,
                                                                    segments4,
                                                                    seg_cls,
                                                                    semantic_masks4,
                                                                    p=self.hyp["copy_paste"])
    img4, labels4, segments4, semantic_masks4 = random_perspective(img4,
                                                                   labels4,
                                                                   segments4,
                                                                   semantic_masks4,
                                                                   degrees=self.hyp["degrees"],
                                                                   translate=self.hyp["translate"],
                                                                   scale=self.hyp["scale"],
                                                                   shear=self.hyp["shear"],
                                                                   perspective=self.hyp["perspective"],
                                                                   border=self.mosaic_border)  # border to remove
    return img4, labels4, segments4, seg_cls, semantic_masks4
@staticmethod
def collate_fn(batch):
    """Merge a list of per-sample tuples into one batch.

    Each sample is ``(img, label, path, shapes, masks, semantic_masks)``.
    Column 0 of every label tensor is overwritten in place with the sample's
    position in the batch (the target image index used by build_targets()).

    Returns:
        ``(imgs, labels, paths, shapes, masks, semantic_masks)`` where images
        and semantic masks are stacked on a new leading axis, labels and
        instance masks are concatenated along axis 0, and paths/shapes stay
        tuples.
    """
    images, targets, paths, shapes, inst_masks, sem_masks = zip(*batch)  # transposed

    for image_idx, target in enumerate(targets):
        target[:, 0] = image_idx  # add target image index for build_targets()

    return (
        torch.stack(images, 0),
        torch.cat(targets, 0),
        paths,
        shapes,
        torch.cat(inst_masks, 0),
        torch.stack(sem_masks, 0),
    )
def crop_mask(masks, boxes):
    """
    Zero out every mask pixel that lies outside its own bounding box.

    Args:
        - masks: tensor unpacked as [n, h, w] (one mask per box)
        - boxes: [n, 4] tensor of (x1, y1, x2, y2) in the masks' pixel grid

    Returns:
        masks multiplied by a boolean window that is true where
        x1 <= x < x2 and y1 <= y < y2, broadcast per mask.
    """
    _, height, width = masks.shape
    left, top, right, bottom = boxes.unsqueeze(-1).chunk(4, dim=1)  # each shape(n,1,1)

    xs = torch.arange(width, device=masks.device, dtype=left.dtype).view(1, 1, -1)  # x coords shape(1,1,w)
    ys = torch.arange(height, device=masks.device, dtype=left.dtype).view(1, -1, 1)  # y coords shape(1,h,1)

    inside = (xs >= left) * (xs < right) * (ys >= top) * (ys < bottom)  # shape(n,h,w)
    return masks * inside
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Build binary instance masks from prototypes, cropping before upsampling.

    protos: [mask_dim, mask_h, mask_w] prototype tensor
    masks_in: [n, mask_dim] mask coefficients, n is number of masks after nms
    bboxes: [n, 4] xyxy boxes in input-image pixels, n is number of masks after nms
    shape: input image size, (h, w)
    upsample: if True, bilinearly resize the cropped masks to ``shape``

    return: [n, mask_h, mask_w] (or [n, h, w] when upsample=True), thresholded
    in place at 0.5
    """
    mask_dim, proto_h, proto_w = protos.shape  # CHW
    img_h, img_w = shape

    flat_protos = protos.float().view(mask_dim, -1)
    masks = (masks_in @ flat_protos).sigmoid().view(-1, proto_h, proto_w)  # CHW

    # Bring the boxes from image pixels down to the prototype grid
    x_scale = proto_w / img_w
    y_scale = proto_h / img_h
    scaled_boxes = bboxes.clone()
    for column, factor in ((0, x_scale), (2, x_scale), (3, y_scale), (1, y_scale)):
        scaled_boxes[:, column] *= factor

    masks = crop_mask(masks, scaled_boxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
def masks_iou(mask1, mask2, eps=1e-7):
    """
    Element-wise IoU between paired flattened masks.

    mask1: [N, n] predicted masks, one row per object
    mask2: [N, n] ground-truth masks, row i is compared with row i of mask1
    eps: small constant added to the union to avoid division by zero
    Note: n means image_w x image_h

    return: masks iou, (N, )
    """
    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
    # Keep the union 1-D: indexing it with [None] would broadcast the result
    # to (1, N) and break the documented (N, ) return shape.
    union = mask1.sum(1) + mask2.sum(1) - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)
def __call__(self, preds, targets, masks):  # predictions, targets, gt masks
    """Compute the combined box / objectness / class / mask loss.

    Args:
        preds: pair ``(p, proto)``; ``p`` is the sequence of per-layer
            prediction tensors and ``proto`` is unpacked as
            (batch, num_masks, mask_h, mask_w).
        targets: target rows forwarded to ``self.build_targets``.
        masks: ground-truth masks; resized with nearest interpolation when
            their last two dims differ from the prototype resolution.

    Returns:
        ``(loss * bs, items)`` where ``items`` is the detached concatenation
        of (lbox, lseg, lobj, lcls).
    """
    p, proto = preds
    bs, nm, mask_h, mask_w = proto.shape  # batch size, number of masks, mask height, mask width
    lcls = torch.zeros(1, device=self.device)
    lbox = torch.zeros(1, device=self.device)
    lobj = torch.zeros(1, device=self.device)
    lseg = torch.zeros(1, device=self.device)
    tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets)  # targets

    # Losses
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)  # target obj

        n = b.shape[0]  # number of targets
        if n:
            # last dim is split as xy(2), wh(2), obj(1, unused here), classes(nc), mask coefficients(nm)
            pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1)  # subset of predictions

            # Box regression
            pxy = pxy.sigmoid() * 2 - 0.5
            pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze()  # iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss

            # Objectness: the (detached, non-negative) IoU becomes the objectness target
            iou = iou.detach().clamp(0).type(tobj.dtype)
            if self.sort_obj_iou:
                j = iou.argsort()
                b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
            if self.gr < 1:
                iou = (1.0 - self.gr) + self.gr * iou
            tobj[b, a, gj, gi] = iou  # iou ratio

            # Classification
            if self.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(pcls, self.cn, device=self.device)  # targets
                t[range(n), tcls[i]] = self.cp
                lcls += self.BCEcls(pcls, t)  # BCE

            # Mask regression
            if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0]
            marea = xywhn[i][:, 2:].prod(1)  # mask width, height normalized
            mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device))
            for bi in b.unique():
                j = b == bi  # matching index
                if self.overlap:
                    # overlap mode: one index-coded mask per image; pick pixels equal to each target's index
                    mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0)
                else:
                    mask_gti = masks[tidxs[i]][j]
                lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j])

        obji = self.BCEobj(pi[..., 4], tobj)
        lobj += obji * self.balance[i]  # obj loss
        if self.autobalance:
            self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

    if self.autobalance:
        # renormalise so the balance weight at index self.ssi is 1
        self.balance = [x / self.balance[self.ssi] for x in self.balance]
    lbox *= self.hyp["box"]
    lobj *= self.hyp["obj"]
    lcls *= self.hyp["cls"]
    lseg *= self.hyp["box"] / bs  # mask loss reuses the "box" gain, divided by batch size

    loss = lbox + lobj + lcls + lseg
    return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
reduction="none") return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] gain = torch.ones(8, device=self.device) # normalized to gridspace gain ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) if self.overlap: batch = p[0].shape[0] ti = [] for i in range(batch): num = (targets[:, 0] == i).sum() # find number of targets of each image ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) ti = torch.cat(ti, 1) # (na, nt) else: ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices g = 0.5 # bias off = torch.tensor( [ [0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm ], device=self.device).float() * g # offsets for i in range(self.nl): anchors, shape = self.anchors[i], p[i].shape gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain # Match targets to anchors t = targets * gain # shape(3,n,7) if nt: # Matches r = t[..., 4:6] / anchors[:, None] # wh ratio j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter # Offsets gxy = t[:, 2:4] # grid xy gxi = gain[[2, 3]] - gxy # inverse j, k = ((gxy % 1 < g) & (gxy > 1)).T l, m = ((gxi % 1 < g) & (gxi > 1)).T j = torch.stack((torch.ones_like(j), j, k, l, m)) t = t.repeat((5, 1, 1))[j] offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] else: t = targets[0] offsets = 0 # Define bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors (a, tidx), (b, c) = 
class FocalLoss(nn.Module):
    """Focal-loss wrapper around an existing element-wise criterion.

    Usage: ``criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)``.
    The wrapped criterion's own reduction is remembered and then switched to
    "none" so the focal weighting can be applied per element; the remembered
    reduction is applied to the weighted loss in ``forward``.
    """

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction  # reduction requested by the caller
        self.loss_fcn.reduction = "none"  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss of logits ``pred`` against targets ``true``."""
        base_loss = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * prob + (1 - true) * (1 - prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        weighted = base_loss * (alpha_factor * modulating_factor)

        if self.reduction == "mean":
            return weighted.mean()
        if self.reduction == "sum":
            return weighted.sum()
        return weighted  # 'none'
class ComputeLoss:
    """Panoptic training loss built on task-aligned assignment.

    Calling an instance returns a 6-element loss vector laid out as
    ``[box, seg (instance mask), cls, dfl, semantic focal, semantic dice]``.
    """

    def __init__(self, model, use_dfl=True, overlap=True):
        """Read hyperparameters and head geometry from ``model`` and build the sub-losses.

        Args:
            model: model exposing ``hyp`` (dict with ``cls_pw`` and ``fl_gamma``,
                optionally ``label_smoothing``) and whose last layer provides
                ``nl``, ``nc``, ``no``, ``nm``, ``reg_max`` and ``stride``.
            use_dfl: enable the distribution focal loss term and DFL box decoding.
            overlap: selects how ``masks`` is interpreted in ``__call__``.
        """
        device = next(model.parameters()).device  # get model device
        h = model.hyp  # hyperparameters

        # Define criteria
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none')

        # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
        self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0))  # positive, negative BCE targets

        # Focal loss
        g = h["fl_gamma"]  # focal loss gamma
        if g > 0:
            BCEcls = FocalLoss(BCEcls, g)

        m = de_parallel(model).model[-1]  # Detect() module
        self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
        self.BCEcls = BCEcls
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.nl = m.nl  # number of layers
        self.no = m.no
        self.nm = m.nm
        self.overlap = overlap
        self.reg_max = m.reg_max
        self.device = device

        # Assigner settings can be overridden through the YOLOM / YOLOA / YOLOB
        # environment variables (topk, alpha, beta).
        self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)),
                                            num_classes=self.nc,
                                            alpha=float(os.getenv('YOLOA', 0.5)),
                                            beta=float(os.getenv('YOLOB', 6.0)))
        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device)
        self.proj = torch.arange(m.reg_max).float().to(device)  # / 120.0
        self.use_dfl = use_dfl

    def preprocess(self, targets, batch_size, scale_tensor):
        """Pack flat target rows into a zero-padded per-image tensor.

        Args:
            targets: ``(n, 6)`` rows of ``(image index, class, x, y, w, h)``.
            batch_size: number of images in the batch.
            scale_tensor: 4-element multiplier applied to the xywh columns
                before they are converted to xyxy.

        Returns:
            ``(batch_size, max targets per image, 5)`` tensor of ``(class, x1, y1, x2, y2)``;
            unused rows stay zero.
        """
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 5, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        """Decode predicted distances into xyxy boxes around ``anchor_points``.

        With DFL enabled, each side's ``reg_max`` logits are soft-maxed and
        reduced to their expected value via ``self.proj`` first.
        """
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def __call__(self, p, targets, masks, semasks, img=None, epoch=0):
        """Compute the panoptic loss for one batch.

        Args:
            p: either the 4-tuple ``(feats, pred_masks, proto, psemasks)`` or a
                sequence whose element ``[1]`` is that 4-tuple.
            targets: ``(n, 6)`` rows of ``(image index, class, x, y, w, h)``; the
                box columns are multiplied by the image size, so they are
                presumably normalised to [0, 1] -- confirm against the dataloader.
            masks: instance masks. With ``overlap=True``, one index map per image
                in which value ``k + 1`` marks the ``k``-th target of that image;
                otherwise one mask per target row.
            semasks: semantic-segmentation targets matching ``psemasks``.
            img, epoch: unused.

        Returns:
            ``(loss.sum() * batch_size, loss.detach())`` where ``loss`` holds
            ``[box, seg, cls, dfl, semantic focal, semantic dice]``.
        """
        loss = torch.zeros(6, device=self.device)  # box, seg, cls, dfl, focal, dice
        feats, pred_masks, proto, psemasks = p if len(p) == 4 else p[1]
        batch_size, _, mask_h, mask_w = proto.shape
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
        pred_masks = pred_masks.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size, grid_size = pred_scores.shape[:2]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        try:
            batch_idx = targets[:, 0].view(-1, 1)
            targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
            gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
            mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)  # padded (all-zero) rows are masked out
        except RuntimeError as e:
            raise TypeError('ERROR.') from e

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt)

        # Guard the normaliser: when no anchor is assigned (e.g. a batch without
        # labels) the raw sum is 0 and every term divided by it would be inf/NaN.
        target_scores_sum = max(target_scores.sum(), 1)

        # cls loss
        loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            loss[0], loss[3], _ = self.bbox_loss(pred_distri,
                                                 pred_bboxes,
                                                 anchor_points,
                                                 target_bboxes / stride_tensor,
                                                 target_scores,
                                                 target_scores_sum,
                                                 fg_mask)

            # masks loss
            if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
                masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

            for i in range(batch_size):
                if fg_mask[i].sum():
                    mask_idx = target_gt_idx[i][fg_mask[i]]
                    if self.overlap:
                        gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0)
                    else:
                        gt_mask = masks[batch_idx.view(-1) == i][mask_idx]
                    xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]]
                    marea = xyxy2xywh(xyxyn)[:, 2:].prod(1)
                    mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)
                    loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy,
                                                     marea)  # seg loss

        # Semantic Segmentation
        # focal loss
        pt = torch.flatten(psemasks, start_dim=2).permute(0, 2, 1)
        gt = torch.flatten(semasks, start_dim=2).permute(0, 2, 1)
        bs, _, _ = gt.shape
        total_loss = (sigmoid_focal_loss(pt, gt, alpha=.25, gamma=2., reduction='mean')) / 2.
        loss[4] += total_loss * 20.

        # dice loss
        pt = torch.flatten(psemasks.softmax(dim=1))
        gt = torch.flatten(semasks)
        inter_mask = torch.sum(torch.mul(pt, gt))
        union_mask = torch.sum(torch.add(pt, gt))
        dice_coef = (2. * inter_mask + 1.) / (union_mask + 1.)  # +1 smoothing on both sides
        loss[5] += (1. - dice_coef) / 2.

        loss[0] *= 7.5  # box gain
        loss[1] *= 2.5 / batch_size  # seg gain
        loss[2] *= 0.5  # cls gain
        loss[3] *= 1.5  # dfl gain
        loss[4] *= 2.5  # semantic focal gain
        loss[5] *= 2.5  # semantic dice gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, seg, cls, dfl, focal, dice)

    def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
        """Mask loss for one image.

        Mask coefficients ``pred`` are combined with ``proto`` into per-instance
        mask logits, scored with BCE, cropped to each box ``xyxy`` and divided
        by the box ``area``.
        """
        pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:])  # (n, 32) @ (32,80,80) -> (n,80,80)
        loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none')
        return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
class Metric:
    """Per-class precision / recall / F1 / AP container for one prediction type."""

    def __init__(self) -> None:
        self.p = []  # (nc, )
        self.r = []  # (nc, )
        self.f1 = []  # (nc, )
        self.all_ap = []  # (nc, 10)
        self.ap_class_index = []  # (nc, )

    @property
    def ap50(self):
        """AP@0.5 of all classes.

        Return:
            (nc, ) or [].
        """
        if not len(self.all_ap):
            return []
        return self.all_ap[:, 0]

    @property
    def ap(self):
        """AP@0.5:0.95

        Return:
            (nc, ) or [].
        """
        if not len(self.all_ap):
            return []
        return self.all_ap.mean(1)

    @property
    def mp(self):
        """mean precision of all classes.

        Return:
            float.
        """
        if not len(self.p):
            return 0.0
        return self.p.mean()

    @property
    def mr(self):
        """mean recall of all classes.

        Return:
            float.
        """
        if not len(self.r):
            return 0.0
        return self.r.mean()

    @property
    def map50(self):
        """Mean AP@0.5 of all classes.

        Return:
            float.
        """
        if not len(self.all_ap):
            return 0.0
        return self.all_ap[:, 0].mean()

    @property
    def map(self):
        """Mean AP@0.5:0.95 of all classes.

        Return:
            float.
        """
        if not len(self.all_ap):
            return 0.0
        return self.all_ap.mean()

    def mean_results(self):
        """Mean of results, return mp, mr, map50, map"""
        return self.mp, self.mr, self.map50, self.map

    def class_result(self, i):
        """class-aware result, return p[i], r[i], ap50[i], ap[i]"""
        return self.p[i], self.r[i], self.ap50[i], self.ap[i]

    def get_maps(self, nc):
        """Return an ``(nc,)`` array of per-class AP; classes without a result get the overall mAP."""
        per_class = np.zeros(nc) + self.map
        class_ap = self.ap
        for row, class_id in enumerate(self.ap_class_index):
            per_class[class_id] = class_ap[row]
        return per_class

    def update(self, results):
        """
        Args:
            results: tuple(p, r, ap, f1, ap_class)
        """
        self.p, self.r, self.all_ap, self.f1, self.ap_class_index = results


class Metrics:
    """Metric for boxes and masks."""

    def __init__(self) -> None:
        self.metric_box = Metric()
        self.metric_mask = Metric()

    def update(self, results):
        """
        Args:
            results: Dict{'boxes': Dict{}, 'masks': Dict{}}
        """
        # Relies on each inner dict's insertion order being (p, r, ap, f1, ap_class).
        box_values = list(results["boxes"].values())
        mask_values = list(results["masks"].values())
        self.metric_box.update(box_values)
        self.metric_mask.update(mask_values)

    def mean_results(self):
        """Box mean results followed by mask mean results (tuple concatenation)."""
        box, mask = self.metric_box, self.metric_mask
        return box.mean_results() + mask.mean_results()

    def class_result(self, i):
        """Box class result followed by mask class result (tuple concatenation)."""
        box, mask = self.metric_box, self.metric_mask
        return box.class_result(i) + mask.class_result(i)

    def get_maps(self, nc):
        """Element-wise sum of the box and mask per-class AP arrays."""
        box_maps = self.metric_box.get_maps(nc)
        mask_maps = self.metric_mask.get_maps(nc)
        return box_maps + mask_maps

    @property
    def ap_class_index(self):
        # boxes and masks have the same ap_class_index
        return self.metric_box.ap_class_index
= 1).permute(1, 0) # class x h x w -> (h x w) x class max_idx = semantic_mask.argmax(1) output_masks = (torch.zeros(semantic_mask.shape).to(self.device)).scatter(1, max_idx.unsqueeze(1), 1.0) # one hot: (h x w) x class output_masks = torch.reshape(output_masks.permute(1, 0), (nc, h, w)) # (h x w) x class -> class x h x w onehot_mask = output_masks.int() for c in range(self.nc): pred_mask = onehot_mask[c].to(device) target_mask = target_masks[b, c].to(device) # calculate IoU intersection = (torch.logical_and(pred_mask, target_mask).sum()).item() union = (torch.logical_or(pred_mask, target_mask).sum()).item() iou = 0. if (0 == union) else (intersection / union) # record class pixel counts, intersection counts, union counts self.c_bit_counts[c] += target_mask.int().sum() self.c_intersection_counts[c] += intersection self.c_union_counts[c] += union self.iou.append(iou) def results(self): # Mean IoU miou = 0. if (0 == len(self.iou)) else np.sum(self.iou) / (len(self.iou) * self.nc) # Frequency Weighted IoU c_iou = self.c_intersection_counts / (self.c_union_counts + 1) # add smooth # c_bit_counts = self.c_bit_counts.astype(int) total_c_bit_counts = self.c_bit_counts.sum() freq_ious = torch.zeros(1, dtype = torch.long).to(self.device) if (0 == total_c_bit_counts) else (self.c_bit_counts / total_c_bit_counts) * c_iou fwiou = (freq_ious.sum()).item() return (miou, fwiou) def reset(self): self.iou = [] self.c_bit_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) self.c_intersection_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) self.c_union_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) KEYS = [ "train/box_loss", "train/seg_loss", # train loss "train/cls_loss", "train/dfl_loss", "train/fcl_loss", "train/dic_loss", "metrics/precision(B)", "metrics/recall(B)", "metrics/mAP_0.5(B)", "metrics/mAP_0.5:0.95(B)", # metrics "metrics/precision(M)", "metrics/recall(M)", "metrics/mAP_0.5(M)", "metrics/mAP_0.5:0.95(M)", # metrics 
@threaded
def plot_images_and_masks(images, targets, masks, semasks, paths=None, fname='images.jpg', names=None):
    """Save a labelled mosaic of a batch, plus one semantic-mask overlay per image.

    Args:
        images: batch of images, ``(bs, c, h, w)`` tensor or array.
        targets: rows of ``(image index, class, x, y, w, h[, conf])``; a seventh
            column is treated as a confidence (prediction rather than label).
        masks: instance masks; values above 1 are read as an overlapping index
            map (one per image), otherwise as one mask per target row.
        semasks: semantic masks drawn on the per-image overlays.
        paths: optional image paths whose file names are printed on the tiles.
        fname: output path of the mosaic; overlays go to ``'{fname}_{idx}.jpg'``.
        names: optional class-index -> name lookup.
    """
    # Best-effort dump of the semantic masks: any ordinary failure here must not
    # prevent the mosaic below from being written.
    with contextlib.suppress(Exception):
        if images.shape[-2:] != semasks.shape[-2:]:
            m = torch.nn.Upsample(scale_factor=4, mode='nearest')
            semasks = m(semasks)

        for idx in range(images.shape[0]):
            output_img = draw_segmentation_masks(
                image=images[idx, :, :, :].cpu().to(dtype=torch.uint8),
                masks=semasks[idx, :, :, :].cpu().to(dtype=torch.bool),
                alpha=1)
            cv2.imwrite(
                '{}_{}.jpg'.format(fname, idx),
                torch.permute(output_img, (1, 2, 0)).numpy()
            )

    # Plot image grid with labels
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()
    if isinstance(masks, torch.Tensor):
        masks = masks.cpu().numpy().astype(int)
    if isinstance(semasks, torch.Tensor):
        semasks = semasks.cpu().numpy().astype(int)

    max_size = 1920  # max image size
    max_subplots = 16  # max image subplots, i.e. 4x4
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    # Annotate exactly the tiles that were copied into the mosaic.
    for i in range(bs):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(targets) > 0:
            idx = targets[:, 0] == i
            ti = targets[idx]  # image targets

            boxes = xywh2xyxy(ti[:, 2:6]).T
            classes = ti[:, 1].astype('int')
            labels = ti.shape[1] == 6  # labels if no conf column
            conf = None if labels else ti[:, 6]  # check for confidence presence (label vs pred)

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    boxes *= scale
            boxes[[0, 2]] += x
            boxes[[1, 3]] += y
            for j, box in enumerate(boxes.T.tolist()):
                cls = classes[j]
                color = colors(cls)
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh
                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
                    annotator.box_label(box, label, color=color)

            # Plot masks
            if len(masks):
                if masks.max() > 1.0:  # mean that masks are overlap
                    image_masks = masks[[i]]  # (1, 640, 640)
                    nl = len(ti)
                    index = np.arange(nl).reshape(nl, 1, 1) + 1
                    image_masks = np.repeat(image_masks, nl, axis=0)
                    image_masks = np.where(image_masks == index, 1.0, 0.0)
                else:
                    image_masks = masks[idx]

                im = np.asarray(annotator.im).copy()
                for j, box in enumerate(boxes.T.tolist()):
                    if labels or conf[j] > 0.25:  # 0.25 conf thresh
                        color = colors(classes[j])
                        mh, mw = image_masks[j].shape
                        if mh != h or mw != w:
                            mask = image_masks[j].astype(np.uint8)
                            mask = cv2.resize(mask, (w, h))
                            mask = mask.astype(bool)
                        else:
                            mask = image_masks[j].astype(bool)
                        # Blend 60% class colour into the masked pixels; skipped if the tile slice is invalid.
                        with contextlib.suppress(Exception):
                            im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6
                annotator.fromarray(im)
    annotator.im.save(fname)  # save
def make_anchors(feats, strides, grid_cell_offset=0.5):
    """Generate anchors from features.

    Returns ``(anchor_points, stride_tensor)``: the concatenated ``(x, y)`` cell
    centres of every feature map (in grid units, shifted by ``grid_cell_offset``)
    and a matching ``(N, 1)`` column holding each point's stride.
    """
    assert feats is not None
    dtype, device = feats[0].dtype, feats[0].device
    points_per_level, strides_per_level = [], []
    for level, stride in enumerate(strides):
        _batch, _channels, h, w = feats[level].shape
        xs = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
        ys = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
        if TORCH_1_10:
            grid_y, grid_x = torch.meshgrid(ys, xs, indexing='ij')
        else:
            grid_y, grid_x = torch.meshgrid(ys, xs)
        points_per_level.append(torch.stack((grid_x, grid_y), -1).view(-1, 2))
        strides_per_level.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
    return torch.cat(points_per_level), torch.cat(strides_per_level)


def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Transform distance(ltrb) to box(xywh or xyxy)."""
    left_top, right_bottom = distance.split(2, dim)
    top_left_corner = anchor_points - left_top
    bottom_right_corner = anchor_points + right_bottom
    if not xywh:
        return torch.cat((top_left_corner, bottom_right_corner), dim)  # xyxy bbox
    centre = (top_left_corner + bottom_right_corner) / 2
    size = bottom_right_corner - top_left_corner
    return torch.cat((centre, size), dim)  # xywh bbox


def bbox2dist(anchor_points, bbox, reg_max):
    """Transform bbox(xyxy) to dist(ltrb), clamped to ``[0, reg_max - 0.01]``."""
    top_left_corner, bottom_right_corner = bbox.split(2, -1)
    left_top = anchor_points - top_left_corner
    right_bottom = bottom_right_corner - anchor_points
    return torch.cat((left_top, right_bottom), -1).clamp(0, reg_max - 0.01)  # dist (lt, rb)
class TaskAlignedAssigner(nn.Module):
    """Assign ground-truth boxes to anchors using the task-aligned metric ``score**alpha * IoU**beta``."""

    def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
        super().__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.bg_idx = num_classes
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
        """This code referenced to
           https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py

        Args:
            pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            anc_points (Tensor): shape(num_total_anchors, 2)
            gt_labels (Tensor): shape(bs, n_max_boxes, 1)
            gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
            mask_gt (Tensor): shape(bs, n_max_boxes, 1)
        Returns:
            target_labels (Tensor): shape(bs, num_total_anchors)
            target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            fg_mask (Tensor): shape(bs, num_total_anchors)
            target_gt_idx (Tensor): shape(bs, num_total_anchors)
        """
        self.bs = pd_scores.size(0)
        self.n_max_boxes = gt_bboxes.size(1)

        # No ground truth in the whole batch: every anchor is background.
        if self.n_max_boxes == 0:
            device = gt_bboxes.device
            return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
                    torch.zeros_like(pd_bboxes).to(device),
                    torch.zeros_like(pd_scores).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device))

        mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
                                                             mask_gt)

        target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

        # assigned target
        target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)

        # normalize
        align_metric *= mask_pos
        pos_align_metrics = align_metric.amax(axis=-1, keepdim=True)  # b, max_num_obj
        pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True)  # b, max_num_obj
        norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1)
        target_scores = target_scores * norm_align_metric

        return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx

    def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
        """Return ``(mask_pos, align_metric, overlaps)``, each of shape (b, max_num_obj, h*w)."""
        # get anchor_align metric, (b, max_num_obj, h*w)
        align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes)
        # get in_gts mask, (b, max_num_obj, h*w)
        mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
        # get topk_metric mask, (b, max_num_obj, h*w)
        mask_topk = self.select_topk_candidates(align_metric * mask_in_gts,
                                                topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
        # merge all mask to a final mask, (b, max_num_obj, h*w)
        mask_pos = mask_topk * mask_in_gts * mask_gt

        return mask_pos, align_metric, overlaps

    def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes):
        """Return the alignment metric and the clamped CIoU overlaps for every (gt, anchor) pair."""
        gt_labels = gt_labels.to(torch.long)  # b, max_num_obj, 1
        device = pd_scores.device  # build the index on the scores' device instead of always on CPU
        ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long, device=device)  # 2, b, max_num_obj
        ind[0] = torch.arange(end=self.bs, device=device).view(-1, 1).repeat(1, self.n_max_boxes)  # b, max_num_obj
        ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
        # get the scores of each grid for each gt cls
        bbox_scores = pd_scores[ind[0], :, ind[1]]  # b, max_num_obj, h*w

        overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0)
        align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
        return align_metric, overlaps

    def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
        """
        Args:
            metrics: (b, max_num_obj, h*w).
            topk_mask: (b, max_num_obj, topk) or None
        Returns:
            (b, max_num_obj, h*w) 0/1 tensor in ``metrics.dtype`` marking each
            ground truth's selected anchors.
        """
        num_anchors = metrics.shape[-1]  # h*w
        # (b, max_num_obj, topk)
        topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
        if topk_mask is None:
            # amax returns the values tensor directly; Tensor.max(dim) returns a
            # (values, indices) pair that cannot be compared with a float.
            topk_mask = (topk_metrics.amax(-1, keepdim=True) > self.eps).tile([1, 1, self.topk])
        # (b, max_num_obj, topk)
        topk_idxs = torch.where(topk_mask, topk_idxs, 0)
        # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w)
        is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2)
        # filter invalid bboxes
        # assigned topk should be unique, this is for dealing with empty labels
        # since empty labels will generate index `0` through `F.one_hot`
        # NOTE: but what if the topk_idxs include `0`?
        is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk)
        return is_in_topk.to(metrics.dtype)

    def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
        """
        Args:
            gt_labels: (b, max_num_obj, 1)
            gt_bboxes: (b, max_num_obj, 4)
            target_gt_idx: (b, h*w)
            fg_mask: (b, h*w)
        Returns:
            target_labels (b, h*w), target_bboxes (b, h*w, 4) and one-hot
            target_scores (b, h*w, num_classes), zeroed for background anchors.
        """
        # assigned target labels, (b, 1)
        batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes  # (b, h*w)
        target_labels = gt_labels.long().flatten()[target_gt_idx]  # (b, h*w)

        # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w)
        target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

        # assigned target scores
        # clamp() is out-of-place: keep its result so negative labels cannot reach one_hot.
        target_labels = target_labels.clamp(0)
        target_scores = F.one_hot(target_labels, self.num_classes)  # (b, h*w, 80)
        fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)  # (b, h*w, 80)
        target_scores = torch.where(fg_scores_mask > 0, target_scores, 0)

        return target_labels, target_bboxes, target_scores
def check_pil_font(font=FONT, size=10):
    """Return a PIL TrueType font, downloading to CONFIG_DIR if necessary.

    Falls back to PIL's default font when the download fails with ``URLError``;
    returns ``None`` after the Pillow requirement check on the ``TypeError`` path.
    """
    font_path = Path(font)
    if not font_path.exists():
        font_path = CONFIG_DIR / font_path.name
    try:
        source = str(font_path) if font_path.exists() else font_path.name
        return ImageFont.truetype(source, size)
    except Exception:  # download if missing
        try:
            check_font(font_path)
            return ImageFont.truetype(str(font_path), size)
        except TypeError:
            check_requirements('Pillow>=8.4.0')  # known issue https://github.com/ultralytics/yolov5/issues/5374
            return None
        except URLError:  # not online
            return ImageFont.load_default()
asian, arabic, cyrillic self.pil = pil or non_ascii if self.pil: # use PIL self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font, size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)) else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): # Add one xyxy box to image with label if self.pil or not is_ascii(label): self.draw.rectangle(box, width=self.lw, outline=color) # box if label: w, h = self.font.getsize(label) # text width, height outside = box[1] - h >= 0 # label fits outside box self.draw.rectangle( (box[0], box[1] - h if outside else box[1], box[0] + w + 1, box[1] + 1 if outside else box[1] + h + 1), fill=color, ) # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font) else: # cv2 p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h >= 3 p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) def masks(self, masks, colors, im_gpu=None, alpha=0.5): """Plot masks at once. 
Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque """ if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() if im_gpu is None: # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) if len(masks) == 0: return if isinstance(masks, torch.Tensor): masks = torch.as_tensor(masks, dtype=torch.uint8) masks = masks.permute(1, 2, 0).contiguous() masks = masks.cpu().numpy() # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) masks = scale_image(masks.shape[:2], masks, self.im.shape) masks = np.asarray(masks, dtype=np.float32) colors = np.asarray(colors, dtype=np.float32) # shape(n,3) s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) self.im[:] = masks * alpha + self.im * (1 - s * alpha) else: if len(masks) == 0: self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) im_gpu = im_gpu.flip(dims=[0]) # flip channel im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) im_gpu = im_gpu * inv_alph_masks[-1] + mcs im_mask = (im_gpu * 255).byte().cpu().numpy() self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) if self.pil: # convert im back to PIL and update draw self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) def 
text(self, xy, text, txt_color=(255, 255, 255), anchor='top'): # Add text to image (PIL-only) if anchor == 'bottom': # start y from font bottom w, h = self.font.getsize(text) # text width, height xy[1] += 1 - h self.draw.text(xy, text, fill=txt_color, font=self.font) def fromarray(self, im): # Update self.im from a numpy array self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) def result(self): # Return annotated image as array return np.asarray(self.im) def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')): """ x: Features to be visualized module_type: Module type stage: Module stage within model n: Maximum number of feature maps to plot save_dir: Directory to save results """ if 'Detect' not in module_type: batch, channels, height, width = x.shape # batch, channels, height, width if height > 1 and width > 1: f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels n = min(n, channels) # number of plots fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols ax = ax.ravel() plt.subplots_adjust(wspace=0.05, hspace=0.05) for i in range(n): ax[i].imshow(blocks[i].squeeze()) # cmap='gray' ax[i].axis('off') LOGGER.info(f'Saving {f}... 
def hist2d(x, y, n=100):
    """Return, for every (x, y) sample, the log of the count in its 2d histogram bin.

    Used in labels.png and evolve.png. `n` is the number of bin edges per axis.
    """
    edges_x = np.linspace(x.min(), x.max(), n)
    edges_y = np.linspace(y.min(), y.max(), n)
    counts, edges_x, edges_y = np.histogram2d(x, y, (edges_x, edges_y))
    # map each sample to its bin; samples on the last edge are clipped into the last bin
    rows = (np.digitize(x, edges_x) - 1).clip(0, counts.shape[0] - 1)
    cols = (np.digitize(y, edges_y) - 1).clip(0, counts.shape[1] - 1)
    return np.log(counts[rows, cols])
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
    """Step the scheduler `epochs` times and save the resulting LR curve to <save_dir>/LR.png.

    Works on shallow copies (copy.copy) of the optimizer and scheduler; the plotted value
    is the 'lr' of the first param group after each step.
    """
    optimizer, scheduler = copy(optimizer), copy(scheduler)
    lr_history = []
    for _ in range(epochs):
        scheduler.step()
        lr_history.append(optimizer.param_groups[0]['lr'])

    plt.plot(lr_history, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, epochs)
    plt.ylim(0)
    out_file = Path(save_dir) / 'LR.png'
    plt.savefig(out_file, dpi=200)
    plt.close()
@TryExcept()  # known issue https://github.com/ultralytics/yolov5/issues/5395
def plot_labels(labels, names=(), save_dir=Path('')):
    """Plot dataset label statistics to labels_correlogram.jpg and labels.jpg in `save_dir`.

    Args:
        labels: array whose column 0 is the class and columns 1: are x, y, width, height.
        names: class names, either a dict (index -> name) or a sequence; tick labels are
            only drawn when there are between 1 and 29 names.
        save_dir: output directory (Path).

    The input `labels` array is not modified.
    """
    LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
    c, b = labels[:, 0], labels[:, 1:].transpose()  # classes, boxes
    nc = int(c.max() + 1)  # number of classes
    x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])

    # seaborn correlogram
    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
    plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
    plt.close()

    # matplotlib labels
    matplotlib.use('svg')  # faster
    try:
        ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
        y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
        with contextlib.suppress(Exception):  # color histogram bars by class
            [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)]  # known issue #3195
        ax[0].set_ylabel('instances')
        if 0 < len(names) < 30:
            # accept both dict (index -> name) and plain sequences of names
            tick_names = list(names.values()) if isinstance(names, dict) else list(names)
            ax[0].set_xticks(range(len(tick_names)))
            ax[0].set_xticklabels(tick_names, rotation=90, fontsize=10)
        else:
            ax[0].set_xlabel('classes')
        sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
        sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)

        # rectangles: draw the first 1000 boxes, all centred, on a white 2000x2000 canvas
        boxes = labels[:1000, 1:].copy()  # copy so the caller's array is left untouched
        boxes[:, :2] = 0.5  # center
        boxes = xywh2xyxy(boxes) * 2000
        img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
        draw = ImageDraw.Draw(img)  # one drawing context for all boxes
        for cls, box in zip(labels[:1000, 0], boxes):
            draw.rectangle(box.tolist(), width=1, outline=colors(cls))  # plot

        ax[1].imshow(img)
        ax[1].axis('off')

        for a in [0, 1, 2, 3]:
            for s in ['top', 'right', 'left', 'bottom']:
                ax[a].spines[s].set_visible(False)

        plt.savefig(save_dir / 'labels.jpg', dpi=200)
    finally:
        # always restore the file-writing backend, even when plotting fails part-way
        matplotlib.use('Agg')
        plt.close()
def plot_evolve(evolve_csv='path/to/evolve.csv'):  # from utils.plots import *; plot_evolve()
    """Plot evolve.csv hyp evolution results and save them next to the csv as a .png.

    Columns from index 7 onward are each scattered against the fitness of every row,
    with the value from the best-fitness row marked and printed.
    """
    evolve_csv = Path(evolve_csv)
    frame = pd.read_csv(evolve_csv)
    columns = [name.strip() for name in frame.columns]
    values = frame.values
    scores = fitness(values)
    j = np.argmax(scores)  # max fitness index
    plt.figure(figsize=(10, 12), tight_layout=True)
    matplotlib.rc('font', **{'size': 8})
    print(f'Best results from row {j} of {evolve_csv}:')
    for col, k in enumerate(columns[7:], start=7):
        i = col - 7  # subplot index
        column_values = values[:, col]
        mu = column_values[j]  # best single result
        plt.subplot(6, 5, i + 1)
        density = hist2d(column_values, scores, 20)
        plt.scatter(column_values, scores, c=density, cmap='viridis', alpha=.8, edgecolors='none')
        plt.plot(mu, scores.max(), 'k+', markersize=15)
        plt.title(f'{k} = {mu:.3g}', fontdict={'size': 9})
        if i % 5 != 0:
            plt.yticks([])  # y ticks only on the first subplot of each row
        print(f'{k:>15}: {mu:.3g}')
    f = evolve_csv.with_suffix('.png')  # filename
    plt.savefig(f, dpi=200)
    plt.close()
    print(f'Saved {f}')
def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
    """Plot iDetection 'frames*.txt' per-image logs found in `save_dir`.

    Usage: from utils.plots import *; profile_idetection()

    Args:
        start: first column index (after clipping) to plot.
        stop: end column index; 0 means plot up to the last available column.
        labels: optional legend labels, indexed by file order; when empty the file stem
            (with 'frames_' removed) is used.
        save_dir: directory that is searched for logs and receives 'idetection_profile.png'.

    Any exception raised while handling one file is printed as a warning and the next
    file is processed.
    """
    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()  # 8 axes, flattened
    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
    files = list(Path(save_dir).glob('frames*.txt'))
    for fi, f in enumerate(files):
        try:
            # transpose so each row is one logged quantity, then drop the first 90 and last 30 samples
            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
            n = results.shape[1]  # number of rows
            x = np.arange(start, min(stop, n) if stop else n)
            results = results[:, x]
            t = (results[0] - results[0].min())  # set t0=0s
            results[0] = x  # row 0 now holds the sample index instead of the raw value
            for i, a in enumerate(ax):
                if i < len(results):
                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
                    a.set_title(s[i])
                    a.set_xlabel('time (s)')
                    # if fi == len(files) - 1:
                    #     a.set_ylim(bottom=0)
                    for side in ['top', 'right']:
                        a.spines[side].set_visible(False)
                else:
                    a.remove()  # axes without a matching data row are dropped from the figure
        except Exception as e:
            print(f'Warning: Plotting error for {f}; {e}')
    ax[1].legend()
    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
def mixup(im, labels, segments, im2, labels2, segments2):
    """Apply MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf to two samples.

    The images are blended with a ratio drawn from Beta(32, 32) and cast to uint8;
    labels and segments of both samples are concatenated along axis 0.
    """
    ratio = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    blended = (im * ratio + im2 * (1 - ratio)).astype(np.uint8)
    merged_labels = np.concatenate((labels, labels2), axis=0)
    merged_segments = np.concatenate((segments, segments2), axis=0)
    return blended, merged_labels, merged_segments
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - scale, 1 + scale) # s = 2 ** random.uniform(-scale, scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() # ax[0].imshow(im[:, :, ::-1]) # base # ax[1].imshow(im2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) new_segments = [] if n: new = np.zeros((n, 4)) segments = resample_segments(segments) # upsample for i, segment in enumerate(segments): xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) new_segments.append(xy) # filter candidates i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) targets = targets[i] targets[:, 1:5] = new[i] new_segments = 
def create_dataloader(path,
                      imgsz,
                      batch_size,
                      stride,
                      single_cls=False,
                      hyp=None,
                      augment=False,
                      cache=False,
                      pad=0.0,
                      rect=False,
                      rank=-1,
                      workers=8,
                      image_weights=False,
                      close_mosaic=False,
                      quad=False,
                      prefix='',
                      shuffle=False,
                      mask_downsample_ratio=1,
                      overlap_mask=False):
    """Build a LoadImagesAndLabelsAndMasks dataset and a loader over it; returns (loader, dataset).

    A plain DataLoader is used when `image_weights` or `close_mosaic` is set, otherwise
    InfiniteDataLoader. A DistributedSampler is attached whenever `rank` is not -1.
    """
    if shuffle and rect:
        LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False')
        shuffle = False

    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = LoadImagesAndLabelsAndMasks(path,
                                              imgsz,
                                              batch_size,
                                              augment=augment,
                                              hyp=hyp,
                                              rect=rect,
                                              cache_images=cache,
                                              single_cls=single_cls,
                                              stride=int(stride),
                                              pad=pad,
                                              image_weights=image_weights,
                                              prefix=prefix,
                                              downsample_ratio=mask_downsample_ratio,
                                              overlap=overlap_mask)

    batch_size = min(batch_size, len(dataset))
    num_devices = torch.cuda.device_count()
    cpus_per_device = os.cpu_count() // max(num_devices, 1)
    batch_cap = batch_size if batch_size > 1 else 0  # no workers for batch sizes <= 1
    num_workers = min(cpus_per_device, batch_cap, workers)

    if rank == -1:
        sampler = None
    else:
        sampler = distributed.DistributedSampler(dataset, shuffle=shuffle)

    # only DataLoader allows for attribute updates
    loader_cls = DataLoader if (image_weights or close_mosaic) else InfiniteDataLoader

    if quad:
        collate = LoadImagesAndLabelsAndMasks.collate_fn4
    else:
        collate = LoadImagesAndLabelsAndMasks.collate_fn

    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)

    dataloader = loader_cls(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle and sampler is None,
        num_workers=num_workers,
        sampler=sampler,
        pin_memory=True,
        collate_fn=collate,
        worker_init_fn=seed_worker,
        generator=generator,
    )
    return dataloader, dataset
ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: img, labels, segments = random_perspective(img, labels, segments=segments, degrees=hyp["degrees"], translate=hyp["translate"], scale=hyp["scale"], shear=hyp["shear"], perspective=hyp["perspective"]) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) if self.overlap: masks, sorted_idx = polygons2masks_overlap(img.shape[:2], segments, downsample_ratio=self.downsample_ratio) masks = masks[None] # (640, 640) -> (1, 640, 640) labels = labels[sorted_idx] else: masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio)) # TODO: albumentations support if self.augment: # Albumentations # there are some augmentation that won't change boxes and masks, # so just be it for now. img, labels = self.albumentations(img, labels) nl = len(labels) # update after albumentations # HSV color-space augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) # Flip up-down if random.random() < hyp["flipud"]: img = np.flipud(img) if nl: labels[:, 2] = 1 - labels[:, 2] masks = torch.flip(masks, dims=[1]) # Flip left-right if random.random() < hyp["fliplr"]: img = np.fliplr(img) if nl: labels[:, 1] = 1 - labels[:, 1] masks = torch.flip(masks, dims=[2]) # Cutouts # labels = cutout(img, labels, p=0.5) labels_out = torch.zeros((nl, 6)) if nl: labels_out[:, 1:] = torch.from_numpy(labels) # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) def load_mosaic(self, index): # YOLOv5 4-mosaic loader. 
Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y # 3 additional image indices indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = self.load_image(index) # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) # Concat/clip labels labels4 = np.concatenate(labels4, 0) for x in (labels4[:, 1:], *segments4): np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() # img4, labels4 = replicate(img4, labels4) # replicate # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) img4, labels4, segments4 = random_perspective(img4, labels4, segments4, 
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
    """
    Rasterise polygons into a single uint8 mask.

    Args:
        img_size (tuple): The image size.
        polygons (np.ndarray): [N, M], N is the number of polygons,
            M is the number of points(Be divided by 2).
        color: value written inside the polygons.
        downsample_ratio: integer factor by which the filled mask is shrunk.
    """
    canvas = np.zeros(img_size, dtype=np.uint8)
    points = np.asarray(polygons).astype(np.int32)
    points = points.reshape(points.shape[0], -1, 2)  # (N, M) -> (N, M/2, 2) xy pairs
    cv2.fillPoly(canvas, points, color=color)
    # NOTE: fillPoly firstly then resize is trying the keep the same way
    # of loss calculation when mask-ratio=1.
    out_h = img_size[0] // downsample_ratio
    out_w = img_size[1] // downsample_ratio
    return cv2.resize(canvas, (out_w, out_h))
""" masks = [] for si in range(len(polygons)): mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) masks.append(mask) return np.array(masks) def polygons2masks_overlap(img_size, segments, downsample_ratio=1): """Return a (640, 640) overlap mask.""" masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), dtype=np.int32 if len(segments) > 255 else np.uint8) areas = [] ms = [] for si in range(len(segments)): mask = polygon2mask( img_size, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1, ) ms.append(mask) areas.append(mask.sum()) areas = np.asarray(areas) index = np.argsort(-areas) ms = np.array(ms)[index] for i in range(len(segments)): mask = ms[i] * (i + 1) masks = masks + mask masks = np.clip(masks, a_min=0, a_max=i + 1) return masks, index ================================================ FILE: utils/segment/general.py ================================================ import cv2 import numpy as np import torch import torch.nn.functional as F def crop_mask(masks, boxes): """ "Crop" predicted masks by zeroing out everything not in the predicted bbox. Vectorized by Chong (thanks Chong). Args: - masks should be a size [h, w, n] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) def process_mask_upsample(protos, masks_in, bboxes, shape): """ Crop after upsample. 
def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Crop before upsample.

    protos: [mask_dim, mask_h, mask_w] prototype masks
    masks_in: [n, mask_dim] mask coefficients, n is number of masks after nms
    bboxes: [n, 4] boxes as (x1, y1, x2, y2) in the coordinates of `shape`
    shape: input_image_size, (h, w)
    upsample: when True, the cropped masks are bilinearly resized to `shape`
        before thresholding

    return: [n, mask_h, mask_w] tensor of 0/1 values ([n, h, w] when upsample is True)
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    x_gain, y_gain = mw / iw, mh / ih  # image -> prototype resolution

    # Linear combination of prototypes per detection, squashed to (0, 1).
    logits = masks_in @ protos.float().view(c, -1)
    masks = logits.sigmoid().view(-1, mh, mw)  # CHW

    # Work on a copy so the caller's boxes are left untouched.
    boxes_at_mask_res = bboxes.clone()
    boxes_at_mask_res[:, [0, 2]] *= x_gain
    boxes_at_mask_res[:, [1, 3]] *= y_gain

    masks = crop_mask(masks, boxes_at_mask_res)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
def masks_iou(mask1, mask2, eps=1e-7):
    """
    Pairwise (row-by-row) IoU between two equally sized sets of flattened masks.

    mask1: [N, n] predicted masks, one flattened mask per row
    mask2: [N, n] ground-truth masks, row i is compared with row i of mask1
    eps: small constant added to the union to avoid division by zero
    Note: n means image_w x image_h

    return: masks iou, (N, )
    """
    intersection = (mask1 * mask2).sum(1).clamp(0)  # (N, )
    # Keep the union 1-D: an extra leading axis here would broadcast the result
    # to (1, N) instead of the documented (N, ).
    union = mask1.sum(1) + mask2.sum(1) - intersection  # (area1 + area2) - intersection
    return intersection / (union + eps)
def __init__(self, model, autobalance=False, overlap=False):
    """
    Configure the loss from the model's hyperparameters and its last module.

    model: object exposing `parameters()`, a `hyp` mapping (keys read here: 'cls_pw',
        'obj_pw', 'fl_gamma' and optionally 'label_smoothing') and, after
        `de_parallel`, a `model` sequence whose last entry provides
        nl, na, nc, nm, stride and anchors
    autobalance: stored as-is; when True `ssi` is the position of stride 16 in the
        head's strides, otherwise 0
    overlap: stored as `self.overlap`
    """
    device = next(model.parameters()).device  # get model device
    hyp = model.hyp  # hyperparameters

    # Define criteria
    cls_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([hyp['cls_pw']], device=device))
    obj_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([hyp['obj_pw']], device=device))

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    positive_target, negative_target = smooth_BCE(eps=hyp.get('label_smoothing', 0.0))

    # Focal loss wraps both criteria only when a positive gamma is configured
    gamma = hyp['fl_gamma']
    if gamma > 0:
        cls_criterion = FocalLoss(cls_criterion, gamma)
        obj_criterion = FocalLoss(obj_criterion, gamma)

    head = de_parallel(model).model[-1]  # Detect() module

    # Per-layer objectness weights: 3-layer heads get the short list, anything else P3-P7
    self.balance = {3: [4.0, 1.0, 0.4]}.get(head.nl, [4.0, 1.0, 0.25, 0.06, 0.02])
    self.ssi = list(head.stride).index(16) if autobalance else 0  # stride 16 index

    self.sort_obj_iou = False
    self.overlap = overlap
    self.autobalance = autobalance
    self.hyp = hyp
    self.gr = 1.0
    self.cp, self.cn = positive_target, negative_target  # positive, negative BCE targets
    self.BCEcls = cls_criterion
    self.BCEobj = obj_criterion

    self.na = head.na  # number of anchors
    self.nc = head.nc  # number of classes
    self.nl = head.nl  # number of layers
    self.nm = head.nm  # number of masks
    self.anchors = head.anchors
    self.device = device
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
    """
    Mask loss for one image.

    gt_mask: target masks, same shape as the predicted mask logits
    pred: (n, nm) mask coefficients, one row per matched target
    proto: prototype tensor whose first dimension is nm, e.g. (32, 80, 80)
    xyxy: boxes passed to `crop_mask` to restrict the loss to each target's box
    area: per-target divisor applied to the mean cropped loss

    return: scalar tensor, mean over targets of (mean cropped BCE / area)
    """
    flat_proto = proto.view(self.nm, -1)
    mask_logits = (pred @ flat_proto).view(-1, *proto.shape[1:])  # (n,32) @ (32,80,80) -> (n,80,80)
    pixel_loss = F.binary_cross_entropy_with_logits(mask_logits, gt_mask, reduction="none")
    per_target = crop_mask(pixel_loss, xyxy).mean(dim=(1, 2))
    return (per_target / area).mean()
class VarifocalLoss(nn.Module):
    """Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367"""

    def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0):
        """
        Return the summed, weighted BCE between `pred_score` logits and `gt_score`.

        Elements with label == 0 are weighted by alpha * sigmoid(pred_score) ** gamma,
        elements with label == 1 by gt_score. The BCE itself is evaluated in float32
        with CUDA autocast disabled.
        """
        negative_weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label)
        positive_weight = gt_score * label
        weight = negative_weight + positive_weight
        with torch.cuda.amp.autocast(enabled=False):
            bce = F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none")
            return (bce * weight).sum()
class BboxLoss(nn.Module):
    """CIoU box loss with an optional distribution focal loss (DFL) term."""

    def __init__(self, reg_max, use_dfl=False):
        """Store the largest DFL bin index (`reg_max`) and whether the DFL term is computed."""
        super().__init__()
        self.reg_max = reg_max
        self.use_dfl = use_dfl

    def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask):
        """
        Return (loss_iou, loss_dfl, iou) computed over the anchors selected by `fg_mask`.

        Both losses are weighted per anchor by the sum of that anchor's target scores
        and normalised by `target_scores_sum`. `iou` is the raw CIoU of the selected
        prediction/target pairs. When DFL is disabled, `loss_dfl` is a zero tensor on
        the device of `pred_dist`.
        """
        # iou loss: boolean indexing keeps only foreground anchors, one row per anchor
        fg_pred_boxes = pred_bboxes[fg_mask].view(-1, 4)
        fg_target_boxes = target_bboxes[fg_mask].view(-1, 4)
        anchor_weight = target_scores.sum(-1)[fg_mask].unsqueeze(-1)

        iou = bbox_iou(fg_pred_boxes, fg_target_boxes, xywh=False, CIoU=True)
        loss_iou = 1.0 - iou
        loss_iou.mul_(anchor_weight)
        loss_iou = loss_iou.sum() / target_scores_sum

        # dfl loss
        if not self.use_dfl:
            return loss_iou, torch.tensor(0.0).to(pred_dist.device), iou

        bins = self.reg_max + 1
        fg_pred_dist = pred_dist[fg_mask].view(-1, 4, bins)
        target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
        fg_target_ltrb = target_ltrb[fg_mask].view(-1, 4)
        loss_dfl = self._df_loss(fg_pred_dist, fg_target_ltrb) * anchor_weight
        loss_dfl = loss_dfl.sum() / target_scores_sum
        return loss_iou, loss_dfl, iou

    def _df_loss(self, pred_dist, target):
        """
        Distribution focal loss for fractional bin targets.

        Each target is split between its floor bin and the next bin, with weights
        given by the distance to the opposite bin; the cross-entropy against both
        bins is blended and averaged over the last dimension (kept as size 1).
        """
        lower_bin = target.to(torch.long)
        upper_bin = lower_bin + 1
        lower_weight = upper_bin.to(torch.float) - target
        upper_weight = 1 - lower_weight

        logits = pred_dist.view(-1, self.reg_max + 1)
        out_shape = lower_bin.shape
        lower_loss = F.cross_entropy(logits, lower_bin.view(-1), reduction="none").view(out_shape) * lower_weight
        upper_loss = F.cross_entropy(logits, upper_bin.view(-1), reduction="none").view(out_shape) * upper_weight
        return (lower_loss + upper_loss).mean(-1, keepdim=True)
def __call__(self, p, targets, masks, img=None, epoch=0):
    """
    Compute the box, mask, class and DFL losses for one batch.

    Args:
        p: Model output. Either a 3-item sequence (feats, pred_masks, proto) or a
            sequence whose element [1] is that triple.
        targets: 2-D label tensor. Column 0 is the image index within the batch; the
            remaining columns are passed to `self.preprocess`, which scales the box
            columns by the image size and converts them with `xywh2xyxy`.
        masks: Ground-truth masks. With `self.overlap` they are indexed per image and
            compared against (target index + 1); otherwise they are indexed per target
            through the image index column of `targets`. Resized with nearest
            interpolation when their spatial size differs from the prototypes'.
        img, epoch: Not used by this method.

    Returns:
        tuple: (loss.sum() * batch_size, loss.detach()), where `loss` holds, in order,
        the box, mask, class and DFL terms after the fixed gains
        7.5, 2.5 / batch_size, 0.5 and 1.5 have been applied.

    Raises:
        TypeError: if a RuntimeError is raised while preparing the targets.
    """
    loss = torch.zeros(4, device=self.device)  # box, seg, cls, dfl
    feats, pred_masks, proto = p if len(p) == 3 else p[1]
    batch_size, _, mask_h, mask_w = proto.shape
    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1)

    # (batch, channels, anchors) -> (batch, anchors, channels)
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_masks = pred_masks.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    batch_size = pred_scores.shape[0]
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    whwh = imgsz[[1, 0, 1, 0]]  # (w, h, w, h) scale for xyxy / xywh boxes
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # targets
    try:
        batch_idx = targets[:, 0].view(-1, 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=whwh)
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)  # padded rows are all-zero boxes
    except RuntimeError as e:
        raise TypeError('ERROR.') from e

    # pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt)

    # Clamp the normaliser: when nothing is assigned the sum is 0 and the class loss
    # would otherwise become 0 / 0 = NaN and poison the whole step.
    target_scores_sum = target_scores.sum().clamp(min=1)

    # cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

    # bbox loss
    if fg_mask.sum():
        loss[0], loss[3], _ = self.bbox_loss(pred_distri,
                                             pred_bboxes,
                                             anchor_points,
                                             target_bboxes / stride_tensor,
                                             target_scores,
                                             target_scores_sum,
                                             fg_mask)

        # masks loss
        if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
            masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

        mask_whwh = torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)
        for i in range(batch_size):
            if fg_mask[i].sum():
                mask_idx = target_gt_idx[i][fg_mask[i]]
                if self.overlap:
                    # overlap masks store (target index + 1) per pixel, 0 being background
                    gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0)
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]
                xyxyn = target_bboxes[i][fg_mask[i]] / whwh
                marea = xyxy2xywh(xyxyn)[:, 2:].prod(1)  # normalised box area
                mxyxy = xyxyn * mask_whwh  # boxes at prototype resolution
                loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy,
                                                 marea)  # seg loss

    loss[0] *= 7.5  # box gain
    loss[1] *= 2.5 / batch_size  # seg gain
    loss[2] *= 0.5  # cls gain
    loss[3] *= 1.5  # dfl gain
    return loss.sum() * batch_size, loss.detach()  # loss(box, seg, cls, dfl)
class FocalLoss(nn.Module):
    """Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)."""

    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        """
        Remember the wrapped criterion's reduction and switch the criterion itself to
        'none' so the focal weighting can be applied to each element.

        loss_fcn: must be nn.BCEWithLogitsLoss(); its `reduction` attribute is overwritten
        gamma: exponent of the modulating factor
        alpha: weight for positive elements (negatives get 1 - alpha)
        """
        super().__init__()
        self.loss_fcn = loss_fcn
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = "none"  # required to apply FL to each element

    def forward(self, pred, true):
        """Return the focal-weighted loss of logits `pred` against targets `true`, reduced as originally configured."""
        elementwise = self.loss_fcn(pred, true)

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        prob = torch.sigmoid(pred)  # prob from logits
        inverse_true = 1 - true
        p_t = true * prob + inverse_true * (1 - prob)
        alpha_factor = true * self.alpha + inverse_true * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        elementwise.mul_(alpha_factor * modulating_factor)

        if self.reduction == "mean":
            return elementwise.mean()
        if self.reduction == "sum":
            return elementwise.sum()
        return elementwise  # 'none'
target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight loss_dfl = loss_dfl.sum() / target_scores_sum else: loss_dfl = torch.tensor(0.0).to(pred_dist.device) return loss_iou, loss_dfl, iou def _df_loss(self, pred_dist, target): target_left = target.to(torch.long) target_right = target_left + 1 weight_left = target_right.to(torch.float) - target weight_right = 1 - weight_left loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( target_left.shape) * weight_left loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), reduction="none").view(target_left.shape) * weight_right return (loss_left + loss_right).mean(-1, keepdim=True) class ComputeLoss: # Compute losses def __init__(self, model, use_dfl=True, overlap=True): device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets # Focal loss g = h["fl_gamma"] # focal loss gamma if g > 0: BCEcls = FocalLoss(BCEcls, g) m = de_parallel(model).model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.BCEcls = BCEcls self.hyp = h self.stride = m.stride # model strides self.nc = m.nc # number of classes self.nl = m.nl # number of layers self.no = m.no self.nm = m.nm self.overlap = overlap self.reg_max = m.reg_max self.device = device self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), num_classes=self.nc, alpha=float(os.getenv('YOLOA', 0.5)), beta=float(os.getenv('YOLOB', 6.0))) self.assigner2 = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), 
def __call__(self, p, targets, masks, img=None, epoch=0):
    """
    Compute the box, mask, class and DFL losses for a model with two prediction branches.

    Args:
        p: Model output. Either a 3-item sequence (feats, pred_masks, proto) or a
            sequence whose element [1] is that triple. Each of the three items is
            itself indexable, with entry [0] belonging to the first branch and
            entry [1] to the second.
        targets: 2-D label tensor. Column 0 is the image index within the batch; the
            remaining columns are passed to `self.preprocess`, which scales the box
            columns by the image size and converts them with `xywh2xyxy`.
        masks: Ground-truth masks. With `self.overlap` they are indexed per image and
            compared against (target index + 1); otherwise they are indexed per target
            through the image index column of `targets`. Resized with nearest
            interpolation when their spatial size differs from the prototypes'.
        img, epoch: Not used by this method.

    Returns:
        tuple: (loss.sum() * batch_size, loss.detach()), where `loss` holds, in order,
        the box, mask, class and DFL terms. Every first-branch term is weighted by
        0.25 and every second-branch term by 1.0 before the fixed gains
        7.5, 2.5 / batch_size, 0.5 and 1.5 are applied.

    Raises:
        TypeError: if a RuntimeError is raised while preparing the targets.
    """
    loss = torch.zeros(4, device=self.device)  # box, seg, cls, dfl

    feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1]
    feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0]
    feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1]

    batch_size, _, mask_h, mask_w = proto.shape

    pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
        (self.reg_max * 4, self.nc), 1)
    # (batch, channels, anchors) -> (batch, anchors, channels)
    pred_scores = pred_scores.permute(0, 2, 1).contiguous()
    pred_distri = pred_distri.permute(0, 2, 1).contiguous()
    pred_masks = pred_masks.permute(0, 2, 1).contiguous()

    pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split(
        (self.reg_max * 4, self.nc), 1)
    pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous()
    pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous()
    pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous()

    dtype = pred_scores.dtype
    batch_size = pred_scores.shape[0]
    imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
    whwh = imgsz[[1, 0, 1, 0]]  # (w, h, w, h) scale for xyxy / xywh boxes
    anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

    # targets
    try:
        batch_idx = targets[:, 0].view(-1, 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=whwh)
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)  # padded rows are all-zero boxes
    except RuntimeError as e:
        raise TypeError('ERROR.') from e

    # pboxes
    pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
    pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2)  # xyxy, (b, h*w, 4)

    _, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner(
        pred_scores.detach().sigmoid(),
        (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt)
    _, target_bboxes2, target_scores2, fg_mask2, target_gt_idx2 = self.assigner2(
        pred_scores2.detach().sigmoid(),
        (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype),
        anchor_points * stride_tensor,
        gt_labels,
        gt_bboxes,
        mask_gt)

    # Clamp the normalisers: when nothing is assigned a sum is 0 and the class loss
    # would otherwise become 0 / 0 = NaN and poison the whole step.
    target_scores_sum = target_scores.sum().clamp(min=1)
    target_scores_sum2 = target_scores2.sum().clamp(min=1)

    # cls loss
    # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
    loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE
    loss[2] *= 0.25
    loss[2] += self.BCEcls(pred_scores2, target_scores2.to(dtype)).sum() / target_scores_sum2  # BCE

    mask_whwh = torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)

    # bbox loss, first branch
    if fg_mask.sum():
        loss[0], loss[3], _ = self.bbox_loss(pred_distri,
                                             pred_bboxes,
                                             anchor_points,
                                             target_bboxes / stride_tensor,
                                             target_scores,
                                             target_scores_sum,
                                             fg_mask)

        # masks loss
        if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
            masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

        for i in range(batch_size):
            if fg_mask[i].sum():
                mask_idx = target_gt_idx[i][fg_mask[i]]
                if self.overlap:
                    # overlap masks store (target index + 1) per pixel, 0 being background
                    gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0)
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]
                xyxyn = target_bboxes[i][fg_mask[i]] / whwh
                marea = xyxy2xywh(xyxyn)[:, 2:].prod(1)  # normalised box area
                mxyxy = xyxyn * mask_whwh  # boxes at prototype resolution
                loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy,
                                                 marea)  # seg loss

    # down-weight everything accumulated from the first branch
    loss[0] *= 0.25
    loss[3] *= 0.25
    loss[1] *= 0.25

    # bbox loss, second branch
    if fg_mask2.sum():
        loss0_, loss3_, _ = self.bbox_loss2(pred_distri2,
                                            pred_bboxes2,
                                            anchor_points,
                                            target_bboxes2 / stride_tensor,
                                            target_scores2,
                                            target_scores_sum2,
                                            fg_mask2)

        # masks loss
        if tuple(masks.shape[-2:]) != (mask_h, mask_w):  # downsample
            masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0]

        for i in range(batch_size):
            if fg_mask2[i].sum():
                mask_idx = target_gt_idx2[i][fg_mask2[i]]
                if self.overlap:
                    gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0)
                else:
                    gt_mask = masks[batch_idx.view(-1) == i][mask_idx]
                xyxyn = target_bboxes2[i][fg_mask2[i]] / whwh
                marea = xyxy2xywh(xyxyn)[:, 2:].prod(1)
                mxyxy = xyxyn * mask_whwh
                loss[1] += self.single_mask_loss(gt_mask, pred_masks2[i][fg_mask2[i]], proto2[i], mxyxy,
                                                 marea)  # seg loss

        loss[0] += loss0_
        loss[3] += loss3_

    loss[0] *= 7.5  # box gain
    loss[1] *= 2.5 / batch_size  # seg gain
    loss[2] *= 0.5  # cls gain
    loss[3] *= 1.5  # dfl gain
    return loss.sum() * batch_size, loss.detach()  # loss(box, seg, cls, dfl)
def __init__(self, model, use_dfl=True, overlap=True):
    """
    Configure the loss from the model's hyperparameters and its last module.

    model: object exposing `parameters()`, a `hyp` mapping (keys read here: "cls_pw",
        "fl_gamma" and optionally "label_smoothing") and, after `de_parallel`, a
        `model` sequence whose last entry provides nl, nc, no, nm, stride and reg_max
    use_dfl: stored and forwarded to BboxLoss
    overlap: stored as `self.overlap`

    The assigner's topk / alpha / beta are read from the environment variables
    YOLOM / YOLOA / YOLOB, defaulting to 10 / 0.5 / 6.0.
    """
    device = next(model.parameters()).device  # get model device
    hyp = model.hyp  # hyperparameters

    # Define criteria: element-wise BCE, optionally wrapped in focal loss
    cls_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([hyp["cls_pw"]], device=device), reduction='none')

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    positive_target, negative_target = smooth_BCE(eps=hyp.get("label_smoothing", 0.0))

    gamma = hyp["fl_gamma"]  # focal loss gamma
    if gamma > 0:
        cls_criterion = FocalLoss(cls_criterion, gamma)

    head = de_parallel(model).model[-1]  # Detect() module
    self.balance = {3: [4.0, 1.0, 0.4]}.get(head.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7

    self.cp, self.cn = positive_target, negative_target  # positive, negative BCE targets
    self.BCEcls = cls_criterion
    self.hyp = hyp
    self.device = device
    self.overlap = overlap
    self.use_dfl = use_dfl

    # Values copied from the head
    self.stride = head.stride  # model strides
    self.nc = head.nc  # number of classes
    self.nl = head.nl  # number of layers
    self.no = head.no
    self.nm = head.nm
    self.reg_max = head.reg_max

    topk = int(os.getenv('YOLOM', 10))
    alpha = float(os.getenv('YOLOA', 0.5))
    beta = float(os.getenv('YOLOB', 6.0))
    self.assigner = TaskAlignedAssigner(topk=topk, num_classes=self.nc, alpha=alpha, beta=beta)

    self.bbox_loss = BboxLoss(head.reg_max - 1, use_dfl=use_dfl).to(device)
    self.proj = torch.arange(head.reg_max).float().to(device)  # / 120.0
torch.zeros(batch_size, counts.max(), 5, device=self.device) for j in range(batch_size): matches = i == j n = matches.sum() if n: out[j, :n] = targets[matches, 1:] out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) return out def bbox_decode(self, anchor_points, pred_dist): if self.use_dfl: b, a, c = pred_dist.shape # batch, anchors, channels pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) return dist2bbox(pred_dist, anchor_points, xywh=False) def __call__(self, p, targets, masks, img=None, epoch=0): loss = torch.zeros(4, device=self.device) # box, cls, dfl feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1] feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0] feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1] batch_size, _, mask_h, mask_w = proto.shape pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_masks = pred_masks.permute(0, 2, 1).contiguous() pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous() dtype = pred_scores.dtype batch_size, grid_size = pred_scores.shape[:2] imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) # targets try: batch_idx = targets[:, 0].view(-1, 
1) targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) except RuntimeError as e: raise TypeError('ERROR.') from e # pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( pred_scores2.detach().sigmoid(), (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_scores_sum = target_scores.sum() # cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[2] *= 0.25 loss[2] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum # BCE # bbox loss if fg_mask.sum(): loss[0], loss[3], _ = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask) # masks loss if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] for i in range(batch_size): if fg_mask[i].sum(): mask_idx = target_gt_idx[i][fg_mask[i]] if self.overlap: gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) else: gt_mask = masks[batch_idx.view(-1) == i][mask_idx] xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, marea) # seg loss loss[0] *= 0.25 loss[3] *= 0.25 loss[1] *= 0.25 # bbox loss if fg_mask.sum(): loss0_, loss3_, _ = 
self.bbox_loss(pred_distri2, pred_bboxes2, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask) # masks loss if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] for i in range(batch_size): if fg_mask[i].sum(): mask_idx = target_gt_idx[i][fg_mask[i]] if self.overlap: gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) else: gt_mask = masks[batch_idx.view(-1) == i][mask_idx] xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) loss[1] += self.single_mask_loss(gt_mask, pred_masks2[i][fg_mask[i]], proto2[i], mxyxy, marea) # seg loss loss[0] += loss0_ loss[3] += loss3_ loss[0] *= 7.5 # box gain loss[1] *= 2.5 / batch_size loss[2] *= 0.5 # cls gain loss[3] *= 1.5 # dfl gain return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): # Mask loss for one image pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') #loss = sigmoid_focal_loss(pred_mask, gt_mask, alpha = .25, gamma = 2., reduction = 'none') #p_m = torch.flatten(pred_mask.softmax(dim = 1)) #g_m = torch.flatten(gt_mask) #i_m = torch.sum(torch.mul(p_m, g_m)) #u_m = torch.sum(torch.add(p_m, g_m)) #dice_coef = (2. * i_m + 1.) / (u_m + 1.) #dice_loss = (1. 
- dice_coef) return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() class ComputeLossLH0: # Compute losses def __init__(self, model, use_dfl=True, overlap=True): device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets # Focal loss g = h["fl_gamma"] # focal loss gamma if g > 0: BCEcls = FocalLoss(BCEcls, g) m = de_parallel(model).model[-1] # Detect() module self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 self.BCEcls = BCEcls self.hyp = h self.stride = m.stride # model strides self.nc = m.nc # number of classes self.nl = m.nl # number of layers self.no = m.no self.nm = m.nm self.overlap = overlap self.reg_max = m.reg_max self.device = device self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), num_classes=self.nc, alpha=float(os.getenv('YOLOA', 0.5)), beta=float(os.getenv('YOLOB', 6.0))) self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 self.use_dfl = use_dfl def preprocess(self, targets, batch_size, scale_tensor): if targets.shape[0] == 0: out = torch.zeros(batch_size, 0, 5, device=self.device) else: i = targets[:, 0] # image index _, counts = i.unique(return_counts=True) out = torch.zeros(batch_size, counts.max(), 5, device=self.device) for j in range(batch_size): matches = i == j n = matches.sum() if n: out[j, :n] = targets[matches, 1:] out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) return out def bbox_decode(self, anchor_points, pred_dist): if self.use_dfl: b, a, c = pred_dist.shape # batch, anchors, channels pred_dist = pred_dist.view(b, a, 4, c // 
4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) return dist2bbox(pred_dist, anchor_points, xywh=False) def __call__(self, p, targets, masks, img=None, epoch=0): loss = torch.zeros(4, device=self.device) # box, cls, dfl feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1] feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0] feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1] batch_size, _, mask_h, mask_w = proto.shape pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores = pred_scores.permute(0, 2, 1).contiguous() pred_distri = pred_distri.permute(0, 2, 1).contiguous() pred_masks = pred_masks.permute(0, 2, 1).contiguous() pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split( (self.reg_max * 4, self.nc), 1) pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous() dtype = pred_scores.dtype batch_size, grid_size = pred_scores.shape[:2] imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) # targets try: batch_idx = targets[:, 0].view(-1, 1) targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) except RuntimeError as e: raise TypeError('ERROR.') from e # pboxes pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) pred_bboxes2 = 
self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( pred_scores2.detach().sigmoid(), (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt) target_scores_sum = target_scores.sum() # cls loss # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE loss[2] *= 0.25 loss[2] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum # BCE # bbox loss if fg_mask.sum(): loss[0], loss[3], _ = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask) # masks loss if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] for i in range(batch_size): if fg_mask[i].sum(): mask_idx = target_gt_idx[i][fg_mask[i]] if self.overlap: gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) else: gt_mask = masks[batch_idx.view(-1) == i][mask_idx] xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, marea) # seg loss loss[0] *= 0.25 loss[3] *= 0.25 loss[1] *= 0.25 # bbox loss if fg_mask.sum(): loss0_, loss3_, _ = self.bbox_loss(pred_distri2, pred_bboxes2, anchor_points, target_bboxes / stride_tensor, target_scores, target_scores_sum, fg_mask) # masks loss if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] for i in range(batch_size): if fg_mask[i].sum(): mask_idx = target_gt_idx[i][fg_mask[i]] if 
def fitness(x):
    """Score each row of *x* as a fixed weighted sum of its first eight columns.

    Columns 2 and 6 carry weight 0.1, columns 3 and 7 carry weight 0.9, and the
    remaining four columns are ignored.

    NOTE(review): presumably the columns are (P, R, mAP@0.5, mAP@0.5:0.95) for
    boxes followed by the same four for masks - confirm with the caller.

    Args:
        x: 2-D array with at least eight columns.

    Returns:
        1-D array with one score per row.
    """
    weights = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
    weighted = x[:, :8] * weights
    return weighted.sum(1)
""" results_boxes = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, prefix="Box")[2:] results_masks = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=plot, save_dir=save_dir, names=names, prefix="Mask")[2:] results = { "boxes": { "p": results_boxes[0], "r": results_boxes[1], "ap": results_boxes[3], "f1": results_boxes[2], "ap_class": results_boxes[4]}, "masks": { "p": results_masks[0], "r": results_masks[1], "ap": results_masks[3], "f1": results_masks[2], "ap_class": results_masks[4]}} return results class Metric: def __init__(self) -> None: self.p = [] # (nc, ) self.r = [] # (nc, ) self.f1 = [] # (nc, ) self.all_ap = [] # (nc, 10) self.ap_class_index = [] # (nc, ) @property def ap50(self): """AP@0.5 of all classes. Return: (nc, ) or []. """ return self.all_ap[:, 0] if len(self.all_ap) else [] @property def ap(self): """AP@0.5:0.95 Return: (nc, ) or []. """ return self.all_ap.mean(1) if len(self.all_ap) else [] @property def mp(self): """mean precision of all classes. Return: float. """ return self.p.mean() if len(self.p) else 0.0 @property def mr(self): """mean recall of all classes. Return: float. """ return self.r.mean() if len(self.r) else 0.0 @property def map50(self): """Mean AP@0.5 of all classes. Return: float. """ return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 @property def map(self): """Mean AP@0.5:0.95 of all classes. Return: float. 
""" return self.all_ap.mean() if len(self.all_ap) else 0.0 def mean_results(self): """Mean of results, return mp, mr, map50, map""" return (self.mp, self.mr, self.map50, self.map) def class_result(self, i): """class-aware result, return p[i], r[i], ap50[i], ap[i]""" return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) def get_maps(self, nc): maps = np.zeros(nc) + self.map for i, c in enumerate(self.ap_class_index): maps[c] = self.ap[i] return maps def update(self, results): """ Args: results: tuple(p, r, ap, f1, ap_class) """ p, r, all_ap, f1, ap_class_index = results self.p = p self.r = r self.all_ap = all_ap self.f1 = f1 self.ap_class_index = ap_class_index class Metrics: """Metric for boxes and masks.""" def __init__(self) -> None: self.metric_box = Metric() self.metric_mask = Metric() def update(self, results): """ Args: results: Dict{'boxes': Dict{}, 'masks': Dict{}} """ self.metric_box.update(list(results["boxes"].values())) self.metric_mask.update(list(results["masks"].values())) def mean_results(self): return self.metric_box.mean_results() + self.metric_mask.mean_results() def class_result(self, i): return self.metric_box.class_result(i) + self.metric_mask.class_result(i) def get_maps(self, nc): return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) @property def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index KEYS = [ "train/box_loss", "train/seg_loss", # train loss "train/obj_loss", "train/cls_loss", "metrics/precision(B)", "metrics/recall(B)", "metrics/mAP_0.5(B)", "metrics/mAP_0.5:0.95(B)", # metrics "metrics/precision(M)", "metrics/recall(M)", "metrics/mAP_0.5(M)", "metrics/mAP_0.5:0.95(M)", # metrics "val/box_loss", "val/seg_loss", # val loss "val/obj_loss", "val/cls_loss", "x/lr0", "x/lr1", "x/lr2",] BEST_KEYS = [ "best/epoch", "best/precision(B)", "best/recall(B)", "best/mAP_0.5(B)", "best/mAP_0.5:0.95(B)", "best/precision(M)", "best/recall(M)", "best/mAP_0.5(M)", 
"best/mAP_0.5:0.95(M)",] ================================================ FILE: utils/segment/plots.py ================================================ import contextlib import math from pathlib import Path import cv2 import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch from .. import threaded from ..general import xywh2xyxy from ..plots import Annotator, colors @threaded def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(targets, torch.Tensor): targets = targets.cpu().numpy() if isinstance(masks, torch.Tensor): masks = masks.cpu().numpy().astype(int) max_size = 1920 # max image size max_subplots = 16 # max image subplots, i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) if np.max(images[0]) <= 1: images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin im = im.transpose(1, 2, 0) mosaic[y:y + h, x:x + w, :] = im # Resize (optional) scale = max_size / ns / max(h, w) if scale < 1: h = math.ceil(scale * h) w = math.ceil(scale * w) mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) # Annotate fs = int((h + w) * ns * 0.01) # font size annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) for i in range(i + 1): x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders if paths: annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if 
def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
    """Plot every ``results*.csv`` next to *file* (or inside *dir*) and save ``results.png``.

    Usage: from utils.plots import *; plot_results('path/to/results.csv')

    Sixteen CSV columns are drawn on a 2x8 grid. Each curve gets a red star either
    at the "best" row - the row maximising
    ``0.9 * col8 + 0.1 * col7 + 0.9 * col12 + 0.1 * col11`` - or at the last row.

    Args:
        file: Path to a results CSV; only its parent directory is used.
        dir: Directory to search when *file* is empty.
        best: Mark the best row if True, otherwise the last row.

    Raises:
        AssertionError: If no ``results*.csv`` file is found.
    """
    save_dir = Path(file).parent if file else Path(dir)
    files = list(save_dir.glob("results*.csv"))
    # Check before creating the figure so a failure does not leave an open figure behind.
    assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot."

    fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
    ax = ax.ravel()
    for f in files:
        try:
            data = pd.read_csv(f)
            values = data.values
            index = np.argmax(0.9 * values[:, 8] + 0.1 * values[:, 7] + 0.9 * values[:, 12] + 0.1 * values[:, 11])
            s = [x.strip() for x in data.columns]
            x = values[:, 0]
            for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]):
                y = values[:, j]
                ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2)
                if best:
                    # best: place the marker at the x value of the best row, not at its row number
                    ax[i].scatter(x[index], y[index], color="r", label=f"best:{index}", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[index], 5)}")
                else:
                    # last
                    ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3)
                    ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}")
        except Exception as e:
            # Best effort: one malformed CSV must not prevent the others from being plotted.
            print(f"Warning: Plotting error for {f}: {e}")
    ax[1].legend()
    fig.savefig(save_dir / "results.png", dpi=200)
    plt.close(fig)  # close this figure specifically rather than whichever one is current
def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9):
    """Flag the anchor centres that lie strictly inside each ground-truth box.

    Args:
        xy_centers (Tensor): shape(h*w, 2), anchor centre coordinates
        gt_bboxes (Tensor): shape(b, n_boxes, 4), boxes as (x1, y1, x2, y2)
        eps (float): minimum distance a centre must keep from every box edge
    Return:
        (Tensor): shape(b, n_boxes, h*w), 1 where the centre is inside the box, else 0
    """
    bs, n_boxes, _ = gt_bboxes.shape
    n_anchors = xy_centers.shape[0]

    flat_boxes = gt_bboxes.view(-1, 1, 4)
    top_left, bottom_right = flat_boxes[..., :2], flat_boxes[..., 2:]
    centers = xy_centers[None]

    # Signed distances from each centre to the four box edges; all positive means "inside".
    edge_dists = torch.cat((centers - top_left, bottom_right - centers), dim=2)
    edge_dists = edge_dists.view(bs, n_boxes, n_anchors, -1)
    smallest = edge_dists.amin(3)
    return smallest.gt_(eps)
class TaskAlignedAssigner(nn.Module):
    """Assign ground-truth boxes to anchors using a task-aligned metric.

    The metric for a (ground truth, anchor) pair is ``score ** alpha * iou ** beta``,
    where ``score`` is the predicted score for the ground truth's class. For each
    ground truth, the ``topk`` anchors with the highest metric whose centres lie
    inside the box become positives.
    """

    def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
        """Store the assignment hyper-parameters.

        Args:
            topk: Number of candidate anchors kept per ground truth.
            num_classes: Number of classes; also used as the background label index.
            alpha: Exponent applied to the classification score.
            beta: Exponent applied to the IoU.
            eps: Small constant used for thresholds and to avoid division by zero.
        """
        super().__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.bg_idx = num_classes
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
        """This code referenced to
           https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py

        Args:
            pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            anc_points (Tensor): shape(num_total_anchors, 2)
            gt_labels (Tensor): shape(bs, n_max_boxes, 1)
            gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
            mask_gt (Tensor): shape(bs, n_max_boxes, 1)
        Returns:
            target_labels (Tensor): shape(bs, num_total_anchors)
            target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            fg_mask (Tensor): shape(bs, num_total_anchors)
            target_gt_idx (Tensor): shape(bs, num_total_anchors)
        """
        self.bs = pd_scores.size(0)
        self.n_max_boxes = gt_bboxes.size(1)

        if self.n_max_boxes == 0:
            # No ground truth in the whole batch: everything is background.
            # Note that fg_mask and target_gt_idx keep the dtype of pd_scores here.
            device = gt_bboxes.device
            return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
                    torch.zeros_like(pd_bboxes).to(device),
                    torch.zeros_like(pd_scores).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device))

        mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
                                                             mask_gt)

        target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

        # assigned target
        target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)

        # normalize: rescale each ground truth's metric so that its best anchor scores that
        # ground truth's best IoU, then take the per-anchor maximum as the soft target.
        align_metric *= mask_pos
        pos_align_metrics = align_metric.amax(axis=-1, keepdim=True)  # b, max_num_obj
        pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True)  # b, max_num_obj
        norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1)
        target_scores = target_scores * norm_align_metric

        return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx

    def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
        """Build the positive-anchor mask.

        Returns:
            mask_pos, align_metric, overlaps, each of shape (b, max_num_obj, h*w).
        """
        # get anchor_align metric, (b, max_num_obj, h*w)
        align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes)
        # get in_gts mask, (b, max_num_obj, h*w)
        mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
        # get topk_metric mask, (b, max_num_obj, h*w)
        mask_topk = self.select_topk_candidates(align_metric * mask_in_gts,
                                                topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
        # merge all mask to a final mask, (b, max_num_obj, h*w)
        mask_pos = mask_topk * mask_in_gts * mask_gt

        return mask_pos, align_metric, overlaps

    def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes):
        """Compute the alignment metric and the clamped CIoU for every (ground truth, anchor) pair.

        Returns:
            align_metric, overlaps, each of shape (b, max_num_obj, h*w).
        """
        gt_labels = gt_labels.to(torch.long)  # b, max_num_obj, 1
        ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long)  # 2, b, max_num_obj
        ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes)  # b, max_num_obj
        ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
        # get the scores of each grid for each gt cls
        bbox_scores = pd_scores[ind[0], :, ind[1]]  # b, max_num_obj, h*w

        overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0)
        align_metric = bbox_scores.pow(self.alpha) * (overlaps).pow(self.beta)
        return align_metric, overlaps

    def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
        """Mark, for every ground truth, the ``topk`` anchors with the best metric.

        Args:
            metrics: (b, max_num_obj, h*w).
            largest: Passed to ``torch.topk``; True selects the largest values.
            topk_mask: (b, max_num_obj, topk) or None. When None, a ground truth is kept
                only if its best metric exceeds ``eps``.
        Returns:
            (b, max_num_obj, h*w) tensor of 0/1 in the dtype of *metrics*.
        """
        num_anchors = metrics.shape[-1]  # h*w
        # (b, max_num_obj, topk)
        topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
        if topk_mask is None:
            # amax returns a plain tensor; Tensor.max(dim) returns a (values, indices) pair
            # that cannot be compared with a float.
            topk_mask = (topk_metrics.amax(-1, keepdim=True) > self.eps).tile([1, 1, self.topk])
        # (b, max_num_obj, topk)
        topk_idxs = torch.where(topk_mask, topk_idxs, 0)
        # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w)
        is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2)
        # filter invalid bboxes
        # assigned topk should be unique, this is for dealing with empty labels
        # since empty labels will generate index `0` through `F.one_hot`
        # NOTE: but what if the topk_idxs include `0`?
        is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk)
        return is_in_topk.to(metrics.dtype)

    def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
        """Gather per-anchor labels, boxes and one-hot scores from the assigned ground truths.

        Args:
            gt_labels: (b, max_num_obj, 1)
            gt_bboxes: (b, max_num_obj, 4)
            target_gt_idx: (b, h*w)
            fg_mask: (b, h*w)
        Returns:
            target_labels (b, h*w), target_bboxes (b, h*w, 4) and target_scores
            (b, h*w, num_classes); score rows of background anchors are zeroed.
        """
        # Offset each image's indices so they address the flattened (b * max_num_obj) ground truths.
        batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes  # (b, h*w)
        target_labels = gt_labels.long().flatten()[target_gt_idx]  # (b, h*w)

        # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w, 4)
        target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

        # assigned target scores
        # In-place clamp: the out-of-place ``clamp`` discards its result, which would leave
        # negative labels in place and make ``F.one_hot`` fail.
        target_labels.clamp_(0)
        target_scores = F.one_hot(target_labels, self.num_classes)  # (b, h*w, 80)
        fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)  # (b, h*w, 80)
        target_scores = torch.where(fg_scores_mask > 0, target_scores, 0)

        return target_labels, target_bboxes, target_scores
dtype=dtype, device=device)) return torch.cat(anchor_points), torch.cat(stride_tensor) def dist2bbox(distance, anchor_points, xywh=True, dim=-1): """Transform distance(ltrb) to box(xywh or xyxy).""" lt, rb = torch.split(distance, 2, dim) x1y1 = anchor_points - lt x2y2 = anchor_points + rb if xywh: c_xy = (x1y1 + x2y2) / 2 wh = x2y2 - x1y1 return torch.cat((c_xy, wh), dim) # xywh bbox return torch.cat((x1y1, x2y2), dim) # xyxy bbox def bbox2dist(anchor_points, bbox, reg_max): """Transform bbox(xyxy) to dist(ltrb).""" x1y1, x2y2 = torch.split(bbox, 2, -1) return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) ================================================ FILE: utils/tal/assigner.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from utils.metrics import bbox_iou def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): """select the positive anchor center in gt Args: xy_centers (Tensor): shape(h*w, 4) gt_bboxes (Tensor): shape(b, n_boxes, 4) Return: (Tensor): shape(b, n_boxes, h*w) """ n_anchors = xy_centers.shape[0] bs, n_boxes, _ = gt_bboxes.shape lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) return bbox_deltas.amin(3).gt_(eps) def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): """if an anchor box is assigned to multiple gts, the one with the highest iou will be selected. 
class TaskAlignedAssigner(nn.Module):
    """Assign ground-truth boxes to anchors using a task-aligned metric.

    The metric is ``score ** alpha * iou ** beta``; for every ground-truth box the
    `topk` anchors with the highest metric (restricted to anchors whose centre
    lies inside the box) become positive candidates.
    """

    def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9):
        super().__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.bg_idx = num_classes  # label value used for the "no ground truth" early return
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @torch.no_grad()
    def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt):
        """This code referenced to
           https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py

        Args:
            pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            anc_points (Tensor): shape(num_total_anchors, 2)
            gt_labels (Tensor): shape(bs, n_max_boxes, 1)
            gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
            mask_gt (Tensor): shape(bs, n_max_boxes, 1)
        Returns:
            target_labels (Tensor): shape(bs, num_total_anchors)
            target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
            target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
            fg_mask (Tensor): shape(bs, num_total_anchors)
        """
        # Cached on the instance because the helper methods below read them.
        self.bs = pd_scores.size(0)
        self.n_max_boxes = gt_bboxes.size(1)

        if self.n_max_boxes == 0:
            # No ground truth in the batch: everything is background.
            device = gt_bboxes.device
            return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device),
                    torch.zeros_like(pd_bboxes).to(device),
                    torch.zeros_like(pd_scores).to(device),
                    torch.zeros_like(pd_scores[..., 0]).to(device))

        mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points,
                                                             mask_gt)

        target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes)

        # assigned target
        target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask)

        # normalize
        align_metric *= mask_pos
        pos_align_metrics = align_metric.amax(axis=-1, keepdim=True)  # b, max_num_obj
        pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True)  # b, max_num_obj
        norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1)
        target_scores = target_scores * norm_align_metric

        return target_labels, target_bboxes, target_scores, fg_mask.bool()

    def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt):
        """Return (mask_pos, align_metric, overlaps), each (b, max_num_obj, h*w)."""
        # get anchor_align metric, (b, max_num_obj, h*w)
        align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes)
        # get in_gts mask, (b, max_num_obj, h*w)
        mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
        # get topk_metric mask, (b, max_num_obj, h*w)
        mask_topk = self.select_topk_candidates(align_metric * mask_in_gts,
                                                topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
        # merge all mask to a final mask, (b, max_num_obj, h*w)
        mask_pos = mask_topk * mask_in_gts * mask_gt

        return mask_pos, align_metric, overlaps

    def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes):
        """Return the alignment metric and the clamped IoU for every (gt, anchor) pair."""
        gt_labels = gt_labels.to(torch.long)  # b, max_num_obj, 1
        ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long)  # 2, b, max_num_obj
        ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes)  # b, max_num_obj
        ind[1] = gt_labels.squeeze(-1)  # b, max_num_obj
        # get the scores of each grid for each gt cls
        bbox_scores = pd_scores[ind[0], :, ind[1]]  # b, max_num_obj, h*w

        overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False,
                            CIoU=True).squeeze(3).clamp(0)
        align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
        return align_metric, overlaps

    def select_topk_candidates(self, metrics, largest=True, topk_mask=None):
        """Mark, per ground-truth box, the `self.topk` anchors with the best metric.

        Args:
            metrics: (b, max_num_obj, h*w).
            topk_mask: (b, max_num_obj, topk) or None. When None, a box is kept
                only if its best metric exceeds `self.eps`.
        Returns:
            Tensor of shape (b, max_num_obj, h*w) in `metrics.dtype` holding 1 for
            selected anchors and 0 elsewhere.
        """
        num_anchors = metrics.shape[-1]  # h*w
        # (b, max_num_obj, topk)
        topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest)
        if topk_mask is None:
            # Tensor.max(dim) returns (values, indices); only the values can be
            # compared with eps.
            topk_mask = (topk_metrics.max(-1, keepdim=True)[0] > self.eps).tile([1, 1, self.topk])
        # (b, max_num_obj, topk)
        topk_idxs = torch.where(topk_mask, topk_idxs, 0)
        # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w)
        is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2)
        # filter invalid bboxes
        # assigned topk should be unique, this is for dealing with empty labels
        # since empty labels will generate index `0` through `F.one_hot`
        # NOTE: but what if the topk_idxs include `0`?
        is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk)
        return is_in_topk.to(metrics.dtype)

    def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask):
        """Gather the label, box and one-hot score target for every anchor.

        Args:
            gt_labels: (b, max_num_obj, 1)
            gt_bboxes: (b, max_num_obj, 4)
            target_gt_idx: (b, h*w)
            fg_mask: (b, h*w)
        Returns:
            target_labels (b, h*w), target_bboxes (b, h*w, 4) and
            target_scores (b, h*w, num_classes); score rows of anchors whose
            fg_mask is not positive are zero.
        """
        # Offset per-image gt indices so they address the flattened (b * max_num_obj) gt arrays.
        batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None]
        target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes  # (b, h*w)
        target_labels = gt_labels.long().flatten()[target_gt_idx]  # (b, h*w)

        # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w, 4)
        target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx]

        # assigned target scores
        # In-place on purpose: plain clamp() returns a new tensor, which would leave
        # negative labels in place and make F.one_hot raise.
        target_labels.clamp_(0)
        target_scores = F.one_hot(target_labels, self.num_classes)  # (b, h*w, 80)
        fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)  # (b, h*w, 80)
        target_scores = torch.where(fg_scores_mask > 0, target_scores, 0)

        return target_labels, target_bboxes, target_scores
def smartCrossEntropyLoss(label_smoothing=0.0):
    # Build nn.CrossEntropyLoss; label_smoothing is only forwarded when the torch>=1.10.0 version check passes
    smoothing_supported = check_version(torch.__version__, '1.10.0')
    if not smoothing_supported:
        if label_smoothing > 0:  # smoothing was requested but cannot be honoured: warn, then fall back
            LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0')
        return nn.CrossEntropyLoss()
    return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
def select_device(device='', batch_size=0, newline=True):
    # Resolve a device request to a torch.device and log a one-line environment summary.
    # device = None or 'cpu' or 0 or '0' or '0,1,2,3'
    # batch_size: when > 0 and several GPUs are requested, must be a multiple of the GPU count
    # newline: when False, trailing whitespace is stripped from the logged summary
    # Raises AssertionError for an unusable CUDA request or an indivisible batch size.
    s = f'YOLO 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
    device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
    cpu = device == 'cpu'
    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
    if cpu or mps:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
        # Count comma-separated ids rather than characters so that ids >= 10 count as one device each.
        n_requested = len(device.split(','))
        assert torch.cuda.is_available() and torch.cuda.device_count() >= n_requested, \
            f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"

    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
        devices = device.split(',') if device else ['0']  # i.e. 0,1,6,7
        n = len(devices)  # device count
        if n > 1 and batch_size > 0:  # check batch_size is divisible by device_count
            assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
        space = ' ' * (len(s) + 1)  # aligns the 2nd+ device lines under the first
        for i, d in enumerate(devices):
            p = torch.cuda.get_device_properties(i)
            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
        arg = 'cuda:0'
    elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available():  # prefer MPS if available
        s += 'MPS\n'
        arg = 'mps'
    else:  # revert to CPU
        s += 'CPU\n'
        arg = 'cpu'

    if not newline:
        s = s.rstrip()
    LOGGER.info(s)
    return torch.device(arg)
def fuse_conv_and_bn(conv, bn):
    # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    # Returns a new nn.Conv2d (bias=True, requires_grad False) whose weight and bias fold in the
    # BatchNorm statistics read from bn.weight, bn.bias, bn.running_mean, bn.running_var and bn.eps.
    # The fused layer is created on the device AND dtype of conv.weight, so non-float32 convs fuse too.
    device, dtype = conv.weight.device, conv.weight.dtype
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(device=device, dtype=dtype)

    # Prepare filters: scale each output channel by gamma / sqrt(var + eps)
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # Prepare spatial bias; the zero bias must share conv's dtype or torch.mm rejects the operands
    if conv.bias is None:
        b_conv = torch.zeros(conv.weight.size(0), dtype=dtype, device=device)
    else:
        b_conv = conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5):
    # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
    # name selects the optimizer class ('Adam', 'AdamW', 'RMSProp', 'SGD' or 'LION'); any other value
    # raises NotImplementedError. The optimizer is created on the bias group and the two weight groups
    # are then added with their own weight_decay.
    g = [], [], []  # optimizer parameter groups
    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
    # Attributes that hold either one object with an `.implicit` parameter or an iterable of such
    # objects; visited in this order for every module.
    implicit_attrs = ('im', 'ia', 'im2', 'ia2', 'im3', 'ia3', 'im4', 'ia4', 'im5', 'ia5', 'im6', 'ia6', 'im7', 'ia7')

    for v in model.modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias (no decay)
            g[2].append(v.bias)
        if isinstance(v, bn):  # weight (no decay)
            # Norm layers may carry no learnable weight (affine=False) or no weight attribute at all;
            # skip those instead of handing None to the optimizer.
            if isinstance(getattr(v, 'weight', None), nn.Parameter):
                g[1].append(v.weight)
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
            g[0].append(v.weight)

        for attr in implicit_attrs:
            if not hasattr(v, attr):
                continue
            holder = getattr(v, attr)
            if hasattr(holder, 'implicit'):
                g[1].append(holder.implicit)
            else:
                g[1].extend(iv.implicit for iv in holder)

    if name == 'Adam':
        optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999))  # adjust beta1 to momentum
    elif name == 'AdamW':
        optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0, amsgrad=True)
    elif name == 'RMSProp':
        optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
    elif name == 'SGD':
        optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
    elif name == 'LION':
        optimizer = Lion(g[2], lr=lr, betas=(momentum, 0.99), weight_decay=0.0)
    else:
        raise NotImplementedError(f'Optimizer {name} not implemented.')

    optimizer.add_param_group({'params': g[0], 'weight_decay': decay})  # add g0 with weight_decay
    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})  # add g1 (BatchNorm2d weights)
    LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups "
                f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias")
    return optimizer
class EarlyStopping:
    # YOLOv5 simple early stopper: call once per epoch with that epoch's fitness
    def __init__(self, patience=30):
        # a falsy patience (0/None) becomes infinity, so stopping never triggers
        self.patience = patience if patience else float('inf')
        self.best_fitness, self.best_epoch = 0.0, 0  # best fitness seen (i.e. mAP) and its epoch
        self.possible_stop = False  # True when the next epoch without improvement would stop

    def __call__(self, epoch, fitness):
        # Record a new best (>= so ties and the early zero-fitness stage still refresh best_epoch),
        # then return whether `patience` epochs have passed without one.
        if fitness >= self.best_fitness:
            self.best_fitness, self.best_epoch = fitness, epoch
        stalled_for = epoch - self.best_epoch  # epochs without improvement
        self.possible_stop = stalled_for >= (self.patience - 1)
        stop = stalled_for >= self.patience
        if not stop:
            return stop
        LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
                    f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
                    f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
                    f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.')
        return stop
grpc://localhost:8000 """ parsed_url = urlparse(url) if parsed_url.scheme == "grpc": from tritonclient.grpc import InferenceServerClient, InferInput self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client model_repository = self.client.get_model_repository_index() self.model_name = model_repository.models[0].name self.metadata = self.client.get_model_metadata(self.model_name, as_json=True) def create_input_placeholders() -> typing.List[InferInput]: return [ InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] else: from tritonclient.http import InferenceServerClient, InferInput self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client model_repository = self.client.get_model_repository_index() self.model_name = model_repository[0]['name'] self.metadata = self.client.get_model_metadata(self.model_name) def create_input_placeholders() -> typing.List[InferInput]: return [ InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] self._create_input_placeholders_fn = create_input_placeholders @property def runtime(self): """Returns the model runtime""" return self.metadata.get("backend", self.metadata.get("platform")) def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]: """ Invokes the model. Parameters can be provided via args or kwargs. args, if provided, are assumed to match the order of inputs of the model. kwargs are matched with the model input names. 
""" inputs = self._create_inputs(*args, **kwargs) response = self.client.infer(model_name=self.model_name, inputs=inputs) result = [] for output in self.metadata['outputs']: tensor = torch.as_tensor(response.as_numpy(output['name'])) result.append(tensor) return result[0] if len(result) == 1 else result def _create_inputs(self, *args, **kwargs): args_len, kwargs_len = len(args), len(kwargs) if not args_len and not kwargs_len: raise RuntimeError("No inputs provided.") if args_len and kwargs_len: raise RuntimeError("Cannot specify args and kwargs at the same time") placeholders = self._create_input_placeholders_fn() if args_len: if args_len != len(placeholders): raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") for input, value in zip(placeholders, args): input.set_data_from_numpy(value.cpu().numpy()) else: for input in placeholders: value = kwargs[input.name] input.set_data_from_numpy(value.cpu().numpy()) return placeholders ================================================ FILE: val.py ================================================ import argparse import json import os import sys from pathlib import Path import numpy as np import torch from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_boxes, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, ap_per_class, box_iou from utils.plots import output_to_target, plot_images, plot_val_study from utils.torch_utils import select_device, smart_inference_mode def 
def save_one_json(predn, jdict, path, class_map):
    # Append one dict per row of predn to jdict, in the form
    # {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
    # image_id is int(path.stem) when the stem is numeric, else the stem string.
    # Row layout as read here: columns 0-3 box (converted with xyxy2xywh), 4 score, 5 class index.
    stem = path.stem
    image_id = int(stem) if stem.isnumeric() else stem
    boxes = xyxy2xywh(predn[:, :4])  # xywh
    boxes[:, :2] -= boxes[:, 2:] / 2  # xy center to top-left corner
    for det, rect in zip(predn.tolist(), boxes.tolist()):
        score, cls = det[4], int(det[5])
        jdict.append({
            'image_id': image_id,
            'category_id': class_map[cls],
            'bbox': [round(v, 3) for v in rect],
            'score': round(score, 5)})
model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.7, # NMS IoU threshold max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference min_items=0, # Experimental model=None, dataloader=None, save_dir=Path(''), plots=True, callbacks=Callbacks(), compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA if engine: batch_size = model.batch_size else: 
device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' #is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, min_items=opt.min_items, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95') tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 dt = Profile(), Profile(), Profile() # profiling times loss = torch.zeros(3, device=device) jdict, stats, 
ap, ap_class = [], [], [], [] callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar for batch_i, (im, targets, paths, shapes) in enumerate(pbar): callbacks.run('on_val_batch_start') with dt[0]: if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width # Inference with dt[1]: preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: loss += compute_loss(train_out, targets)[1] # box, obj, cls # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: preds = non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det) # Metrics for si, pred in enumerate(preds): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0])) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct, pred[:, 4], pred[:, 5], 
labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds) # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(int), minlength=nc) # number of targets per class # Print results pf = '%22s' + '%11i' * 2 + '%11.3g' * 4 # print format LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) if nt.sum() == 0: LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) 
else weights).stem if weights is not None else '' # weights anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements('pycocotools') from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.7, help='NMS IoU 
threshold') parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--verbose', action='store_true', help='report mAP by class') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--min-items', type=int, default=0, help='Experimental') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt def main(opt): #check_requirements(exclude=('tensorboard', 'thop')) if opt.task in ('train', 'val', 'test'): # run normally if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 LOGGER.info(f'WARNING ⚠️ confidence 
threshold {opt.conf_thres} > 0.001 produces invalid results') if opt.save_hybrid: LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone') run(**vars(opt)) else: weights = opt.weights if isinstance(opt.weights, list) else [opt.weights] opt.half = torch.cuda.is_available() and opt.device != 'cpu' # FP16 for fastest results if opt.task == 'speed': # speed benchmarks # python val.py --task speed --data coco.yaml --batch 1 --weights yolo.pt... opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False for opt.weights in weights: run(**vars(opt), plots=False) elif opt.task == 'study': # speed vs mAP benchmarks # python val.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt... for opt.weights in weights: f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis for opt.imgsz in x: # img-size LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') r, _, t = run(**vars(opt), plots=False) y.append(r + t) # results and times np.savetxt(f, y, fmt='%10.4g') # save os.system('zip -r study.zip study_*.txt') plot_val_study(x=x) # plot if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: val_dual.py ================================================ import argparse import json import os import sys from pathlib import Path import numpy as np import torch from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, 
colorstr, increment_path, non_max_suppression, print_args, scale_boxes, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, ap_per_class, box_iou from utils.plots import output_to_target, plot_images, plot_val_study from utils.torch_utils import select_device, smart_inference_mode def save_one_txt(predn, save_conf, shape, file): # Save one txt result gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(file, 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') def save_one_json(predn, jdict, path, class_map): # Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236} image_id = int(path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(predn.tolist(), box.tolist()): jdict.append({ 'image_id': image_id, 'category_id': class_map[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5)}) def process_batch(detections, labels, iouv): """ Return correct prediction matrix Arguments: detections (array[N, 6]), x1, y1, x2, y2, conf, class labels (array[M, 5]), class, x1, y1, x2, y2 Returns: correct (array[N, 10]), for 10 IoU levels """ correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) iou = box_iou(labels[:, 1:], detections[:, :4]) correct_class = labels[:, 0:1] == detections[:, 5] for i in range(len(iouv)): x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match if x[0].shape[0]: matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], 
return_index=True)[1]] # matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 0], return_index=True)[1]] correct[matches[:, 1].astype(int), i] = True return torch.tensor(correct, dtype=torch.bool, device=iouv.device) @smart_inference_mode() def run( data, weights=None, # model.pt path(s) batch_size=32, # batch size imgsz=640, # inference size (pixels) conf_thres=0.001, # confidence threshold iou_thres=0.7, # NMS IoU threshold max_det=300, # maximum detections per image task='val', # train, val, test, speed or study device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu workers=8, # max dataloader workers (per RANK in DDP mode) single_cls=False, # treat as single-class dataset augment=False, # augmented inference verbose=False, # verbose output save_txt=False, # save results to *.txt save_hybrid=False, # save label+prediction hybrid results to *.txt save_conf=False, # save confidences in --save-txt labels save_json=False, # save a COCO-JSON results file project=ROOT / 'runs/val', # save to project/name name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference min_items=0, # Experimental model=None, dataloader=None, save_dir=Path(''), plots=True, callbacks=Callbacks(), compute_loss=None, ): # Initialize/load model and set device training = model is not None if training: # called by train.py device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model model = 
DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half) stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_img_size(imgsz, s=stride) # check image size half = model.fp16 # FP16 supported on limited backends with CUDA if engine: batch_size = model.batch_size else: device = model.device if not (pt or jit): batch_size = 1 # export.py models default to batch-size 1 LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') # Data data = check_dataset(data) # check # Configure model.eval() cuda = device.type != 'cpu' #is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'coco{os.sep}val2017.txt') # COCO dataset is_coco = isinstance(data.get('val'), str) and data['val'].endswith(f'val2017.txt') # COCO dataset nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() # Dataloader if not training: if pt and not single_cls: # check --weights are trained on --data ncm = model.model.nc assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \ f'classes). Pass correct combination of --weights and --data that are trained together.' 
model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup pad, rect = (0.0, False) if task == 'speed' else (0.5, pt) # square inference for benchmarks task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=rect, workers=workers, min_items=opt.min_items, prefix=colorstr(f'{task}: '))[0] seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) names = model.names if hasattr(model, 'names') else model.module.names # get class names if isinstance(names, (list, tuple)): # old format names = dict(enumerate(names)) class_map = coco80_to_coco91_class() if is_coco else list(range(1000)) s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95') tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 dt = Profile(), Profile(), Profile() # profiling times loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] callbacks.run('on_val_start') pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar for batch_i, (im, targets, paths, shapes) in enumerate(pbar): callbacks.run('on_val_batch_start') with dt[0]: if cuda: im = im.to(device, non_blocking=True) targets = targets.to(device) im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width # Inference with dt[1]: preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None) # Loss if compute_loss: preds = preds[1] #train_out = train_out[1] #loss += compute_loss(train_out, targets)[1] # box, obj, cls else: preds = preds[0][1] # NMS targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling with dt[2]: preds = 
non_max_suppression(preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det) # Metrics for si, pred in enumerate(preds): labels = targets[targets[:, 0] == si, 1:] nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions path, shape = Path(paths[si]), shapes[si][0] correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init seen += 1 if npr == 0: if nl: stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0])) if plots: confusion_matrix.process_batch(detections=None, labels=labels[:, 0]) continue # Predictions if single_cls: pred[:, 5] = 0 predn = pred.clone() scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: confusion_matrix.process_batch(predn, labelsn) stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) # Save/log if save_txt: save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt') if save_json: save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names) # labels plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names) # pred callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds) # Compute metrics stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, 
AP@0.5:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(int), minlength=nc) # number of targets per class # Print results pf = '%22s' + '%11i' * 2 + '%11.3g' * 4 # print format LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) if nt.sum() == 0: LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels') # Print results per class if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats): for i, c in enumerate(ap_class): LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x.t / seen * 1E3 for x in dt) # speeds per image if not training: shape = (batch_size, 3, imgsz, imgsz) LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json') # annotations json pred_json = str(save_dir / f"{w}_predictions.json") # predictions json LOGGER.info(f'\nEvaluating pycocotools mAP... 
saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb check_requirements('pycocotools') from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval anno = COCO(anno_json) # init annotations api pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) except Exception as e: LOGGER.info(f'pycocotools unable to run: {e}') # Return results model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}") maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path') parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path(s)') parser.add_argument('--batch-size', type=int, default=32, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)') parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold') parser.add_argument('--iou-thres', type=float, default=0.7, help='NMS IoU threshold') parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image') parser.add_argument('--task', default='val', help='train, val, test, speed or study') parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--verbose', action='store_true', help='report mAP by class') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file') parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') parser.add_argument('--min-items', type=int, default=0, help='Experimental') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') opt.save_txt |= opt.save_hybrid print_args(vars(opt)) return opt def main(opt): #check_requirements(exclude=('tensorboard', 'thop')) if opt.task in ('train', 'val', 'test'): # run normally if opt.conf_thres > 0.001: # https://github.com/ultralytics/yolov5/issues/1466 LOGGER.info(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results') if opt.save_hybrid: LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone') run(**vars(opt)) else: weights = opt.weights if isinstance(opt.weights, 
list) else [opt.weights] opt.half = torch.cuda.is_available() and opt.device != 'cpu' # FP16 for fastest results if opt.task == 'speed': # speed benchmarks # python val.py --task speed --data coco.yaml --batch 1 --weights yolo.pt... opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False for opt.weights in weights: run(**vars(opt), plots=False) elif opt.task == 'study': # speed vs mAP benchmarks # python val.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt... for opt.weights in weights: f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt' # filename to save to x, y = list(range(256, 1536 + 128, 128)), [] # x axis (image sizes), y axis for opt.imgsz in x: # img-size LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...') r, _, t = run(**vars(opt), plots=False) y.append(r + t) # results and times np.savetxt(f, y, fmt='%10.4g') # save os.system('zip -r study.zip study_*.txt') plot_val_study(x=x) # plot if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: val_triple.py ================================================ import argparse import json import os import sys from pathlib import Path import numpy as np import torch from tqdm import tqdm FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # YOLO root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import DetectMultiBackend from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_boxes, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, ap_per_class, box_iou from utils.plots import output_to_target, plot_images, plot_val_study from utils.torch_utils import 
select_device, smart_inference_mode


def save_one_txt(predn, save_conf, shape, file):
    """Append one text row per detection in `predn` to `file`.

    Each row is `cls x y w h` (plus `conf` when `save_conf` is true), '%g'-formatted, with the box
    converted from xyxy to xywh and divided by the whwh gain taken from `shape`.
    """
    whwh = torch.tensor(shape)[[1, 0, 1, 0]]  # normalization gain whwh
    for det in predn.tolist():
        *corners, score, label = det
        box = xyxy2xywh(torch.tensor(corners).view(1, 4))
        normed = (box / whwh).view(-1).tolist()  # normalized xywh
        if save_conf:
            fields = (label, *normed, score)
        else:
            fields = (label, *normed)
        template = ('%g ' * len(fields)).rstrip()
        with open(file, 'a') as out:
            out.write(template % fields + '\n')


def save_one_json(predn, jdict, path, class_map):
    """Extend `jdict` with one COCO-style result dict per detection in `predn`.

    Example entry:
    {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
    """
    stem = path.stem
    image_id = int(stem) if stem.isnumeric() else stem
    boxes = xyxy2xywh(predn[:, :4])  # xywh
    boxes[:, :2] -= boxes[:, 2:] / 2  # xy center to top-left corner
    jdict.extend({
        'image_id': image_id,
        'category_id': class_map[int(det[5])],
        'bbox': [round(v, 3) for v in xywh],
        'score': round(det[4], 5)} for det, xywh in zip(predn.tolist(), boxes.tolist()))


def process_batch(detections, labels, iouv):
    """
    Build the boolean "correct prediction" matrix.
    Arguments:
        detections (array[N, 6]), x1, y1, x2, y2, conf, class
        labels (array[M, 5]), class, x1, y1, x2, y2
    Returns:
        correct (array[N, 10]), for 10 IoU levels
    """
    n_det, n_thr = detections.shape[0], iouv.shape[0]
    correct = np.zeros((n_det, n_thr), dtype=bool)
    iou = box_iou(labels[:, 1:], detections[:, :4])
    same_class = labels[:, 0:1] == detections[:, 5]
    for col, thr in enumerate(iouv):
        label_idx, det_idx = torch.where((iou >= thr) & same_class)  # IoU > threshold and classes match
        n_pairs = label_idx.shape[0]
        if not n_pairs:
            continue
        pairs = torch.stack((label_idx, det_idx), 1)
        matches = torch.cat((pairs, iou[label_idx, det_idx][:, None]), 1).cpu().numpy()  # [label, detect, iou]
        if n_pairs > 1:
            # Sort by IoU (descending), then keep one row per detection and one row per label
            by_iou = matches[:, 2].argsort()[::-1]
            matches = matches[by_iou]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        correct[matches[:, 1].astype(int), col] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
@smart_inference_mode()
def run(
        data,
        weights=None,  # model.pt path(s)
        batch_size=32,  # batch size
        imgsz=640,  # inference size (pixels)
        conf_thres=0.001,  # confidence threshold
        iou_thres=0.7,  # NMS IoU threshold
        max_det=300,  # maximum detections per image
        task='val',  # train, val, test, speed or study
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        workers=8,  # max dataloader workers (per RANK in DDP mode)
        single_cls=False,  # treat as single-class dataset
        augment=False,  # augmented inference
        verbose=False,  # verbose output
        save_txt=False,  # save results to *.txt
        save_hybrid=False,  # save label+prediction hybrid results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_json=False,  # save a COCO-JSON results file
        project=ROOT / 'runs/val',  # save to project/name
        name='exp',  # save to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        half=True,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        min_items=0,  # Experimental
        model=None,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
        callbacks=Callbacks(),
        compute_loss=None,
):
    """
    Validate a detection model on a dataset and report precision, recall and mAP.

    When `model` is given the function treats the call as coming from training and reuses the
    supplied `model`, `dataloader` and `save_dir`. Otherwise it selects the device, creates the
    run directory, loads `weights` through DetectMultiBackend and builds its own dataloader.

    Returns:
        ((mp, mr, map50, map, *loss), maps, t) where `maps` holds one mAP value per class and `t`
        the per-image pre-process / inference / NMS times in milliseconds. Loss accumulation is
        disabled below, so the loss entries are zeros.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device, pt, jit, engine = next(model.parameters()).device, True, False, False  # get model device, PyTorch model
        half &= device.type != 'cpu'  # half precision only supported on CUDA
        model.half() if half else model.float()
    else:  # called directly
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)  # check image size
        half = model.fp16  # FP16 supported on limited backends with CUDA
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1  # export.py models default to batch-size 1
                LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models')

        # Data
        data = check_dataset(data)  # check

    # Configure
    model.eval()
    cuda = device.type != 'cpu'
    # A stricter variant of this check matched f'coco{os.sep}val2017.txt'; here any val path
    # ending in 'val2017.txt' is treated as COCO.
    is_coco = isinstance(data.get('val'), str) and data['val'].endswith('val2017.txt')  # COCO dataset
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        if pt and not single_cls:  # check --weights are trained on --data
            ncm = model.model.nc
            assert ncm == nc, f'{weights} ({ncm} classes) trained on different --data than what you passed ({nc} ' \
                              f'classes). Pass correct combination of --weights and --data that are trained together.'
        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))  # warmup
        pad, rect = (0.0, False) if task == 'speed' else (0.5, pt)  # square inference for benchmarks
        task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
        # Use the `min_items` argument rather than the CLI-global `opt`, so that run() also works
        # when it is imported and called programmatically (no `opt` exists in that case).
        dataloader = create_dataloader(data[task],
                                       imgsz,
                                       batch_size,
                                       stride,
                                       single_cls,
                                       pad=pad,
                                       rect=rect,
                                       workers=workers,
                                       min_items=min_items,
                                       prefix=colorstr(f'{task}: '))[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = model.names if hasattr(model, 'names') else model.module.names  # get class names
    if isinstance(names, (list, tuple)):  # old format
        names = dict(enumerate(names))
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
    s = ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'P', 'R', 'mAP50', 'mAP50-95')
    tp, fp, p, r, f1, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    dt = Profile(), Profile(), Profile()  # profiling times
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    callbacks.run('on_val_start')
    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)  # progress bar
    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
        callbacks.run('on_val_batch_start')
        with dt[0]:
            if cuda:
                im = im.to(device, non_blocking=True)
                targets = targets.to(device)
            im = im.half() if half else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            nb, _, height, width = im.shape  # batch size, channels, height, width

        # Inference
        with dt[1]:
            preds, train_out = model(im) if compute_loss else (model(im, augment=augment), None)
            # Index 2 of the model output is evaluated (presumably the last of the three
            # branches of the triple model -- confirm against the model definition).
            preds = preds[2]
            # train_out is None whenever compute_loss is not supplied (every direct validation
            # run); indexing it unconditionally raised TypeError.
            if train_out is not None:
                train_out = train_out[2]

        # Loss
        # if compute_loss:
        #     loss += compute_loss(train_out, targets)[2]  # box, obj, cls

        # NMS
        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
        with dt[2]:
            preds = non_max_suppression(preds,
                                        conf_thres,
                                        iou_thres,
                                        labels=lb,
                                        multi_label=True,
                                        agnostic=single_cls,
                                        max_det=max_det)

        # Metrics
        for si, pred in enumerate(preds):
            labels = targets[targets[:, 0] == si, 1:]
            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, predictions
            path, shape = Path(paths[si]), shapes[si][0]
            correct = torch.zeros(npr, niou, dtype=torch.bool, device=device)  # init
            seen += 1

            if npr == 0:
                if nl:
                    stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0]))
                    if plots:
                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                continue

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                correct = process_batch(predn, labelsn, iouv)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0]))  # (correct, conf, pcls, tcls)

            # Save/log
            if save_txt:
                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')
            if save_json:
                save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])

        # Plot images
        if plots and batch_i < 3:
            plot_images(im, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
            plot_images(im, output_to_target(preds), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred

        callbacks.run('on_val_batch_end', batch_i, im, targets, paths, shapes, preds)

    # Compute metrics
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
    nt = np.bincount(stats[3].astype(int), minlength=nc)  # number of targets per class

    # Print results
    pf = '%22s' + '%11i' * 2 + '%11.3g' * 4  # print format
    LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
    if nt.sum() == 0:
        LOGGER.warning(f'WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels')

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        callbacks.run('on_val_end', nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = str(Path(data.get('path', '../coco')) / 'annotations/instances_val2017.json')  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        LOGGER.info(f'\nEvaluating pycocotools mAP... saving {pred_json}...')
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        # Best-effort: a missing pycocotools install or annotation file must not abort validation.
        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            check_requirements('pycocotools')
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            coco_eval = COCOeval(anno, pred, 'bbox')  # named so the builtin `eval` is not shadowed
            if is_coco:
                coco_eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]  # image IDs to evaluate
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
            map, map50 = coco_eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            LOGGER.info(f'pycocotools unable to run: {e}')

    # Return results
    model.float()  # for training
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t


def parse_opt():
    """Parse the command-line options, resolve the dataset YAML and return the options namespace."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco.yaml', help='dataset.yaml path')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path(s)')
    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.7, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=300, help='maximum detections per image')
    parser.add_argument('--task', default='val', help='train, val, test, speed or study')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--verbose', action='store_true', help='report mAP by class')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-json', action='store_true', help='save a COCO-JSON results file')
    parser.add_argument('--project', default=ROOT / 'runs/val', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--min-items', type=int, default=0, help='Experimental')
    opt = parser.parse_args()
    opt.data = check_yaml(opt.data)  # check YAML
    opt.save_json |= opt.data.endswith('coco.yaml')  # a dataset file named coco.yaml always enables JSON output
    opt.save_txt |= opt.save_hybrid  # hybrid saving implies saving txt results
    print_args(vars(opt))
    return opt


def main(opt):
    """Dispatch on `opt.task`: a normal validation run, a speed benchmark, or a speed-vs-mAP study."""
    # check_requirements(exclude=('tensorboard', 'thop'))

    if opt.task in ('train', 'val', 'test'):  # run normally
        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
            LOGGER.info(f'WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results')
        if opt.save_hybrid:
            LOGGER.info('WARNING ⚠️ --save-hybrid will return high mAP from hybrid labels, not from predictions alone')
        run(**vars(opt))

    else:
        weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
        opt.half = torch.cuda.is_available() and opt.device != 'cpu'  # FP16 for fastest results
        if opt.task == 'speed':  # speed benchmarks
            # python val_triple.py --task speed --data coco.yaml --batch 1 --weights yolo.pt...
            opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
            for opt.weights in weights:
                run(**vars(opt), plots=False)

        elif opt.task == 'study':  # speed vs mAP benchmarks
            # python val_triple.py --task study --data coco.yaml --iou 0.7 --weights yolo.pt...
            for opt.weights in weights:
                f = f'study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt'  # filename to save to
                x, y = list(range(256, 1536 + 128, 128)), []  # x axis (image sizes), y axis
                for opt.imgsz in x:  # img-size
                    LOGGER.info(f'\nRunning {f} --imgsz {opt.imgsz}...')
                    r, _, t = run(**vars(opt), plots=False)
                    y.append(r + t)  # results and times
                np.savetxt(f, y, fmt='%10.4g')  # save
            os.system('zip -r study.zip study_*.txt')  # fixed command string, no user input involved
            plot_val_study(x=x)  # plot


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)