Repository: RizwanMunawar/yolov8-object-tracking Branch: main Commit: 3606d19c21ef Files: 77 Total size: 639.3 KB Directory structure: gitextract_66agq4x4/ ├── LICENSE ├── README.md ├── __init__.py ├── models/ │ └── v8/ │ ├── yolov8l.yaml │ ├── yolov8m.yaml │ ├── yolov8n.yaml │ ├── yolov8s.yaml │ ├── yolov8x.yaml │ └── yolov8x6.yaml ├── nn/ │ ├── __init__.py │ ├── autobackend.py │ ├── modules.py │ └── tasks.py ├── requirements.txt └── yolo/ ├── cli.py ├── configs/ │ ├── __init__.py │ ├── default.yaml │ └── hydra_patch.py ├── data/ │ ├── __init__.py │ ├── augment.py │ ├── base.py │ ├── build.py │ ├── dataloaders/ │ │ ├── __init__.py │ │ ├── stream_loaders.py │ │ ├── v5augmentations.py │ │ └── v5loader.py │ ├── dataset.py │ ├── dataset_wrappers.py │ ├── datasets/ │ │ ├── Argoverse.yaml │ │ ├── GlobalWheat2020.yaml │ │ ├── ImageNet.yaml │ │ ├── Objects365.yaml │ │ ├── SKU-110K.yaml │ │ ├── VOC.yaml │ │ ├── VisDrone.yaml │ │ ├── coco.yaml │ │ ├── coco128-seg.yaml │ │ ├── coco128.yaml │ │ └── xView.yaml │ ├── scripts/ │ │ ├── download_weights.sh │ │ ├── get_coco.sh │ │ ├── get_coco128.sh │ │ └── get_imagenet.sh │ └── utils.py ├── engine/ │ ├── __init__.py │ ├── exporter.py │ ├── model.py │ ├── predictor.py │ ├── sort.py │ ├── trainer.py │ └── validator.py ├── utils/ │ ├── __init__.py │ ├── autobatch.py │ ├── callbacks/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── clearml.py │ │ ├── comet.py │ │ ├── hub.py │ │ └── tensorboard.py │ ├── checks.py │ ├── dist.py │ ├── downloads.py │ ├── files.py │ ├── instance.py │ ├── loss.py │ ├── metrics.py │ ├── ops.py │ ├── plotting.py │ ├── tal.py │ └── torch_utils.py └── v8/ ├── __init__.py └── detect/ ├── __init__.py ├── detect_and_trk.py ├── predict.py ├── sort.py ├── train.py └── val.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. 
A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. 
A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. 
"Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. 
Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. 
Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <https://www.gnu.org/licenses/>. ================================================ FILE: README.md ================================================ # yolov8-object-tracking This is compatible only with `ultralytics==8.0.0`. However, I highly recommend using the latest version of the Ultralytics package and referring to the official Ultralytics codebase here: [GitHub Repository](https://github.com/ultralytics/ultralytics/).
[![Static Badge](https://img.shields.io/badge/yolov8-blog-blue)](https://muhammadrizwanmunawar.medium.com/train-yolov8-on-custom-data-6d28cd348262)

### Steps to run Code

- Clone the repository
```bash
git clone https://github.com/RizwanMunawar/yolov8-object-tracking.git
```
- Move to the cloned folder
```bash
cd yolov8-object-tracking
```
- Install the ultralytics package
```bash
pip install ultralytics==8.0.0
```
- Run tracking with one of the commands below (the examples use Windows-style backslash paths; on Linux/macOS, use forward slashes)
```bash
# video file
python yolo\v8\detect\detect_and_trk.py model=yolov8s.pt source="test.mp4" show=True

# image file
python yolo\v8\detect\detect_and_trk.py model=yolov8m.pt source="path to image"

# webcam
python yolo\v8\detect\detect_and_trk.py model=yolov8m.pt source=0 show=True

# external camera
python yolo\v8\detect\detect_and_trk.py model=yolov8m.pt source=1 show=True
```
- The output file will be created in `runs/detect/train` with the original filename
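If you later move to a recent Ultralytics release (as recommended above), the same workflow is exposed through the official Python API. A minimal sketch, assuming a current `ultralytics` version where `YOLO.track` is available:

```python
# Minimal sketch using a recent ultralytics release (not the pinned 8.0.0 above).
from ultralytics import YOLO

model = YOLO("yolov8s.pt")                           # load a pretrained detector
results = model.track(source="test.mp4", show=True)  # run detection + tracking
```

Note that this repository wires YOLOv8 detections into the SORT tracker bundled under `yolo/v8/detect/sort.py`, whereas recent Ultralytics releases ship their own trackers (BoT-SORT and ByteTrack).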
### Results 📊

_Demo images: YOLOv8s Object Tracking · YOLOv8m Object Tracking_
### Star History [![Star History Chart](https://api.star-history.com/svg?repos=RizwanMunawar/yolov8-object-tracking&type=date&legend=top-left)](https://www.star-history.com/#RizwanMunawar/yolov8-object-tracking&type=date&legend=top-left) ### References 🔗 - 🔗 https://github.com/ultralytics/ultralytics - 🔗 https://github.com/abewley/sort - 🔗 https://docs.ultralytics.com/ **Some of my articles/research papers | Computer vision awesome resources for learning | How do I appear to the world? 🚀** | Article Title & Link | Published Date | |-----------------------|----------------| | [Ultralytics YOLO11: Object Detection and Instance Segmentation🤯](https://muhammadrizwanmunawar.medium.com/ultralytics-yolo11-object-detection-and-instance-segmentation-88ef0239a811) | ![Published Date](https://img.shields.io/badge/published_Date-2024--10--27-brightgreen) | | [Parking Management using Ultralytics YOLO11](https://muhammadrizwanmunawar.medium.com/parking-management-using-ultralytics-yolo11-fba4c6bc62bc) | ![Published Date](https://img.shields.io/badge/published_Date-2024--11--10-brightgreen) | | [My 🖐️Computer Vision Hobby Projects that Yielded Earnings](https://muhammadrizwanmunawar.medium.com/my-️computer-vision-hobby-projects-that-yielded-earnings-7923c9b9eead) | ![Published Date](https://img.shields.io/badge/published_Date-2023--09--10-brightgreen) | | [Best Resources to Learn Computer Vision](https://muhammadrizwanmunawar.medium.com/best-resources-to-learn-computer-vision-311352ed0833) | ![Published Date](https://img.shields.io/badge/published_Date-2023--06--30-brightgreen) | | [Roadmap for Computer Vision Engineer](https://medium.com/augmented-startups/roadmap-for-computer-vision-engineer-45167b94518c) | ![Published Date](https://img.shields.io/badge/published_Date-2022--08--07-brightgreen) | | [How did I spend 2022 in the Computer Vision Field](https://www.linkedin.com/pulse/how-did-i-spend-2022-computer-vision-field-muhammad-rizwan-munawar) | ![Published Date](https://img.shields.io/badge/published_Date-2022--12--20-brightgreen) | | [Domain Feature Mapping with YOLOv7 for Automated Edge-Based Pallet Racking Inspections](https://www.mdpi.com/1424-8220/22/18/6927) | ![Published Date](https://img.shields.io/badge/published_Date-2022--09--13-brightgreen) | | [Exudate Regeneration for Automated Exudate Detection in Retinal Fundus Images](https://ieeexplore.ieee.org/document/9885192) | ![Published Date](https://img.shields.io/badge/published_Date-2022--09--12-brightgreen) | | [Feature Mapping for Rice Leaf Defect Detection Based on a Custom Convolutional Architecture](https://www.mdpi.com/2304-8158/11/23/3914) | ![Published Date](https://img.shields.io/badge/published_Date-2022--12--04-brightgreen) | | [Yolov5, Yolo-x, Yolo-r, Yolov7 Performance Comparison: A Survey](https://aircconline.com/csit/papers/vol12/csit121602.pdf) | ![Published Date](https://img.shields.io/badge/published_Date-2022--09--24-brightgreen) | | [Explainable AI in Drug Sensitivity Prediction on Cancer Cell Lines](https://ieeexplore.ieee.org/document/9922931) | ![Published Date](https://img.shields.io/badge/published_Date-2022--09--23-brightgreen) | | [Train YOLOv8 on Custom Data](https://medium.com/augmented-startups/train-yolov8-on-custom-data-6d28cd348262) | ![Published Date](https://img.shields.io/badge/published_Date-2022--09--23-brightgreen) | **More Information** For more details, you can reach out to me on [Medium](https://muhammadrizwanmunawar.medium.com/) or connect with me on 
[LinkedIn](https://www.linkedin.com/in/muhammadrizwanmunawar/) ================================================ FILE: __init__.py ================================================
# NOTE: imports adjusted to match this repository's layout. The original line
# `from hub import checks` cannot resolve here (there is no hub/ package in the
# tree), so `checks` is taken from yolo/utils instead; the remaining imports are
# likewise rooted at the yolo/ package, matching how nn/autobackend.py imports.
from yolo.utils import checks
from yolo.engine.model import YOLO
from yolo.utils import ops
from yolo import v8
================================================ FILE: models/v8/yolov8l.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 1.00 # scales module repeats width_multiple: 1.00 # scales convolution channels # YOLOv8.0l backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [512, True]] - [-1, 1, SPPF, [512, 5]] # 9 # YOLOv8.0l head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 13 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 17 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 20 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [512]] # 23 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) ================================================ FILE: models/v8/yolov8m.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 0.67 # scales module repeats width_multiple: 0.75 # scales convolution channels # YOLOv8.0m backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [768, True]] - [-1, 1, SPPF, [768, 5]] # 9 # YOLOv8.0m head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 13 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 17 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 20 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [768]] # 23 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) ================================================ FILE: models/v8/yolov8n.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 0.33 # scales module repeats width_multiple: 0.25 # scales convolution channels # YOLOv8.0n backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [1024, True]] - [-1, 1, SPPF, [1024, 5]] # 9 # YOLOv8.0n head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1,
Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 13 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 17 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 20 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [1024]] # 23 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) ================================================ FILE: models/v8/yolov8s.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 0.33 # scales module repeats width_multiple: 0.50 # scales convolution channels # YOLOv8.0s backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [1024, True]] - [-1, 1, SPPF, [1024, 5]] # 9 # YOLOv8.0s head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 13 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 17 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 20 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [1024]] # 23 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) ================================================ FILE: models/v8/yolov8x.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 1.00 # scales module repeats width_multiple: 1.25 # scales convolution channels # YOLOv8.0x backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - [-1, 6, C2f, [512, True]] - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [512, True]] - [-1, 1, SPPF, [512, 5]] # 9 # YOLOv8.0x head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2f, [512]] # 13 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2f, [256]] # 17 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 12], 1, Concat, [1]] # cat head P4 - [-1, 3, C2f, [512]] # 20 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 9], 1, Concat, [1]] # cat head P5 - [-1, 3, C2f, [512]] # 23 (P5/32-large) - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5) ================================================ FILE: models/v8/yolov8x6.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Parameters nc: 80 # number of classes depth_multiple: 1.00 # scales module repeats width_multiple: 1.25 # scales convolution channels # YOLOv8.0x6 backbone backbone: # [from, repeats, module, args] - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2 - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4 - [-1, 3, C2f, [128, True]] - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8 - [-1, 6, C2f, [256, True]] - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16 - 
[-1, 6, C2f, [512, True]] - [-1, 1, Conv, [512, 3, 2]] # 7-P5/32 - [-1, 3, C2f, [512, True]] - [-1, 1, Conv, [512, 3, 2]] # 9-P6/64 - [-1, 3, C2f, [512, True]] - [-1, 1, SPPF, [512, 5]] # 11 # YOLOv8.0x6 head head: - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 8], 1, Concat, [1]] # cat backbone P5 - [-1, 3, C2, [512, False]] # 14 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 6], 1, Concat, [1]] # cat backbone P4 - [-1, 3, C2, [512, False]] # 17 - [-1, 1, nn.Upsample, [None, 2, 'nearest']] - [[-1, 4], 1, Concat, [1]] # cat backbone P3 - [-1, 3, C2, [256, False]] # 20 (P3/8-small) - [-1, 1, Conv, [256, 3, 2]] - [[-1, 17], 1, Concat, [1]] # cat head P4 - [-1, 3, C2, [512, False]] # 23 (P4/16-medium) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 14], 1, Concat, [1]] # cat head P5 - [-1, 3, C2, [512, False]] # 26 (P5/32-large) - [-1, 1, Conv, [512, 3, 2]] - [[-1, 11], 1, Concat, [1]] # cat head P6 - [-1, 3, C2, [512, False]] # 29 (P6/64-xlarge) - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6) ================================================ FILE: nn/__init__.py ================================================ ================================================ FILE: nn/autobackend.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import json import platform from collections import OrderedDict, namedtuple from pathlib import Path from urllib.parse import urlparse import cv2 import numpy as np import torch import torch.nn as nn from PIL import Image from yolo.utils import LOGGER, ROOT, yaml_load from yolo.utils.checks import check_requirements, check_suffix, check_version from yolo.utils.downloads import attempt_download, is_url from yolo.utils.ops import xywh2xyxy class AutoBackend(nn.Module): def __init__(self, weights='yolov8n.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): """ Ultralytics YOLO MultiBackend class for Python inference on various backends Args: weights: the path to the weights file. Defaults to yolov8n.pt device: The device to run the model on. dnn: If you want to use OpenCV's DNN module to run the inference, set this to True. Defaults to False data: optional path to a dataset *.yaml file, used to resolve class names. Defaults to None fp16: If true, will use half precision. Defaults to False fuse: whether to fuse the model or not.
Defaults to True Supported format and their usage: | Platform | weights | |-----------------------|------------------| | PyTorch | *.pt | | TorchScript | *.torchscript | | ONNX Runtime | *.onnx | | ONNX OpenCV DNN | *.onnx --dnn | | OpenVINO | *.xml | | CoreML | *.mlmodel | | TensorRT | *.engine | | TensorFlow SavedModel | *_saved_model | | TensorFlow GraphDef | *.pb | | TensorFlow Lite | *.tflite | | TensorFlow Edge TPU | *_edgetpu.tflite | | PaddlePaddle | *_paddle_model | """ super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) nn_module = isinstance(weights, torch.nn.Module) pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w) fp16 &= pt or jit or onnx or engine or nn_module # FP16 nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) stride = 32 # default stride cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA if not (pt or triton or nn_module): w = attempt_download(w) # download if not local # NOTE: special case: in-memory pytorch model if nn_module: model = weights.to(device) model = model.fuse() if fuse else model names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() pt = True elif pt: # PyTorch from nn.tasks import attempt_load_weights model = attempt_load_weights(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse) stride = max(int(model.stride.max()), 32) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names model.half() if fp16 else model.float() self.model = model # explicitly assign for to(), cpu(), cuda(), half() elif jit: # TorchScript LOGGER.info(f'Loading {w} for TorchScript inference...') extra_files = {'config.txt': ''} # model metadata model = torch.jit.load(w, _extra_files=extra_files, map_location=device) model.half() if fp16 else model.float() if extra_files['config.txt']: # load metadata dict d = json.loads(extra_files['config.txt'], object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()}) stride, names = int(d['stride']), d['names'] elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements('opencv-python>=4.5.4') net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime LOGGER.info(f'Loading {w} for ONNX Runtime inference...') check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) import onnxruntime providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] session = onnxruntime.InferenceSession(w, providers=providers) output_names = [x.name for x in session.get_outputs()] meta = session.get_modelmeta().custom_metadata_map # metadata if 'stride' in meta: stride, names = int(meta['stride']), eval(meta['names']) elif xml: # OpenVINO LOGGER.info(f'Loading {w} for OpenVINO inference...') check_requirements('openvino') # requires openvino-dev: https://pypi.org/project/openvino-dev/ from openvino.runtime import Core, Layout, get_batch # noqa ie = Core() if not Path(w).is_file(): # if not *.xml w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin')) if network.get_parameters()[0].get_layout().empty: network.get_parameters()[0].set_layout(Layout("NCHW")) batch_dim = get_batch(network) if 
batch_dim.is_static: batch_size = batch_dim.get_length() executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata elif engine: # TensorRT LOGGER.info(f'Loading {w} for TensorRT inference...') import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 if device.type == 'cpu': device = torch.device('cuda:0') Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) context = model.create_execution_context() bindings = OrderedDict() output_names = [] fp16 = False # default updated below dynamic = False for i in range(model.num_bindings): name = model.get_binding_name(i) dtype = trt.nptype(model.get_binding_dtype(i)) if model.binding_is_input(i): if -1 in tuple(model.get_binding_shape(i)): # dynamic dynamic = True context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2])) if dtype == np.float16: fp16 = True else: # output output_names.append(name) shape = tuple(context.get_binding_shape(i)) im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size elif coreml: # CoreML LOGGER.info(f'Loading {w} for CoreML inference...') import coremltools as ct model = ct.models.MLModel(w) elif saved_model: # TF SavedModel LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') import tensorflow as tf keras = False # assume TF1 saved_model model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') import tensorflow as tf def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped ge = x.graph.as_graph_element return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) def gd_outputs(gd): name_list, input_list = [], [] for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef name_list.append(node.name) input_list.extend(node.input) return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp')) gd = tf.Graph().as_graph_def() # TF GraphDef with open(w, 'rb') as f: gd.ParseFromString(f.read()) frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu from tflite_runtime.interpreter import Interpreter, load_delegate except ImportError: import tensorflow as tf Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') delegate = { 'Linux': 'libedgetpu.so.1', 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] interpreter = Interpreter(model_path=w, 
experimental_delegates=[load_delegate(delegate)]) else: # TFLite LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') interpreter = Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs elif tfjs: # TF.js raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported') elif paddle: # PaddlePaddle LOGGER.info(f'Loading {w} for PaddlePaddle inference...') check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle') import paddle.inference as pdi if not Path(w).is_file(): # if not *.pdmodel w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir weights = Path(w).with_suffix('.pdiparams') config = pdi.Config(str(w), str(weights)) if cuda: config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) predictor = pdi.create_predictor(config) input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) output_names = predictor.get_output_names() elif triton: # NVIDIA Triton Inference Server LOGGER.info('Triton Inference Server not supported...') ''' TODO: check_requirements('tritonclient[all]') from utils.triton import TritonRemoteModel model = TritonRemoteModel(url=w) nhwc = model.runtime.startswith("tensorflow") ''' else: raise NotImplementedError(f'ERROR: {w} is not a supported format') # class names if 'names' not in locals(): names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)} if names[0] == 'n01440764' and len(names) == 1000: # ImageNet names = yaml_load(ROOT / 'yolo/data/datasets/ImageNet.yaml')['names'] # human-readable names self.__dict__.update(locals()) # assign all variables to self
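# ----------------------------------------------------------------------
# Illustrative note (not part of the original source): the branches above
# are driven by the one-hot flag list that the _model_type() helper further
# below returns for a weights path, e.g. (paths hypothetical):
#
#   pt, jit, onnx, *rest = AutoBackend._model_type('yolov8n.pt')    # pt=True
#   flags = AutoBackend._model_type('yolov8n.onnx')                 # onnx slot True
#   flags = AutoBackend._model_type('yolov8n_paddle_model')         # paddle slot True
#
# Exactly one suffix flag (or the trailing Triton flag) should be truthy,
# and __init__ takes the first matching branch.
# ----------------------------------------------------------------------
def forward(self, im, augment=False, visualize=False): """ Runs inference on the given model Args: im: the image tensor augment: whether to augment the image. Defaults to False visualize: if True, then the network will output the feature maps of the last convolutional layer.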
Defaults to False """ # YOLOv5 MultiBackend inference b, ch, h, w = im.shape # batch, channel, height, width if self.fp16 and im.dtype != torch.float16: im = im.half() # to FP16 if self.nhwc: im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3) if self.pt or self.nn_module: # PyTorch y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) elif self.jit: # TorchScript y = self.model(im) elif self.dnn: # ONNX OpenCV DNN im = im.cpu().numpy() # torch to numpy self.net.setInput(im) y = self.net.forward() elif self.onnx: # ONNX Runtime im = im.cpu().numpy() # torch to numpy y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) elif self.xml: # OpenVINO im = im.cpu().numpy() # FP32 y = list(self.executable_network([im]).values()) elif self.engine: # TensorRT if self.dynamic and im.shape != self.bindings['images'].shape: i = self.model.get_binding_index('images') self.context.set_binding_shape(i, im.shape) # reshape if dynamic self.bindings['images'] = self.bindings['images']._replace(shape=im.shape) for name in self.output_names: i = self.model.get_binding_index(name) self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i))) s = self.bindings['images'].shape assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" self.binding_addrs['images'] = int(im.data_ptr()) self.context.execute_v2(list(self.binding_addrs.values())) y = [self.bindings[x].data for x in sorted(self.output_names)] elif self.coreml: # CoreML im = im.cpu().numpy() im = Image.fromarray((im[0] * 255).astype('uint8')) # im = im.resize((192, 320), Image.ANTIALIAS) y = self.model.predict({'image': im}) # coordinates are xywh normalized if 'confidence' in y: box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32) y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) else: y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) elif self.paddle: # PaddlePaddle im = im.cpu().numpy().astype(np.float32) self.input_handle.copy_from_cpu(im) self.predictor.run() y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] elif self.triton: # NVIDIA Triton Inference Server y = self.model(im) else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) im = im.cpu().numpy() if self.saved_model: # SavedModel y = self.model(im, training=False) if self.keras else self.model(im) elif self.pb: # GraphDef y = self.frozen_func(x=self.tf.constant(im)) else: # Lite or Edge TPU input = self.input_details[0] int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model if int8: scale, zero_point = input['quantization'] im = (im / scale + zero_point).astype(np.uint8) # de-scale self.interpreter.set_tensor(input['index'], im) self.interpreter.invoke() y = [] for output in self.output_details: x = self.interpreter.get_tensor(output['index']) if int8: scale, zero_point = output['quantization'] x = (x.astype(np.float32) - zero_point) * scale # re-scale y.append(x) y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels if isinstance(y, (list, tuple)): return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] else: return self.from_numpy(y)
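# ----------------------------------------------------------------------
# Usage sketch (illustrative; the weights file and constructor keywords are
# assumptions inferred from the attributes referenced above): warmup() below
# is normally paired with forward() so the first timed frame does not absorb
# one-off allocation costs:
#
#   model = AutoBackend('yolov8n.pt', device=device, fp16=True)
#   model.warmup(imgsz=(1, 3, 640, 640))               # dry run
#   im = torch.zeros(1, 3, 640, 640, device=device).half()
#   y = model.forward(im)                              # tensor(s), via from_numpy()
# ----------------------------------------------------------------------
def from_numpy(self, x): """ `from_numpy` converts a numpy array to a tensor Args: x: the numpy array to convert """ return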
torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

def warmup(self, imgsz=(1, 3, 640, 640)): """ Warmup model by running inference once Args: imgsz: the size of the image you want to run inference on. """ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module if any(warmup_types) and (self.device.type != 'cpu' or self.triton): im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input for _ in range(2 if self.jit else 1): self.forward(im) # warmup

@staticmethod def _model_type(p='path/to/model.pt'): """ This function takes a path to a model file and returns the model type Args: p: path to the model file. Defaults to path/to/model.pt """ # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] from yolo.engine.exporter import export_formats sf = list(export_formats().Suffix) # export suffixes if not is_url(p, check=False) and not isinstance(p, str): check_suffix(p, sf) # checks url = urlparse(p) # if url may be Triton inference server types = [s in Path(p).name for s in sf] types[8] &= not types[9] # tflite &= not edgetpu triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc]) return types + [triton]

@staticmethod def _load_metadata(f=Path('path/to/meta.yaml')): """ > Loads the metadata from a yaml file Args: f: The path to the metadata file. """ from yolo.utils.files import yaml_load # Load metadata from meta.yaml if it exists if f.exists(): d = yaml_load(f) return d['stride'], d['names'] # assign stride, names return None, None

================================================
FILE: nn/modules.py
================================================

# Ultralytics YOLO 🚀, GPL-3.0 license """ Common modules """ import math import warnings from copy import copy from pathlib import Path import cv2 import numpy as np import pandas as pd import requests import torch import torch.nn as nn from PIL import Image, ImageOps from torch.cuda import amp from nn.autobackend import AutoBackend from yolo.data.augment import LetterBox from yolo.utils import LOGGER, colorstr from yolo.utils.files import increment_path from yolo.utils.ops import Profile, make_divisible, non_max_suppression, scale_boxes, xyxy2xywh from yolo.utils.plotting import Annotator, colors, save_one_box from yolo.utils.tal import dist2bbox, make_anchors from yolo.utils.torch_utils import copy_attr, smart_inference_mode # from utils.plots import feature_visualization TODO

def autopad(k, p=None, d=1): # kernel, padding, dilation # Pad to 'same' shape outputs if d > 1: k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size if p is None: p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad return p

class Conv(nn.Module): # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation) default_act = nn.SiLU() # default activation def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): super().__init__() self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) self.bn = nn.BatchNorm2d(c2) self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() def forward(self, x): return self.act(self.bn(self.conv(x))) def forward_fuse(self, x): return self.act(self.conv(x))
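# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original file): autopad() keeps the
# spatial dims stable for odd kernels, so Conv only changes H/W through its
# stride. A minimal shape check, assuming the Conv class defined above:
def _demo_conv_shapes():
    x = torch.randn(1, 3, 64, 64)                              # BCHW input
    assert Conv(3, 16, k=3, s=1)(x).shape == (1, 16, 64, 64)   # 'same' padding
    assert Conv(3, 16, k=3, s=2)(x).shape == (1, 16, 32, 32)   # stride 2 halves H and W
# ----------------------------------------------------------------------
class DWConv(Conv): # Depth-wise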
convolution def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) class DWConvTranspose2d(nn.ConvTranspose2d): # Depth-wise transpose convolution def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) class ConvTranspose(nn.Module): # Convolution transpose 2d layer default_act = nn.SiLU() # default activation def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True): super().__init__() self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn) self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity() self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() def forward(self, x): return self.act(self.bn(self.conv_transpose(x))) class DFL(nn.Module): # DFL module def __init__(self, c1=16): super().__init__() self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False) x = torch.arange(c1, dtype=torch.float) self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1)) self.c1 = c1 def forward(self, x): b, c, a = x.shape # batch, channels, anchors return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a) # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a) class TransformerLayer(nn.Module): # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) def __init__(self, c, num_heads): super().__init__() self.q = nn.Linear(c, c, bias=False) self.k = nn.Linear(c, c, bias=False) self.v = nn.Linear(c, c, bias=False) self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) self.fc1 = nn.Linear(c, c, bias=False) self.fc2 = nn.Linear(c, c, bias=False) def forward(self, x): x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x x = self.fc2(self.fc1(x)) + x return x class TransformerBlock(nn.Module): # Vision Transformer https://arxiv.org/abs/2010.11929 def __init__(self, c1, c2, num_heads, num_layers): super().__init__() self.conv = None if c1 != c2: self.conv = Conv(c1, c2) self.linear = nn.Linear(c2, c2) # learnable position embedding self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers))) self.c2 = c2 def forward(self, x): if self.conv is not None: x = self.conv(x) b, _, w, h = x.shape p = x.flatten(2).permute(2, 0, 1) return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h) class Bottleneck(nn.Module): # Standard bottleneck def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, k[0], 1) self.cv2 = Conv(c_, c2, k[1], 1, g=g) self.add = shortcut and c1 == c2 def forward(self, x): return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) class BottleneckCSP(nn.Module): # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) self.cv4 = Conv(2 * c_, c2, 1, 1) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) self.act = nn.SiLU() self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) def 
forward(self, x): y1 = self.cv3(self.m(self.cv1(x))) y2 = self.cv2(x) return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))

class C3(nn.Module): # CSP Bottleneck with 3 convolutions def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() c_ = int(c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) def forward(self, x): return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))

class C2(nn.Module): # CSP Bottleneck with 2 convolutions def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() self.c = int(c2 * e) # hidden channels self.cv1 = Conv(c1, 2 * self.c, 1, 1) self.cv2 = Conv(2 * self.c, c2, 1) # optional act=FReLU(c2) # self.attention = ChannelAttention(2 * self.c) # or SpatialAttention() self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))) def forward(self, x): a, b = self.cv1(x).split((self.c, self.c), 1) return self.cv2(torch.cat((self.m(a), b), 1))

class C2f(nn.Module): # Faster implementation of CSP Bottleneck with 2 convolutions def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion super().__init__() self.c = int(c2 * e) # hidden channels self.cv1 = Conv(c1, 2 * self.c, 1, 1) self.cv2 = Conv((2 + n) * self.c, c2, 1) # optional act=FReLU(c2) self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)) def forward(self, x): y = list(self.cv1(x).split((self.c, self.c), 1)) y.extend(m(y[-1]) for m in self.m) return self.cv2(torch.cat(y, 1))

class ChannelAttention(nn.Module): # Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet def __init__(self, channels: int) -> None: super().__init__() self.pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True) self.act = nn.Sigmoid() def forward(self, x: torch.Tensor) -> torch.Tensor: return x * self.act(self.fc(self.pool(x)))

class SpatialAttention(nn.Module): # Spatial-attention module def __init__(self, kernel_size=7): super().__init__() assert kernel_size in (3, 7), 'kernel size must be 3 or 7' padding = 3 if kernel_size == 7 else 1 self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) self.act = nn.Sigmoid() def forward(self, x): return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))

class CBAM(nn.Module): # Convolutional Block Attention Module (channel attention followed by spatial attention) def __init__(self, c1, ratio=16, kernel_size=7): # ch_in, channel reduction ratio, spatial kernel size super().__init__() self.channel_attention = ChannelAttention(c1) self.spatial_attention = SpatialAttention(kernel_size) def forward(self, x): return self.spatial_attention(self.channel_attention(x))

class C1(nn.Module): # CSP Bottleneck with 1 convolution def __init__(self, c1, c2, n=1): # ch_in, ch_out, number super().__init__() self.cv1 = Conv(c1, c2, 1, 1) self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n))) def forward(self, x): y = self.cv1(x) return self.m(y) + y
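# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original file): C2f splits the cv1
# output into two halves of self.c channels and appends each bottleneck's
# output, so cv2 always receives (2 + n) * self.c input channels:
def _demo_c2f_channels():
    m = C2f(64, 64, n=2)                    # hidden width self.c = 32
    x = torch.randn(1, 64, 32, 32)
    assert m.cv2.conv.in_channels == (2 + 2) * 32
    assert m(x).shape == (1, 64, 32, 32)
# ----------------------------------------------------------------------
class C3x(C3): # C3 module with cross-convolutions def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__(c1, c2, n, shortcut, g, e) self.c_ = int(c2 * e)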
self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n))) class C3TR(C3): # C3 module with TransformerBlock() def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__(c1, c2, n, shortcut, g, e) c_ = int(c2 * e) self.m = TransformerBlock(c_, c_, 4, n) class C3Ghost(C3): # C3 module with GhostBottleneck() def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__(c1, c2, n, shortcut, g, e) c_ = int(c2 * e) # hidden channels self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n))) class SPP(nn.Module): # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 def __init__(self, c1, c2, k=(5, 9, 13)): super().__init__() c_ = c1 // 2 # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) def forward(self, x): x = self.cv1(x) with warnings.catch_warnings(): warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) class SPPF(nn.Module): # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) super().__init__() c_ = c1 // 2 # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c_ * 4, c2, 1, 1) self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) def forward(self, x): x = self.cv1(x) with warnings.catch_warnings(): warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning y1 = self.m(x) y2 = self.m(y1) return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) class Focus(nn.Module): # Focus wh information into c-space def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act) # self.contract = Contract(gain=2) def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1)) # return self.conv(self.contract(x)) class GhostConv(nn.Module): # Ghost Convolution https://github.com/huawei-noah/ghostnet def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups super().__init__() c_ = c2 // 2 # hidden channels self.cv1 = Conv(c1, c_, k, s, None, g, act=act) self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act) def forward(self, x): y = self.cv1(x) return torch.cat((y, self.cv2(y)), 1) class GhostBottleneck(nn.Module): # Ghost Bottleneck https://github.com/huawei-noah/ghostnet def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride super().__init__() c_ = c2 // 2 self.conv = nn.Sequential( GhostConv(c1, c_, 1, 1), # pw DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw GhostConv(c_, c2, 1, 1, act=False)) # pw-linear self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() def forward(self, x): return self.conv(x) + self.shortcut(x) class Concat(nn.Module): # Concatenate a list of tensors along dimension def __init__(self, dimension=1): super().__init__() self.d = dimension def forward(self, x): return torch.cat(x, self.d) class AutoShape(nn.Module): # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. 
Includes preprocessing, inference and NMS conf = 0.25 # NMS confidence threshold iou = 0.45 # NMS IoU threshold agnostic = False # NMS class-agnostic multi_label = False # NMS multiple labels per box classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs max_det = 1000 # maximum number of detections per image amp = False # Automatic Mixed Precision (AMP) inference def __init__(self, model, verbose=True): super().__init__() if verbose: LOGGER.info('Adding AutoShape... ') copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes self.dmb = isinstance(model, AutoBackend) # DetectMultiBackend() instance self.pt = not self.dmb or model.pt # PyTorch model self.model = model.eval() if self.pt: m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() m.inplace = False # Detect.inplace=False for safe multithread inference m.export = True # do not output loss values def _apply(self, fn): # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers self = super()._apply(fn) if self.pt: m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() m.stride = fn(m.stride) m.grid = list(map(fn, m.grid)) if isinstance(m.anchor_grid, list): m.anchor_grid = list(map(fn, m.anchor_grid)) return self @smart_inference_mode() def forward(self, ims, size=640, augment=False, profile=False): # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are: # file: ims = 'data/images/zidane.jpg' # str or PosixPath # URI: = 'https://com/images/zidane.jpg' # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) # numpy: = np.zeros((640,1280,3)) # HWC # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images dt = (Profile(), Profile(), Profile()) with dt[0]: if isinstance(size, int): # expand size = (size, size) p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference if isinstance(ims, torch.Tensor): # torch with amp.autocast(autocast): return self.model(ims.to(p.device).type_as(p), augment=augment) # inference # Pre-process n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images shape0, shape1, files = [], [], [] # image and inference shapes, filenames for i, im in enumerate(ims): f = f'image{i}' # filename if isinstance(im, (str, Path)): # filename or uri im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im im = np.asarray(ImageOps.exif_transpose(im)) elif isinstance(im, Image.Image): # PIL Image im, f = np.asarray(ImageOps.exif_transpose(im)), getattr(im, 'filename', f) or f files.append(Path(f).with_suffix('.jpg').name) if im.shape[0] < 5: # image in CHW im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input s = im.shape[:2] # HWC shape0.append(s) # image shape g = max(size) / max(s) # gain shape1.append([y * g for y in s]) ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] if self.pt else size # inf shape x = [LetterBox(shape1, auto=False)(image=im)["img"] for im in ims] # pad x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 with amp.autocast(autocast): # Inference with dt[1]: y = self.model(x, augment=augment) # forward # Post-process with dt[2]: y = non_max_suppression(y if self.dmb else y[0], self.conf, self.iou, self.classes, self.agnostic, self.multi_label, max_det=self.max_det) # NMS for i in range(n): scale_boxes(shape1, y[i][:, :4], shape0[i]) return Detections(ims, y, files, dt, self.names, x.shape) class Detections: # YOLOv5 detections class for inference results def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None): super().__init__() d = pred[0].device # device gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations self.ims = ims # list of images as numpy arrays self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) self.names = names # class names self.files = files # image filenames self.times = times # profiling times self.xyxy = pred # xyxy pixels self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized self.n = len(self.pred) # number of images (batch size) self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms) self.s = tuple(shape) # inference BCHW shape def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): s, crops = '', [] for i, (im, pred) in enumerate(zip(self.ims, self.pred)): s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string if pred.shape[0]: for c in pred[:, -1].unique(): n = (pred[:, -1] == c).sum() # detections per class s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string s = 
s.rstrip(', ') if show or save or render or crop: annotator = Annotator(im, example=str(self.names)) for *box, conf, cls in reversed(pred): # xyxy, confidence, class label = f'{self.names[int(cls)]} {conf:.2f}' if crop: file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None crops.append({ 'box': box, 'conf': conf, 'cls': cls, 'label': label, 'im': save_one_box(box, im, file=file, save=save)}) else: # all others annotator.box_label(box, label if labels else '', color=colors(cls)) im = annotator.im else: s += '(no detections)' im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np if show: im.show(self.files[i]) # show if save: f = self.files[i] im.save(save_dir / f) # save if i == self.n - 1: LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") if render: self.ims[i] = np.asarray(im) if pprint: s = s.lstrip('\n') return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t if crop: if save: LOGGER.info(f'Saved results to {save_dir}\n') return crops def show(self, labels=True): self._run(show=True, labels=labels) # show results def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False): save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir self._run(save=True, labels=labels, save_dir=save_dir) # save results def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False): save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None return self._run(crop=True, save=save, save_dir=save_dir) # crop results def render(self, labels=True): self._run(render=True, labels=labels) # render results return self.ims def pandas(self): # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) new = copy(self) # return copy ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) return new def tolist(self): # return a list of Detections objects, i.e. 
'for result in results.tolist():' r = range(self.n) # iterable x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r] # for d in x: # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: # setattr(d, k, getattr(d, k)[0]) # pop out of list return x def print(self): LOGGER.info(self.__str__()) def __len__(self): # override len(results) return self.n def __str__(self): # override print(results) return self._run(pprint=True) # print results def __repr__(self): return f'YOLOv5 {self.__class__} instance\n' + self.__str__()

class Proto(nn.Module): # YOLOv8 mask Proto module for segmentation models def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks super().__init__() self.cv1 = Conv(c1, c_, k=3) self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True) # nn.Upsample(scale_factor=2, mode='nearest') self.cv2 = Conv(c_, c_, k=3) self.cv3 = Conv(c_, c2) def forward(self, x): return self.cv3(self.cv2(self.upsample(self.cv1(x))))

class Ensemble(nn.ModuleList): # Ensemble of models def __init__(self): super().__init__() def forward(self, x, augment=False, profile=False, visualize=False): y = [module(x, augment, profile, visualize)[0] for module in self] # y = torch.stack(y).max(0)[0] # max ensemble # y = torch.stack(y).mean(0) # mean ensemble y = torch.cat(y, 1) # nms ensemble return y, None # inference, train output

# heads

class Detect(nn.Module): # YOLOv5 Detect head for detection models dynamic = False # force grid reconstruction export = False # export mode shape = None anchors = torch.empty(0) # init strides = torch.empty(0) # init def __init__(self, nc=80, ch=()): # detection layer super().__init__() self.nc = nc # number of classes self.nl = len(ch) # number of detection layers self.reg_max = 16 # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x) self.no = nc + self.reg_max * 4 # number of outputs per anchor self.stride = torch.zeros(self.nl) # strides computed during build c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc) # channels self.cv2 = nn.ModuleList( nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() def forward(self, x): shape = x[0].shape # BCHW for i in range(self.nl): x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) if self.training: return x elif self.dynamic or self.shape != shape: self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) self.shape = shape box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides y = torch.cat((dbox, cls.sigmoid()), 1) return y if self.export else (y, x) def bias_init(self): # Initialize Detect() biases, WARNING: requires stride availability m = self # self.model[-1] # Detect() module # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency for a, b, s in zip(m.cv2, m.cv3, m.stride): # from a[-1].bias.data[:] = 1.0 # box b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (.01 objects, 80 classes, 640 img)
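# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original file): how the inference
# branch of Detect.forward() above decodes boxes. DFL turns 4*reg_max
# distribution logits per anchor into four expected distances (l, t, r, b),
# and dist2bbox converts distances plus anchor points into xywh boxes:
def _demo_dfl_decode():
    reg_max, na = 16, 100                      # na: hypothetical anchor count
    logits = torch.randn(1, 4 * reg_max, na)   # raw box-branch output
    dist = DFL(reg_max)(logits)                # (1, 4, na) expected distances
    points = torch.rand(1, 2, na) * 20         # hypothetical anchor centers (grid units)
    boxes = dist2bbox(dist, points, xywh=True, dim=1)
    assert boxes.shape == (1, 4, na)
# ----------------------------------------------------------------------
class Segment(Detect): # YOLOv5 Segment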
head for segmentation models def __init__(self, nc=80, nm=32, npr=256, ch=()): super().__init__(nc, ch) self.nm = nm # number of masks self.npr = npr # number of protos self.proto = Proto(ch[0], self.npr, self.nm) # protos self.detect = Detect.forward c4 = max(ch[0] // 4, self.nm) self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) def forward(self, x): p = self.proto(x[0]) # mask protos bs = p.shape[0] # batch size mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients x = self.detect(self, x) if self.training: return x, mc, p return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) class Classify(nn.Module): # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2) def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups super().__init__() c_ = 1280 # efficientnet_b0 size self.conv = Conv(c1, c_, k, s, autopad(k, p), g) self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1) self.drop = nn.Dropout(p=0.0, inplace=True) self.linear = nn.Linear(c_, c2) # to x(b,c2) def forward(self, x): if isinstance(x, list): x = torch.cat(x, 1) return self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) ================================================ FILE: nn/tasks.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import contextlib from copy import deepcopy import thop import torch import torch.nn as nn from nn.modules import (C1, C2, C3, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C2f, C3Ghost, C3x, Classify, Concat, Conv, ConvTranspose, Detect, DWConv, DWConvTranspose2d, Ensemble, Focus, GhostBottleneck, GhostConv, Segment) from yolo.utils import DEFAULT_CONFIG_DICT, DEFAULT_CONFIG_KEYS, LOGGER, colorstr, yaml_load from yolo.utils.checks import check_yaml from yolo.utils.torch_utils import (fuse_conv_and_bn, initialize_weights, intersect_dicts, make_divisible, model_info, scale_img, time_sync) class BaseModel(nn.Module): ''' The BaseModel class is a base class for all the models in the Ultralytics YOLO family. ''' def forward(self, x, profile=False, visualize=False): """ > `forward` is a wrapper for `_forward_once` that runs the model on a single scale Args: x: the input image profile: whether to profile the model. Defaults to False visualize: if True, will return the intermediate feature maps. Defaults to False Returns: The output of the network. """ return self._forward_once(x, profile, visualize) def _forward_once(self, x, profile=False, visualize=False): """ > Forward pass of the network Args: x: input to the model profile: if True, the time taken for each layer will be printed. Defaults to False visualize: If True, it will save the feature maps of the model. Defaults to False Returns: The last layer of the model. 
""" y, dt = [], [] # outputs for m in self.model: if m.f != -1: # if not from previous layer x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers if profile: self._profile_one_layer(m, x, dt) x = m(x) # run y.append(x if m.i in self.save else None) # save output if visualize: pass # TODO: feature_visualization(x, m.type, m.i, save_dir=visualize) return x def _profile_one_layer(self, m, x, dt): """ It takes a model, an input, and a list of times, and it profiles the model on the input, appending the time to the list Args: m: the model x: the input image dt: list of time taken for each layer """ c = m == self.model[-1] # is final layer, copy input as inplace fix o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs t = time_sync() for _ in range(10): m(x.copy() if c else x) dt.append((time_sync() - t) * 100) if m == self.model[0]: LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') if c: LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") def fuse(self): """ > It takes a model and fuses the Conv2d() and BatchNorm2d() layers into a single layer Returns: The model is being returned. """ LOGGER.info('Fusing layers... ') for m in self.model.modules(): if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv delattr(m, 'bn') # remove batchnorm m.forward = m.forward_fuse # update forward self.info() return self def info(self, verbose=False, imgsz=640): """ Prints model information Args: verbose: if True, prints out the model information. Defaults to False imgsz: the size of the image that the model will be trained on. Defaults to 640 """ model_info(self, verbose, imgsz) def _apply(self, fn): """ `_apply()` is a function that applies a function to all the tensors in the model that are not parameters or registered buffers Args: fn: the function to apply to the model Returns: A model that is a Detect() object. """ self = super()._apply(fn) m = self.model[-1] # Detect() if isinstance(m, (Detect, Segment)): m.stride = fn(m.stride) m.anchors = fn(m.anchors) m.strides = fn(m.strides) return self def load(self, weights): """ > This function loads the weights of the model from a file Args: weights: The weights to load into the model. 
""" # Force all tasks to implement this function raise NotImplementedError("This function needs to be implemented by derived classes!") class DetectionModel(BaseModel): # YOLOv5 detection model def __init__(self, cfg='yolov8n.yaml', ch=3, nc=None, verbose=True): # model, input channels, number of classes super().__init__() self.yaml = cfg if isinstance(cfg, dict) else yaml_load(check_yaml(cfg), append_filename=True) # cfg dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch], verbose=verbose) # model, savelist self.names = {i: f'{i}' for i in range(self.yaml['nc'])} # default names dict self.inplace = self.yaml.get('inplace', True) # Build strides m = self.model[-1] # Detect() if isinstance(m, (Detect, Segment)): s = 256 # 2x min stride m.inplace = self.inplace forward = lambda x: self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward self.stride = m.stride m.bias_init() # only run once # Init weights, biases initialize_weights(self) if verbose: self.info() LOGGER.info('') def forward(self, x, augment=False, profile=False, visualize=False): if augment: return self._forward_augment(x) # augmented inference, None return self._forward_once(x, profile, visualize) # single-scale inference, train def _forward_augment(self, x): img_size = x.shape[-2:] # height, width s = [1, 0.83, 0.67] # scales f = [None, 3, None] # flips (2-ud, 3-lr) y = [] # outputs for si, fi in zip(s, f): xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) yi = self._forward_once(xi)[0] # forward # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save yi = self._descale_pred(yi, fi, si, img_size) y.append(yi) y = self._clip_augmented(y) # clip augmented tails return torch.cat(y, -1), None # augmented inference, train @staticmethod def _descale_pred(p, flips, scale, img_size, dim=1): # de-scale predictions following augmented inference (inverse operation) p[:, :4] /= scale # de-scale x, y, wh, cls = p.split((1, 1, 2, p.shape[dim] - 4), dim) if flips == 2: y = img_size[0] - y # de-flip ud elif flips == 3: x = img_size[1] - x # de-flip lr return torch.cat((x, y, wh, cls), dim) def _clip_augmented(self, y): # Clip YOLOv5 augmented inference tails nl = self.model[-1].nl # number of detection layers (P3-P5) g = sum(4 ** x for x in range(nl)) # grid points e = 1 # exclude layer count i = (y[0].shape[-1] // g) * sum(4 ** x for x in range(e)) # indices y[0] = y[0][..., :-i] # large i = (y[-1].shape[-1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices y[-1] = y[-1][..., i:] # small return y def load(self, weights, verbose=True): csd = weights.float().state_dict() # checkpoint state_dict as FP32 csd = intersect_dicts(csd, self.state_dict()) # intersect self.load_state_dict(csd, strict=False) # load if verbose: LOGGER.info(f'Transferred {len(csd)}/{len(self.model.state_dict())} items from pretrained weights') class SegmentationModel(DetectionModel): # YOLOv5 segmentation model def __init__(self, cfg='yolov8n-seg.yaml', ch=3, nc=None, verbose=True): super().__init__(cfg, ch, nc, verbose) class ClassificationModel(BaseModel): # YOLOv5 classification model def __init__(self, cfg=None, model=None, ch=3, nc=1000, cutoff=10, 
verbose=True): # yaml, model, number of classes, cutoff index super().__init__() self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg, ch, nc, verbose) def _from_detection_model(self, model, nc=1000, cutoff=10): # Create a YOLOv5 classification model from a YOLOv5 detection model from nn.autobackend import AutoBackend if isinstance(model, AutoBackend): model = model.model # unwrap DetectMultiBackend model.model = model.model[:cutoff] # backbone m = model.model[-1] # last layer ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module c = Classify(ch, nc) # Classify() c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type model.model[-1] = c # replace self.model = model.model self.stride = model.stride self.save = [] self.nc = nc def _from_yaml(self, cfg, ch, nc, verbose): self.yaml = cfg if isinstance(cfg, dict) else yaml_load(check_yaml(cfg), append_filename=True) # cfg dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch], verbose=verbose) # model, savelist self.names = {i: f'{i}' for i in range(self.yaml['nc'])} # default names dict self.info() def load(self, weights): model = weights["model"] if isinstance(weights, dict) else weights # torchvision models are not dicts csd = model.float().state_dict() csd = intersect_dicts(csd, self.state_dict()) # intersect self.load_state_dict(csd, strict=False) # load @staticmethod def reshape_outputs(model, nc): # Update a TorchVision classification model to class count 'n' if required name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1] # last module if isinstance(m, Classify): # YOLO Classify() head if m.linear.out_features != nc: m.linear = nn.Linear(m.linear.in_features, nc) elif isinstance(m, nn.Linear): # ResNet, EfficientNet if m.out_features != nc: setattr(model, name, nn.Linear(m.in_features, nc)) elif isinstance(m, nn.Sequential): types = [type(x) for x in m] if nn.Linear in types: i = types.index(nn.Linear) # nn.Linear index if m[i].out_features != nc: m[i] = nn.Linear(m[i].in_features, nc) elif nn.Conv2d in types: i = types.index(nn.Conv2d) # nn.Conv2d index if m[i].out_channels != nc: m[i] = nn.Conv2d(m[i].in_channels, nc, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None) # Functions ------------------------------------------------------------------------------------------------------------ def attempt_load_weights(weights, device=None, inplace=True, fuse=False): # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a from yolo.utils.downloads import attempt_download model = Ensemble() for w in weights if isinstance(weights, list) else [weights]: ckpt = torch.load(attempt_download(w), map_location='cpu') # load args = {**DEFAULT_CONFIG_DICT, **ckpt['train_args']} # combine model and default args, preferring model args ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model # Model compatibility updates ckpt.args = {k: v for k, v in args.items() if k in DEFAULT_CONFIG_KEYS} # attach args to model ckpt.pt_path = weights # attach *.pt file path to model if not hasattr(ckpt, 'stride'): ckpt.stride = torch.tensor([32.]) # Append model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model 
in eval mode # Module compatibility updates for m in model.modules(): t = type(m) if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment): m.inplace = inplace # torch 1.7.0 compatibility elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model if len(model) == 1: return model[-1] # Return ensemble print(f'Ensemble created with {weights}\n') for k in 'names', 'nc', 'yaml': setattr(model, k, getattr(model[0], k)) model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' return model def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False): # Loads a single model weights from yolo.utils.downloads import attempt_download ckpt = torch.load(attempt_download(weight), map_location='cpu') # load args = {**DEFAULT_CONFIG_DICT, **ckpt['train_args']} # combine model and default args, preferring model args model = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model # Model compatibility updates model.args = {k: v for k, v in args.items() if k in DEFAULT_CONFIG_KEYS} # attach args to model model.pt_path = weight # attach *.pt file path to model if not hasattr(model, 'stride'): model.stride = torch.tensor([32.]) model = model.fuse().eval() if fuse and hasattr(model, 'fuse') else model.eval() # model in eval mode # Module compatibility updates for m in model.modules(): t = type(m) if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment): m.inplace = inplace # torch 1.7.0 compatibility elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): m.recompute_scale_factor = None # torch 1.11.0 compatibility # Return model and ckpt return model, ckpt def parse_model(d, ch, verbose=True): # model_dict, input_channels(3) # Parse a YOLO model.yaml dictionary if verbose: LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10} {'module':<45}{'arguments':<30}") nc, gd, gw, act = d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation') if act: Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() if verbose: LOGGER.info(f"{colorstr('activation:')} {act}") # print layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): with contextlib.suppress(NameError): args[j] = eval(a) if isinstance(a, str) else a # eval strings n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain if m in { Classify, Conv, ConvTranspose, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, Focus, BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}: c1, c2 = ch[f], args[0] if c2 != nc: # if c2 not equal to number of classes (i.e. 
for Classify() output) c2 = make_divisible(c2 * gw, 8) args = [c1, c2, *args[1:]] if m in {BottleneckCSP, C1, C2, C2f, C3, C3TR, C3Ghost, C3x}: args.insert(2, n) # number of repeats n = 1 elif m is nn.BatchNorm2d: args = [ch[f]] elif m is Concat: c2 = sum(ch[x] for x in f) elif m in {Detect, Segment}: args.append([ch[x] for x in f]) if m is Segment: args[2] = make_divisible(args[2] * gw, 8) else: c2 = ch[f] m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type m.np = sum(x.numel() for x in m_.parameters()) # number params m_.i, m_.f, m_.type = i, f, t # attach index, 'from' index, type if verbose: LOGGER.info(f'{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f} {t:<45}{str(args):<30}') # print save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist layers.append(m_) if i == 0: ch = [] ch.append(c2) return nn.Sequential(*layers), sorted(save)

================================================
FILE: requirements.txt
================================================

# Ultralytics requirements
# Usage: pip install -r requirements.txt

# Base ----------------------------------------
hydra-core>=1.2.0
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.1
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.64.0
ultralytics==8.0.0

# Logging -------------------------------------
tensorboard>=2.4.1
# clearml
# comet

#tracking
filterpy
scikit-image

# Plotting ------------------------------------
pandas>=1.1.4
seaborn>=0.11.0

# Export --------------------------------------
# coremltools>=6.0  # CoreML export
# onnx>=1.12.0  # ONNX export
# onnx-simplifier>=0.4.1  # ONNX simplifier
# nvidia-pyindex  # TensorRT export
# nvidia-tensorrt  # TensorRT export
# scikit-learn==0.19.2  # CoreML quantization
# tensorflow>=2.4.1  # TF exports (-cpu, -aarch64, -macos)
# tensorflowjs>=3.9.0  # TF.js export
# openvino-dev  # OpenVINO export

# Extras --------------------------------------
ipython  # interactive notebook
psutil  # system utilization
thop>=0.1.1  # FLOPs computation
# albumentations>=1.0.3
# pycocotools>=2.0.6  # COCO mAP
# roboflow

# HUB -----------------------------------------
GitPython>=3.1.24

================================================
FILE: yolo/cli.py
================================================

# Ultralytics YOLO 🚀, GPL-3.0 license import shutil from pathlib import Path import hydra import hub, yolo from yolo.utils import DEFAULT_CONFIG, LOGGER, colorstr DIR = Path(__file__).parent @hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent.relative_to(DIR)), config_name=DEFAULT_CONFIG.name) def cli(cfg): """ Run a specified task and mode with the given configuration. Args: cfg (DictConfig): Configuration for the task and mode. """ # LOGGER.info(f"{colorstr(f'Ultralytics YOLO v{ultralytics.__version__}')}") task, mode = cfg.task.lower(), cfg.mode.lower() # Special case for initializing the configuration if task == "init": shutil.copy2(DEFAULT_CONFIG, Path.cwd()) LOGGER.info(f""" {colorstr("YOLO:")} configuration saved to {Path.cwd() / DEFAULT_CONFIG.name}. To run experiments using custom configuration: yolo task='task' mode='mode' --config-name config_file.yaml """) return # Mapping from task to module task_module_map = {"detect": yolo.v8.detect, "segment": yolo.v8.segment, "classify": yolo.v8.classify} module = task_module_map.get(task) if not module: raise SyntaxError(f"task not recognized.
Choices are {', '.join(task_module_map.keys())}") # Mapping from mode to function mode_func_map = { "train": module.train, "val": module.val, "predict": module.predict, "export": yolo.engine.exporter.export, "checks": hub.checks} func = mode_func_map.get(mode) if not func: raise SyntaxError(f"mode not recognized. Choices are {', '.join(mode_func_map.keys())}") func(cfg) ================================================ FILE: yolo/configs/__init__.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from pathlib import Path from typing import Dict, Union from omegaconf import DictConfig, OmegaConf from ultralytics.yolo.configs.hydra_patch import check_config_mismatch def get_config(config: Union[str, DictConfig], overrides: Union[str, Dict] = None): """ Load and merge configuration data from a file or dictionary. Args: config (Union[str, DictConfig]): Configuration data in the form of a file name or a DictConfig object. overrides (Union[str, Dict], optional): Overrides in the form of a file name or a dictionary. Default is None. Returns: OmegaConf.Namespace: Training arguments namespace. """ if overrides is None: overrides = {} if isinstance(config, (str, Path)): config = OmegaConf.load(config) elif isinstance(config, Dict): config = OmegaConf.create(config) # override if isinstance(overrides, str): overrides = OmegaConf.load(overrides) elif isinstance(overrides, Dict): overrides = OmegaConf.create(overrides) check_config_mismatch(dict(overrides).keys(), dict(config).keys()) return OmegaConf.merge(config, overrides) ================================================ FILE: yolo/configs/default.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Default training settings and hyperparameters for medium-augmentation COCO training task: "detect" # choices=['detect', 'segment', 'classify', 'init'] # init is a special case. Specify task to run. mode: "train" # choices=['train', 'val', 'predict'] # mode to run task in. # Train settings ------------------------------------------------------------------------------------------------------- model: null # i.e. yolov8n.pt, yolov8n.yaml. Path to model file data: null # i.e. coco128.yaml. Path to data file epochs: 100 # number of epochs to train for patience: 50 # TODO: epochs to wait for no observable improvement for early stopping of training batch: 16 # number of images per batch imgsz: 640 # size of input images save: True # save checkpoints cache: False # True/ram, disk or False. Use cache for data loading device: null # cuda device, i.e. 0 or 0,1,2,3 or cpu. 
Device to run on workers: 8 # number of worker threads for data loading project: null # project name name: null # experiment name exist_ok: False # whether to overwrite existing experiment pretrained: False # whether to use a pretrained model optimizer: 'SGD' # optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp'] verbose: False # whether to print verbose output seed: 0 # random seed for reproducibility deterministic: True # whether to enable deterministic mode single_cls: False # train multi-class data as single-class image_weights: False # use weighted image selection for training rect: False # support rectangular training cos_lr: False # use cosine learning rate scheduler close_mosaic: 10 # disable mosaic augmentation for final 10 epochs resume: False # resume training from last checkpoint # Segmentation overlap_mask: True # masks should overlap during training mask_ratio: 4 # mask downsample ratio # Classification dropout: 0.0 # use dropout regularization # Val/Test settings ---------------------------------------------------------------------------------------------------- val: True # validate/test during training save_json: False # save results to JSON file save_hybrid: False # save hybrid version of labels (labels + additional predictions) conf: null # object confidence threshold for detection (default 0.25 predict, 0.001 val) iou: 0.7 # intersection over union (IoU) threshold for NMS max_det: 300 # maximum number of detections per image half: False # use half precision (FP16) dnn: False # use OpenCV DNN for ONNX inference plots: True # show plots during training # Prediction settings -------------------------------------------------------------------------------------------------- source: null # source directory for images or videos show: False # show results if possible save_txt: False # save results as .txt file save_conf: False # save results with confidence scores save_crop: False # save cropped images with results hide_labels: False # hide labels hide_conf: True # hide confidence scores vid_stride: 1 # video frame-rate stride line_thickness: 3 # bounding box thickness (pixels) visualize: False # visualize results augment: False # apply data augmentation to images agnostic_nms: False # class-agnostic NMS retina_masks: False # use retina masks for object detection # Export settings ------------------------------------------------------------------------------------------------------ format: torchscript # format to export to keras: False # use Keras optimize: False # TorchScript: optimize for mobile int8: False # CoreML/TF INT8 quantization dynamic: False # ONNX/TF/TensorRT: dynamic axes simplify: False # ONNX: simplify model opset: 17 # ONNX: opset version workspace: 4 # TensorRT: workspace size (GB) nms: False # CoreML: add NMS # Hyperparameters ------------------------------------------------------------------------------------------------------ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 warmup_epochs: 3.0 # warmup epochs (fractions ok) warmup_momentum: 0.8 # warmup initial momentum warmup_bias_lr: 0.1 # warmup initial bias lr box: 7.5 # box loss gain cls: 0.5 # cls loss gain (scale with pixels) dfl: 1.5 # dfl loss gain fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) label_smoothing: 0.0 nbs: 64 # nominal batch size hsv_h: 0.015 # image HSV-Hue augmentation (fraction) hsv_s: 0.7 # image 
HSV-Saturation augmentation (fraction) hsv_v: 0.4 # image HSV-Value augmentation (fraction) degrees: 0.0 # image rotation (+/- deg) translate: 0.1 # image translation (+/- fraction) scale: 0.5 # image scale (+/- gain) shear: 0.0 # image shear (+/- deg) perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 flipud: 0.0 # image flip up-down (probability) fliplr: 0.5 # image flip left-right (probability) mosaic: 1.0 # image mosaic (probability) mixup: 0.0 # image mixup (probability) copy_paste: 0.0 # segment copy-paste (probability) # Hydra configs -------------------------------------------------------------------------------------------------------- hydra: output_subdir: null # disable hydra directory creation run: dir: . # Debug, do not modify ------------------------------------------------------------------------------------------------- v5loader: False # use legacy YOLOv5 dataloader ================================================ FILE: yolo/configs/hydra_patch.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import sys from difflib import get_close_matches from textwrap import dedent import hydra from hydra.errors import ConfigCompositionException from omegaconf import OmegaConf, open_dict # noqa from omegaconf.errors import ConfigAttributeError, ConfigKeyError, OmegaConfBaseException # noqa from yolo.utils import LOGGER, colorstr def override_config(overrides, cfg): override_keys = [override.key_or_group for override in overrides] check_config_mismatch(override_keys, cfg.keys()) for override in overrides: if override.package is not None: raise ConfigCompositionException(f"Override {override.input_line} looks like a config group" f" override, but config group '{override.key_or_group}' does not exist.") key = override.key_or_group value = override.value() try: if override.is_delete(): config_val = OmegaConf.select(cfg, key, throw_on_missing=False) if config_val is None: raise ConfigCompositionException(f"Could not delete from config. '{override.key_or_group}'" " does not exist.") elif value is not None and value != config_val: raise ConfigCompositionException("Could not delete from config. The value of" f" '{override.key_or_group}' is {config_val} and not" f" {value}.") last_dot = key.rfind(".") with open_dict(cfg): if last_dot == -1: del cfg[key] else: node = OmegaConf.select(cfg, key[:last_dot]) del node[key[last_dot + 1:]] elif override.is_add(): if OmegaConf.select(cfg, key, throw_on_missing=False) is None or isinstance(value, (dict, list)): OmegaConf.update(cfg, key, value, merge=True, force_add=True) else: assert override.input_line is not None raise ConfigCompositionException( dedent(f"""\ Could not append to config. An item is already at '{override.key_or_group}'. Either remove + prefix: '{override.input_line[1:]}' Or add a second + to add or override '{override.key_or_group}': '+{override.input_line}' """)) elif override.is_force_add(): OmegaConf.update(cfg, key, value, merge=True, force_add=True) else: try: OmegaConf.update(cfg, key, value, merge=True) except (ConfigAttributeError, ConfigKeyError) as ex: raise ConfigCompositionException(f"Could not override '{override.key_or_group}'." 
f"\nTo append to your config use +{override.input_line}") from ex except OmegaConfBaseException as ex: raise ConfigCompositionException(f"Error merging override {override.input_line}").with_traceback( sys.exc_info()[2]) from ex def check_config_mismatch(overrides, cfg): mismatched = [option for option in overrides if option not in cfg and 'hydra.' not in option] for option in mismatched: LOGGER.info(f"{colorstr(option)} is not a valid key. Similar keys: {get_close_matches(option, cfg, 3, 0.6)}") if mismatched: exit() hydra._internal.config_loader_impl.ConfigLoaderImpl._apply_overrides_to_config = override_config ================================================ FILE: yolo/data/__init__.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from .base import BaseDataset from .build import build_classification_dataloader, build_dataloader from .dataset import ClassificationDataset, SemanticDataset, YOLODataset from .dataset_wrappers import MixAndRectDataset ================================================ FILE: yolo/data/augment.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import math import random from copy import deepcopy import cv2 import numpy as np import torch import torchvision.transforms as T from ..utils import LOGGER, colorstr from ..utils.checks import check_version from ..utils.instance import Instances from ..utils.metrics import bbox_ioa from ..utils.ops import segment2box from .utils import IMAGENET_MEAN, IMAGENET_STD, polygons2masks, polygons2masks_overlap # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic class BaseTransform: def __init__(self) -> None: pass def apply_image(self, labels): pass def apply_instances(self, labels): pass def apply_semantic(self, labels): pass def __call__(self, labels): self.apply_image(labels) self.apply_instances(labels) self.apply_semantic(labels) class Compose: def __init__(self, transforms): self.transforms = transforms def __call__(self, data): for t in self.transforms: data = t(data) return data def append(self, transform): self.transforms.append(transform) def tolist(self): return self.transforms def __repr__(self): format_string = f"{self.__class__.__name__}(" for t in self.transforms: format_string += "\n" format_string += f" {t}" format_string += "\n)" return format_string class BaseMixTransform: """This implementation is from mmyolo""" def __init__(self, dataset, pre_transform=None, p=0.0) -> None: self.dataset = dataset self.pre_transform = pre_transform self.p = p def __call__(self, labels): if random.uniform(0, 1) > self.p: return labels # get index of one or three other images indexes = self.get_indexes() if isinstance(indexes, int): indexes = [indexes] # get images information will be used for Mosaic or MixUp mix_labels = [self.dataset.get_label_info(i) for i in indexes] if self.pre_transform is not None: for i, data in enumerate(mix_labels): mix_labels[i] = self.pre_transform(data) labels["mix_labels"] = mix_labels # Mosaic or MixUp labels = self._mix_transform(labels) labels.pop("mix_labels", None) return labels def _mix_transform(self, labels): raise NotImplementedError def get_indexes(self): raise NotImplementedError class Mosaic(BaseMixTransform): """Mosaic augmentation. Args: imgsz (Sequence[int]): Image size after mosaic pipeline of single image. The shape order should be (height, width). Default to (640, 640). 
""" def __init__(self, dataset, imgsz=640, p=1.0, border=(0, 0)): assert 0 <= p <= 1.0, "The probability should be in range [0, 1]. " f"got {p}." super().__init__(dataset=dataset, p=p) self.dataset = dataset self.imgsz = imgsz self.border = border def get_indexes(self): return [random.randint(0, len(self.dataset) - 1) for _ in range(3)] def _mix_transform(self, labels): mosaic_labels = [] assert labels.get("rect_shape", None) is None, "rect and mosaic is exclusive." assert len(labels.get("mix_labels", [])) > 0, "There are no other images for mosaic augment." s = self.imgsz yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y for i in range(4): labels_patch = (labels if i == 0 else labels["mix_labels"][i - 1]).copy() # Load image img = labels_patch["img"] h, w = labels_patch["resized_shape"] # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b labels_patch = self._update_labels(labels_patch, padw, padh) mosaic_labels.append(labels_patch) final_labels = self._cat_labels(mosaic_labels) final_labels["img"] = img4 return final_labels def _update_labels(self, labels, padw, padh): """Update labels""" nh, nw = labels["img"].shape[:2] labels["instances"].convert_bbox(format="xyxy") labels["instances"].denormalize(nw, nh) labels["instances"].add_padding(padw, padh) return labels def _cat_labels(self, mosaic_labels): if len(mosaic_labels) == 0: return {} cls = [] instances = [] for labels in mosaic_labels: cls.append(labels["cls"]) instances.append(labels["instances"]) final_labels = { "ori_shape": mosaic_labels[0]["ori_shape"], "resized_shape": (self.imgsz * 2, self.imgsz * 2), "im_file": mosaic_labels[0]["im_file"], "cls": np.concatenate(cls, 0), "instances": Instances.concatenate(instances, axis=0)} final_labels["instances"].clip(self.imgsz * 2, self.imgsz * 2) return final_labels class MixUp(BaseMixTransform): def __init__(self, dataset, pre_transform=None, p=0.0) -> None: super().__init__(dataset=dataset, pre_transform=pre_transform, p=p) def get_indexes(self): return random.randint(0, len(self.dataset) - 1) def _mix_transform(self, labels): # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 labels2 = labels["mix_labels"][0] labels["img"] = (labels["img"] * r + labels2["img"] * (1 - r)).astype(np.uint8) labels["instances"] = Instances.concatenate([labels["instances"], labels2["instances"]], axis=0) labels["cls"] = np.concatenate([labels["cls"], labels2["cls"]], 0) return labels class RandomPerspective: def __init__(self, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, border=(0, 0)): self.degrees = degrees self.translate = 
translate self.scale = scale self.shear = shear self.perspective = perspective # mosaic border self.border = border def affine_transform(self, img): # Center C = np.eye(3) C[0, 2] = -img.shape[1] / 2 # x translation (pixels) C[1, 2] = -img.shape[0] / 2 # y translation (pixels) # Perspective P = np.eye(3) P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y) P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) a = random.uniform(-self.degrees, self.degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - self.scale, 1 + self.scale) # s = 2 ** random.uniform(-scale, scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels) T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT # affine image if (self.border[0] != 0) or (self.border[1] != 0) or (M != np.eye(3)).any(): # image changed if self.perspective: img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114)) else: # affine img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114)) return img, M, s def apply_bboxes(self, bboxes, M): """apply affine to bboxes only. Args: bboxes(ndarray): list of bboxes, xyxy format, with shape (num_bboxes, 4). M(ndarray): affine matrix. Returns: new_bboxes(ndarray): bboxes after affine, [num_bboxes, 4]. """ n = len(bboxes) if n == 0: return bboxes xy = np.ones((n * 4, 3)) xy[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform xy = (xy[:, :2] / xy[:, 2:3] if self.perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T def apply_segments(self, segments, M): """apply affine to segments and generate new bboxes from segments. Args: segments(ndarray): list of segments, [num_samples, 500, 2]. M(ndarray): affine matrix. Returns: new_segments(ndarray): list of segments after affine, [num_samples, 500, 2]. new_bboxes(ndarray): bboxes after affine, [N, 4]. """ n, num = segments.shape[:2] if n == 0: return [], segments xy = np.ones((n * num, 3)) segments = segments.reshape(-1, 2) xy[:, :2] = segments xy = xy @ M.T # transform xy = xy[:, :2] / xy[:, 2:3] segments = xy.reshape(n, -1, 2) bboxes = np.stack([segment2box(xy, self.size[0], self.size[1]) for xy in segments], 0) return bboxes, segments def apply_keypoints(self, keypoints, M): """apply affine to keypoints. Args: keypoints(ndarray): keypoints, [N, 17, 2]. M(ndarray): affine matrix. Return: new_keypoints(ndarray): keypoints after affine, [N, 17, 2]. 
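        Example (editor's sketch, not part of the original docstring): every method above
        lifts points to homogeneous coordinates and multiplies by M.T:

            >>> import numpy as np
            >>> pts = np.ones((4, 3))
            >>> pts[:, :2] = [[0, 0], [10, 0], [10, 10], [0, 10]]  # box corners
            >>> out = (pts @ np.eye(3).T)[:, :2]  # an identity M leaves the corners unchanged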
""" n = len(keypoints) if n == 0: return keypoints new_keypoints = np.ones((n * 17, 3)) new_keypoints[:, :2] = keypoints.reshape(n * 17, 2) # num_kpt is hardcoded to 17 new_keypoints = new_keypoints @ M.T # transform new_keypoints = (new_keypoints[:, :2] / new_keypoints[:, 2:3]).reshape(n, 34) # perspective rescale or affine new_keypoints[keypoints.reshape(-1, 34) == 0] = 0 x_kpts = new_keypoints[:, list(range(0, 34, 2))] y_kpts = new_keypoints[:, list(range(1, 34, 2))] x_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0 y_kpts[np.logical_or.reduce((x_kpts < 0, x_kpts > self.size[0], y_kpts < 0, y_kpts > self.size[1]))] = 0 new_keypoints[:, list(range(0, 34, 2))] = x_kpts new_keypoints[:, list(range(1, 34, 2))] = y_kpts return new_keypoints.reshape(n, 17, 2) def __call__(self, labels): """ Affine images and targets. Args: labels(Dict): a dict of `bboxes`, `segments`, `keypoints`. """ img = labels["img"] cls = labels["cls"] instances = labels.pop("instances") # make sure the coord formats are right instances.convert_bbox(format="xyxy") instances.denormalize(*img.shape[:2][::-1]) self.size = img.shape[1] + self.border[1] * 2, img.shape[0] + self.border[0] * 2 # w, h # M is affine matrix # scale for func:`box_candidates` img, M, scale = self.affine_transform(img) bboxes = self.apply_bboxes(instances.bboxes, M) segments = instances.segments keypoints = instances.keypoints # update bboxes if there are segments. if len(segments): bboxes, segments = self.apply_segments(segments, M) if keypoints is not None: keypoints = self.apply_keypoints(keypoints, M) new_instances = Instances(bboxes, segments, keypoints, bbox_format="xyxy", normalized=False) # clip new_instances.clip(*self.size) # filter instances instances.scale(scale_w=scale, scale_h=scale, bbox_only=True) # make the bboxes have the same scale with new_bboxes i = self.box_candidates(box1=instances.bboxes.T, box2=new_instances.bboxes.T, area_thr=0.01 if len(segments) else 0.10) labels["instances"] = new_instances[i] labels["cls"] = cls[i] labels["img"] = img labels["resized_shape"] = img.shape[:2] return labels def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) # Compute box candidates: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates class RandomHSV: def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None: self.hgain = hgain self.sgain = sgain self.vgain = vgain def __call__(self, labels): img = labels["img"] if self.hgain or self.sgain or self.vgain: r = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1 # random gains hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) dtype = img.dtype # uint8 x = np.arange(0, 256, dtype=r.dtype) lut_hue = ((x * r[0]) % 180).astype(dtype) lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed return labels class RandomFlip: def __init__(self, p=0.5, direction="horizontal") -> None: assert direction in ["horizontal", "vertical"], f"Support direction `horizontal` or 
`vertical`, got {direction}" assert 0 <= p <= 1.0 self.p = p self.direction = direction def __call__(self, labels): img = labels["img"] instances = labels.pop("instances") instances.convert_bbox(format="xywh") h, w = img.shape[:2] h = 1 if instances.normalized else h w = 1 if instances.normalized else w # Flip up-down if self.direction == "vertical" and random.random() < self.p: img = np.flipud(img) instances.flipud(h) if self.direction == "horizontal" and random.random() < self.p: img = np.fliplr(img) instances.fliplr(w) labels["img"] = np.ascontiguousarray(img) labels["instances"] = instances return labels class LetterBox: """Resize image and padding for detection, instance segmentation, pose""" def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32): self.new_shape = new_shape self.auto = auto self.scaleFill = scaleFill self.scaleup = scaleup self.stride = stride def __call__(self, labels=None, image=None): if labels is None: labels = {} img = labels.get("img") if image is None else image shape = img.shape[:2] # current shape [height, width] new_shape = labels.pop("rect_shape", self.new_shape) if isinstance(new_shape, int): new_shape = (new_shape, new_shape) # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) if not self.scaleup: # only scale down, do not scale up (for better val mAP) r = min(r, 1.0) # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding if self.auto: # minimum rectangle dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride) # wh padding elif self.scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 if labels.get("ratio_pad"): labels["ratio_pad"] = (labels["ratio_pad"], (dw, dh)) # for evaluation if shape[::-1] != new_unpad: # resize img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)) # add border if len(labels): labels = self._update_labels(labels, ratio, dw, dh) labels["img"] = img labels["resized_shape"] = new_shape return labels else: return img def _update_labels(self, labels, ratio, padw, padh): """Update labels""" labels["instances"].convert_bbox(format="xyxy") labels["instances"].denormalize(*labels["img"].shape[:2][::-1]) labels["instances"].scale(*ratio) labels["instances"].add_padding(padw, padh) return labels class CopyPaste: def __init__(self, p=0.5) -> None: self.p = p def __call__(self, labels): # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) im = labels["img"] cls = labels["cls"] instances = labels.pop("instances") instances.convert_bbox(format="xyxy") if self.p and len(instances.segments): n = len(instances) _, w, _ = im.shape # height, width, channels im_new = np.zeros(im.shape, np.uint8) # calculate ioa first then select indexes randomly ins_flip = deepcopy(instances) ins_flip.fliplr(w) ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes) # intersection over area, (N, M) indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) n = len(indexes) for j in random.sample(list(indexes), k=round(self.p * n)): cls = 
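# --- editor's note: illustrative sketch, not part of the original file ---------------------
# LetterBox above computes one scale r = min(new_h/h, new_w/w) and splits the leftover
# padding evenly between opposite sides. The same geometry as a standalone helper:
def letterbox_geometry(h, w, new_shape=640):
    r = min(new_shape / h, new_shape / w)    # one scale factor preserves aspect ratio
    new_unpad = round(w * r), round(h * r)   # resized (w, h) before padding
    dw, dh = new_shape - new_unpad[0], new_shape - new_unpad[1]
    return r, new_unpad, (dw / 2, dh / 2)    # scale, resized size, per-side (pad_w, pad_h)

# letterbox_geometry(480, 640) -> (1.0, (640, 480), (0.0, 80.0))
# --------------------------------------------------------------------------------------------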
np.concatenate((cls, cls[[j]]), axis=0) instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0) cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED) result = cv2.flip(im, 1) # augment segments (flip left-right) i = cv2.flip(im_new, 1).astype(bool) im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug labels["img"] = im labels["cls"] = cls labels["instances"] = instances return labels class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) def __init__(self, p=1.0): self.p = p self.transform = None prefix = colorstr("albumentations: ") try: import albumentations as A check_version(A.__version__, "1.0.3", hard=True) # version requirement T = [ A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, p=0.0),] # transforms self.transform = A.Compose(T, bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"])) LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p)) except ImportError: # package not installed, skip pass except Exception as e: LOGGER.info(f"{prefix}{e}") def __call__(self, labels): im = labels["img"] cls = labels["cls"] if len(cls): labels["instances"].convert_bbox("xywh") labels["instances"].normalize(*im.shape[:2][::-1]) bboxes = labels["instances"].bboxes # TODO: add supports of segments and keypoints if self.transform and random.random() < self.p: new = self.transform(image=im, bboxes=bboxes, class_labels=cls) # transformed labels["img"] = new["image"] labels["cls"] = np.array(new["class_labels"]) labels["instances"].update(bboxes=bboxes) return labels # TODO: technically this is not an augmentation, maybe we should put this to another files class Format: def __init__(self, bbox_format="xywh", normalize=True, return_mask=False, return_keypoint=False, mask_ratio=4, mask_overlap=True, batch_idx=True): self.bbox_format = bbox_format self.normalize = normalize self.return_mask = return_mask # set False when training detection only self.return_keypoint = return_keypoint self.mask_ratio = mask_ratio self.mask_overlap = mask_overlap self.batch_idx = batch_idx # keep the batch indexes def __call__(self, labels): img = labels["img"] h, w = img.shape[:2] cls = labels.pop("cls") instances = labels.pop("instances") instances.convert_bbox(format=self.bbox_format) instances.denormalize(w, h) nl = len(instances) if self.return_mask: if nl: masks, instances, cls = self._format_segments(instances, cls, w, h) masks = torch.from_numpy(masks) else: masks = torch.zeros(1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio, img.shape[1] // self.mask_ratio) labels["masks"] = masks if self.normalize: instances.normalize(w, h) labels["img"] = self._format_img(img) labels["cls"] = torch.from_numpy(cls) if nl else torch.zeros(nl) labels["bboxes"] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4)) if self.return_keypoint: labels["keypoints"] = torch.from_numpy(instances.keypoints) if nl else torch.zeros((nl, 17, 2)) # then we can use collate_fn if self.batch_idx: labels["batch_idx"] = torch.zeros(nl) return labels def _format_img(self, img): if len(img.shape) < 3: img = np.expand_dims(img, -1) img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]) img = torch.from_numpy(img) return img def _format_segments(self, instances, cls, w, h): """convert polygon points to bitmap""" segments = instances.segments if 
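# --- editor's note: illustrative sketch, not part of the original file ---------------------
# Format._format_img above turns an HWC BGR uint8 array into a contiguous CHW RGB tensor;
# the same conversion in isolation:
import numpy as np
import torch

img = np.zeros((4, 4, 3), dtype=np.uint8)  # dummy HWC BGR image
chw_rgb = torch.from_numpy(np.ascontiguousarray(img.transpose(2, 0, 1)[::-1]))
# --------------------------------------------------------------------------------------------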
self.mask_overlap: masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio) masks = masks[None] # (640, 640) -> (1, 640, 640) instances = instances[sorted_idx] cls = cls[sorted_idx] else: masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio) return masks, instances, cls def mosaic_transforms(dataset, imgsz, hyp): pre_transform = Compose([ Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic, border=[-imgsz // 2, -imgsz // 2]), CopyPaste(p=hyp.copy_paste), RandomPerspective( degrees=hyp.degrees, translate=hyp.translate, scale=hyp.scale, shear=hyp.shear, perspective=hyp.perspective, border=[-imgsz // 2, -imgsz // 2], ),]) return Compose([ pre_transform, MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup), Albumentations(p=1.0), RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v), RandomFlip(direction="vertical", p=hyp.flipud), RandomFlip(direction="horizontal", p=hyp.fliplr),]) # transforms def affine_transforms(imgsz, hyp): return Compose([ LetterBox(new_shape=(imgsz, imgsz)), RandomPerspective( degrees=hyp.degrees, translate=hyp.translate, scale=hyp.scale, shear=hyp.shear, perspective=hyp.perspective, border=[0, 0], ), Albumentations(p=1.0), RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v), RandomFlip(direction="vertical", p=hyp.flipud), RandomFlip(direction="horizontal", p=hyp.fliplr),]) # transforms # Classification augmentations ----------------------------------------------------------------------------------------- def classify_transforms(size=224): # Transforms to apply if albumentations not installed assert isinstance(size, int), f"ERROR: classify_transforms size {size} must be integer, not (list, tuple)" # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) def classify_albumentations( augment=True, size=224, scale=(0.08, 1.0), hflip=0.5, vflip=0.0, jitter=0.4, mean=IMAGENET_MEAN, std=IMAGENET_STD, auto_aug=False, ): # YOLOv5 classification Albumentations (optional, only used if package is installed) prefix = colorstr("albumentations: ") try: import albumentations as A from albumentations.pytorch import ToTensorV2 check_version(A.__version__, "1.0.3", hard=True) # version requirement if augment: # Resize and crop T = [A.RandomResizedCrop(height=size, width=size, scale=scale)] if auto_aug: # TODO: implement AugMix, AutoAug & RandAug in albumentation LOGGER.info(f"{prefix}auto augmentations are currently not supported") else: if hflip > 0: T += [A.HorizontalFlip(p=hflip)] if vflip > 0: T += [A.VerticalFlip(p=vflip)] if jitter > 0: color_jitter = (float(jitter),) * 3 # repeat value for brightness, contrast, saturation, 0 hue T += [A.ColorJitter(*color_jitter, 0)] else: # Use fixed crop for eval set (reproducibility) T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor LOGGER.info(prefix + ", ".join(f"{x}".replace("always_apply=False, ", "") for x in T if x.p)) return A.Compose(T) except ImportError: # package not installed, skip pass except Exception as e: LOGGER.info(f"{prefix}{e}") class ClassifyLetterBox: # YOLOv5 LetterBox class for image preprocessing, i.e. 
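# --- editor's note: illustrative sketch, not part of the original file ---------------------
# mosaic_transforms/affine_transforms above assemble the train-time pipeline from the
# hyperparameters in default.yaml. A hedged usage sketch (assumes `dataset` is a
# YOLODataset; `hyp` only needs the attributes read above, here via SimpleNamespace):
from types import SimpleNamespace

hyp = SimpleNamespace(mosaic=1.0, copy_paste=0.0, degrees=0.0, translate=0.1, scale=0.5,
                      shear=0.0, perspective=0.0, mixup=0.0, hsv_h=0.015, hsv_s=0.7,
                      hsv_v=0.4, flipud=0.0, fliplr=0.5)
# transforms = mosaic_transforms(dataset, imgsz=640, hyp=hyp)  # Compose of train augments
# --------------------------------------------------------------------------------------------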
T.Compose([LetterBox(size), ToTensor()]) def __init__(self, size=(640, 640), auto=False, stride=32): super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size self.auto = auto # pass max size integer, automatically solve for short side using stride self.stride = stride # used with auto def __call__(self, im): # im = np.array HWC imh, imw = im.shape[:2] r = min(self.h / imh, self.w / imw) # ratio of new/old h, w = round(imh * r), round(imw * r) # resized image hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype) im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) return im_out class CenterCrop: # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()]) def __init__(self, size=640): super().__init__() self.h, self.w = (size, size) if isinstance(size, int) else size def __call__(self, im): # im = np.array HWC imh, imw = im.shape[:2] m = min(imh, imw) # min dimension top, left = (imh - m) // 2, (imw - m) // 2 return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) class ToTensor: # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) def __init__(self, half=False): super().__init__() self.half = half def __call__(self, im): # im = np.array HWC in BGR order im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous im = torch.from_numpy(im) # to torch im = im.half() if self.half else im.float() # uint8 to fp16/32 im /= 255.0 # 0-255 to 0.0-1.0 return im ================================================ FILE: yolo/data/base.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import glob import math import os from multiprocessing.pool import ThreadPool from pathlib import Path from typing import Optional import cv2 import numpy as np from torch.utils.data import Dataset from tqdm import tqdm from ..utils import NUM_THREADS, TQDM_BAR_FORMAT from .utils import HELP_URL, IMG_FORMATS, LOCAL_RANK class BaseDataset(Dataset): """Base Dataset. Args: img_path (str): image path. pipeline (dict): a dict of image transforms. label_path (str): label path, this can also be an ann_file or other custom label path. 
""" def __init__( self, img_path, imgsz=640, label_path=None, cache=False, augment=True, hyp=None, prefix="", rect=False, batch_size=None, stride=32, pad=0.5, single_cls=False, ): super().__init__() self.img_path = img_path self.imgsz = imgsz self.label_path = label_path self.augment = augment self.single_cls = single_cls self.prefix = prefix self.im_files = self.get_img_files(self.img_path) self.labels = self.get_labels() if self.single_cls: self.update_labels(include_class=[]) self.ni = len(self.labels) # rect stuff self.rect = rect self.batch_size = batch_size self.stride = stride self.pad = pad if self.rect: assert self.batch_size is not None self.set_rectangle() # cache stuff self.ims = [None] * self.ni self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files] if cache: self.cache_images(cache) # transforms self.transforms = self.build_transforms(hyp=hyp) def get_img_files(self, img_path): """Read image files.""" try: f = [] # image files for p in img_path if isinstance(img_path, list) else [img_path]: p = Path(p) # os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / "**" / "*.*"), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep f += [x.replace("./", parent) if x.startswith("./") else x for x in t] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise FileNotFoundError(f"{self.prefix}{p} does not exist") im_files = sorted(x.replace("/", os.sep) for x in f if x.split(".")[-1].lower() in IMG_FORMATS) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert im_files, f"{self.prefix}No images found" except Exception as e: raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}: {e}\n{HELP_URL}") from e return im_files def update_labels(self, include_class: Optional[list]): """include_class, filter labels to include only these classes (optional)""" include_class_array = np.array(include_class).reshape(1, -1) for i in range(len(self.labels)): if include_class: cls = self.labels[i]["cls"] bboxes = self.labels[i]["bboxes"] segments = self.labels[i]["segments"] j = (cls == include_class_array).any(1) self.labels[i]["cls"] = cls[j] self.labels[i]["bboxes"] = bboxes[j] if segments: self.labels[i]["segments"] = segments[j] if self.single_cls: self.labels[i]["cls"] = 0 def load_image(self, i): # Loads 1 image from dataset index 'i', returns (im, resized hw) im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] if im is None: # not cached in RAM if fn.exists(): # load npy im = np.load(fn) else: # read image im = cv2.imread(f) # BGR assert im is not None, f"Image Not Found {f}" h0, w0 = im.shape[:2] # orig hw r = self.imgsz / max(h0, w0) # ratio if r != 1: # if sizes are not equal interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized def cache_images(self, cache): # cache images to memory or disk gb = 0 # Gigabytes of cached images self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni fcn = self.cache_images_to_disk if cache == "disk" else self.load_image results = ThreadPool(NUM_THREADS).imap(fcn, range(self.ni)) pbar = tqdm(enumerate(results), total=self.ni, bar_format=TQDM_BAR_FORMAT, 
disable=LOCAL_RANK > 0) for i, x in pbar: if cache == "disk": gb += self.npy_files[i].stat().st_size else: # 'ram' self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) gb += self.ims[i].nbytes pbar.desc = f"{self.prefix}Caching images ({gb / 1E9:.1f}GB {cache})" pbar.close() def cache_images_to_disk(self, i): # Saves an image as an *.npy file for faster loading f = self.npy_files[i] if not f.exists(): np.save(f.as_posix(), cv2.imread(self.im_files[i])) def set_rectangle(self): bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int) # batch index nb = bi[-1] + 1 # number of batches s = np.array([x.pop("shape") for x in self.labels]) # hw ar = s[:, 0] / s[:, 1] # aspect ratio irect = ar.argsort() self.im_files = [self.im_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] ar = ar[irect] # Set training image shapes shapes = [[1, 1]] * nb for i in range(nb): ari = ar[bi == i] mini, maxi = ari.min(), ari.max() if maxi < 1: shapes[i] = [maxi, 1] elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride self.batch = bi # batch index of image def __getitem__(self, index): return self.transforms(self.get_label_info(index)) def get_label_info(self, index): label = self.labels[index].copy() label["img"], label["ori_shape"], label["resized_shape"] = self.load_image(index) label["ratio_pad"] = ( label["resized_shape"][0] / label["ori_shape"][0], label["resized_shape"][1] / label["ori_shape"][1], ) # for evaluation if self.rect: label["rect_shape"] = self.batch_shapes[self.batch[index]] label = self.update_labels_info(label) return label def __len__(self): return len(self.im_files) def update_labels_info(self, label): """custom your label format here""" return label def build_transforms(self, hyp=None): """Users can custom augmentations here like: if self.augment: # training transforms return Compose([]) else: # val transforms return Compose([]) """ raise NotImplementedError def get_labels(self): """Users can custom their own format here. 
Make sure your output is a list with each element like below: dict( im_file=im_file, shape=shape, # format: (height, width) cls=cls, bboxes=bboxes, # xywh segments=segments, # xy keypoints=keypoints, # xy normalized=True, # or False bbox_format="xyxy", # or xywh, ltwh ) """ raise NotImplementedError ================================================ FILE: yolo/data/build.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import os import random import numpy as np import torch from torch.utils.data import DataLoader, dataloader, distributed from ..utils import LOGGER, colorstr from ..utils.torch_utils import torch_distributed_zero_first from .dataset import ClassificationDataset, YOLODataset from .utils import PIN_MEMORY, RANK class InfiniteDataLoader(dataloader.DataLoader): """Dataloader that reuses workers Uses same syntax as vanilla DataLoader """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler)) self.iterator = super().__iter__() def __len__(self): return len(self.batch_sampler.sampler) def __iter__(self): for _ in range(len(self)): yield next(self.iterator) class _RepeatSampler: """Sampler that repeats forever Args: sampler (Sampler) """ def __init__(self, sampler): self.sampler = sampler def __iter__(self): while True: yield from iter(self.sampler) def seed_worker(worker_id): # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader worker_seed = torch.initial_seed() % 2 ** 32 np.random.seed(worker_seed) random.seed(worker_seed) def build_dataloader(cfg, batch_size, img_path, stride=32, label_path=None, rank=-1, mode="train"): assert mode in ["train", "val"] shuffle = mode == "train" if cfg.rect and shuffle: LOGGER.warning("WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False") shuffle = False with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP dataset = YOLODataset( img_path=img_path, label_path=label_path, imgsz=cfg.imgsz, batch_size=batch_size, augment=mode == "train", # augmentation hyp=cfg, # TODO: probably add a get_hyps_from_cfg function rect=cfg.rect if mode == "train" else True, # rectangular batches cache=cfg.get("cache", None), single_cls=cfg.get("single_cls", False), stride=int(stride), pad=0.0 if mode == "train" else 0.5, prefix=colorstr(f"{mode}: "), use_segments=cfg.task == "segment", use_keypoints=cfg.task == "keypoint") batch_size = min(batch_size, len(dataset)) nd = torch.cuda.device_count() # number of CUDA devices workers = cfg.workers if mode == "train" else cfg.workers * 2 nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if cfg.image_weights or cfg.close_mosaic else InfiniteDataLoader # allow attribute updates generator = torch.Generator() generator.manual_seed(6148914691236517205 + RANK) return loader(dataset=dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, num_workers=nw, sampler=sampler, pin_memory=PIN_MEMORY, collate_fn=getattr(dataset, "collate_fn", None), worker_init_fn=seed_worker, generator=generator), dataset # build classification # TODO: using cfg like `build_dataloader` def build_classification_dataloader(path, imgsz=224, batch_size=16, augment=True, cache=False, rank=-1, workers=8, shuffle=True): # Returns Dataloader object to be used with 
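# --- editor's note: illustrative sketch, not part of the original file ---------------------
# build_dataloader above pairs per-worker seeding (seed_worker) with a fixed
# torch.Generator so shuffle order is reproducible per rank. The same recipe in isolation:
import torch
from torch.utils.data import DataLoader, TensorDataset

ds = TensorDataset(torch.arange(8).float())
g = torch.Generator()
g.manual_seed(6148914691236517205)  # constant used above (offset by RANK under DDP)
dl = DataLoader(ds, batch_size=2, shuffle=True, generator=g)  # deterministic shuffles
# --------------------------------------------------------------------------------------------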
YOLOv5 Classifier with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache) batch_size = min(batch_size, len(dataset)) nd = torch.cuda.device_count() nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) generator = torch.Generator() generator.manual_seed(6148914691236517205 + RANK) return InfiniteDataLoader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, num_workers=nw, sampler=sampler, pin_memory=PIN_MEMORY, worker_init_fn=seed_worker, generator=generator) # or DataLoader(persistent_workers=True) ================================================ FILE: yolo/data/dataloaders/__init__.py ================================================ ================================================ FILE: yolo/data/dataloaders/stream_loaders.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import glob import math import os import time from pathlib import Path from threading import Thread from urllib.parse import urlparse import cv2 import numpy as np import torch from ultralytics.yolo.data.augment import LetterBox from ultralytics.yolo.data.utils import IMG_FORMATS, VID_FORMATS from ultralytics.yolo.utils import LOGGER, is_colab, is_kaggle, ops from ultralytics.yolo.utils.checks import check_requirements class LoadStreams: # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` def __init__(self, sources='file.streams', imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1): torch.backends.cudnn.benchmark = True # faster for fixed-size inference self.mode = 'stream' self.imgsz = imgsz self.stride = stride self.vid_stride = vid_stride # video frame-rate stride sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] n = len(sources) self.sources = [ops.clean_str(x) for x in sources] # clean source names for later self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n for i, s in enumerate(sources): # index, source # Start thread to read frames from video stream st = f'{i + 1}/{n}: {s}... ' if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc' check_requirements(('pafy', 'youtube_dl==2020.12.2')) import pafy s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam if s == 0: assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.' assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.' 
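# --- editor's note: illustrative usage, not part of the original file ----------------------
# LoadStreams (continuing below) opens every source with cv2.VideoCapture and reads frames
# in one daemon thread per stream. A hedged usage sketch once the class is defined:
#
#     loader = LoadStreams('rtsp://example.com/media.mp4', imgsz=640, vid_stride=1)
#     for sources, im, im0, _, _ in loader:  # im: stacked CHW RGB batch, im0: raw BGR frames
#         ...                                # run inference on `im`
# --------------------------------------------------------------------------------------------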
cap = cv2.VideoCapture(s) assert cap.isOpened(), f'{st}Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback _, self.imgs[i] = cap.read() # guarantee first frame self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)") self.threads[i].start() LOGGER.info('') # newline # check for common shapes s = np.stack([LetterBox(imgsz, auto, stride=stride)(image=x).shape for x in self.imgs]) self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal self.auto = auto and self.rect self.transforms = transforms # optional if not self.rect: LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') def update(self, i, cap, stream): # Read stream `i` frames in daemon thread n, f = 0, self.frames[i] # frame number, frame array while cap.isOpened() and n < f: n += 1 cap.grab() # .read() = .grab() followed by .retrieve() if n % self.vid_stride == 0: success, im = cap.retrieve() if success: self.imgs[i] = im else: LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') self.imgs[i] = np.zeros_like(self.imgs[i]) cap.open(stream) # re-open stream if signal was lost time.sleep(0.0) # wait time def __iter__(self): self.count = -1 return self def __next__(self): self.count += 1 if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit cv2.destroyAllWindows() raise StopIteration im0 = self.imgs.copy() if self.transforms: im = np.stack([self.transforms(x) for x in im0]) # transforms else: im = np.stack([LetterBox(self.imgsz, self.auto, stride=self.stride)(image=x) for x in im0]) im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW im = np.ascontiguousarray(im) # contiguous return self.sources, im, im0, None, '' def __len__(self): return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years class LoadScreenshots: # YOLOv5 screenshot dataloader, i.e. 
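# --- editor's note: illustrative sketch, not part of the original file ---------------------
# update() above calls cap.grab() on every frame but cap.retrieve() only on every
# vid_stride-th one, so skipped frames are never decoded. The pattern in isolation:
#
#     n = 0
#     while cap.isOpened():
#         n += 1
#         cap.grab()                       # cheap: advance the stream without decoding
#         if n % vid_stride == 0:
#             ok, frame = cap.retrieve()   # decode only the frames that are kept
# --------------------------------------------------------------------------------------------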
`python detect.py --source "screen 0 100 100 512 256"` def __init__(self, source, imgsz=640, stride=32, auto=True, transforms=None): # source = [screen_number left top width height] (pixels) check_requirements('mss') import mss source, *params = source.split() self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 if len(params) == 1: self.screen = int(params[0]) elif len(params) == 4: left, top, width, height = (int(x) for x in params) elif len(params) == 5: self.screen, left, top, width, height = (int(x) for x in params) self.imgsz = imgsz self.stride = stride self.transforms = transforms self.auto = auto self.mode = 'stream' self.frame = 0 self.sct = mss.mss() # Parse monitor shape monitor = self.sct.monitors[self.screen] self.top = monitor["top"] if top is None else (monitor["top"] + top) self.left = monitor["left"] if left is None else (monitor["left"] + left) self.width = width or monitor["width"] self.height = height or monitor["height"] self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} def __iter__(self): return self def __next__(self): # mss screen capture: get raw pixels from the screen as np array im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " if self.transforms: im = self.transforms(im0) # transforms else: im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0) im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB im = np.ascontiguousarray(im) # contiguous self.frame += 1 return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s class LoadImages: # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` def __init__(self, path, imgsz=640, stride=32, auto=True, transforms=None, vid_stride=1): if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line path = Path(path).read_text().rsplit() files = [] for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: p = str(Path(p).resolve()) if '*' in p: files.extend(sorted(glob.glob(p, recursive=True))) # glob elif os.path.isdir(p): files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir elif os.path.isfile(p): files.append(p) # files else: raise FileNotFoundError(f'{p} does not exist') images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] ni, nv = len(images), len(videos) self.imgsz = imgsz self.stride = stride self.files = images + videos self.nf = ni + nv # number of files self.video_flag = [False] * ni + [True] * nv self.mode = 'image' self.auto = auto self.transforms = transforms # optional self.vid_stride = vid_stride # video frame-rate stride if any(videos): self._new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. 
' \ f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' def __iter__(self): self.count = 0 return self def __next__(self): if self.count == self.nf: raise StopIteration path = self.files[self.count] if self.video_flag[self.count]: # Read video self.mode = 'video' for _ in range(self.vid_stride): self.cap.grab() ret_val, im0 = self.cap.retrieve() while not ret_val: self.count += 1 self.cap.release() if self.count == self.nf: # last video raise StopIteration path = self.files[self.count] self._new_video(path) ret_val, im0 = self.cap.read() self.frame += 1 # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' else: # Read image self.count += 1 im0 = cv2.imread(path) # BGR assert im0 is not None, f'Image Not Found {path}' s = f'image {self.count}/{self.nf} {path}: ' if self.transforms: im = self.transforms(im0) # transforms else: im = LetterBox(self.imgsz, self.auto, stride=self.stride)(image=im0) im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB im = np.ascontiguousarray(im) # contiguous return path, im, im0, self.cap, s def _new_video(self, path): # Create a new video capture object self.frame = 0 self.cap = cv2.VideoCapture(path) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 def _cv2_rotate(self, im): # Rotate a cv2 video manually if self.orientation == 0: return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) elif self.orientation == 180: return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) elif self.orientation == 90: return cv2.rotate(im, cv2.ROTATE_180) return im def __len__(self): return self.nf # number of files ================================================ FILE: yolo/data/dataloaders/v5augmentations.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Image augmentation functions """ import math import random import cv2 import numpy as np import torch import torchvision.transforms as T import torchvision.transforms.functional as TF from ultralytics.yolo.utils import LOGGER, colorstr from ultralytics.yolo.utils.checks import check_version from ultralytics.yolo.utils.metrics import bbox_ioa from ultralytics.yolo.utils.ops import resample_segments, segment2box, xywhn2xyxy IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) def __init__(self, size=640): self.transform = None prefix = colorstr('albumentations: ') try: import albumentations as A check_version(A.__version__, '1.0.3', hard=True) # version requirement T = [ A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0), A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), A.CLAHE(p=0.01), A.RandomBrightnessContrast(p=0.0), A.RandomGamma(p=0.0), A.ImageCompression(quality_lower=75, p=0.0)] # transforms self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) except ImportError: # package not installed, skip pass except Exception as e: LOGGER.info(f'{prefix}{e}') def __call__(self, im, labels, p=1.0): if self.transform and 
random.random() < p: new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]) return im, labels def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False): # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std return TF.normalize(x, mean, std, inplace=inplace) def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD): # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean for i in range(3): x[:, i] = x[:, i] * std[i] + mean[i] return x def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): # HSV color-space augmentation if hgain or sgain or vgain: r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) dtype = im.dtype # uint8 x = np.arange(0, 256, dtype=r.dtype) lut_hue = ((x * r[0]) % 180).astype(dtype) lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed def hist_equalize(im, clahe=True, bgr=False): # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) if clahe: c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) yuv[:, :, 0] = c.apply(yuv[:, :, 0]) else: yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB def replicate(im, labels): # Replicate labels h, w = im.shape[:2] boxes = labels[:, 1:].astype(int) x1, y1, x2, y2 = boxes.T s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) return im, labels def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): # Resize and pad image while meeting stride-multiple constraints shape = im.shape[:2] # current shape [height, width] if isinstance(new_shape, int): new_shape = (new_shape, new_shape) # Scale ratio (new / old) r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) if not scaleup: # only scale down, do not scale up (for better val mAP) r = min(r, 1.0) # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding if auto: # minimum rectangle dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding elif scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 if shape[::-1] != new_unpad: # resize im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) im = cv2.copyMakeBorder(im, 
top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border return im, ratio, (dw, dh) def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 # Center C = np.eye(3) C[0, 2] = -im.shape[1] / 2 # x translation (pixels) C[1, 2] = -im.shape[0] / 2 # y translation (pixels) # Perspective P = np.eye(3) P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) a = random.uniform(-degrees, degrees) # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations s = random.uniform(1 - scale, 1 + scale) # s = 2 ** random.uniform(-scale, scale) R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed if perspective: im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) else: # affine im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() # ax[0].imshow(im[:, :, ::-1]) # base # ax[1].imshow(im2[:, :, ::-1]) # warped # Transform label coordinates n = len(targets) if n: use_segments = any(x.any() for x in segments) new = np.zeros((n, 4)) if use_segments: # warp segments segments = resample_segments(segments) # upsample for i, segment in enumerate(segments): xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) else: # warp boxes xy = np.ones((n * 4, 3)) xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) targets = targets[i] targets[:, 1:5] = new[i] return im, targets def copy_paste(im, labels, segments, p=0.5): # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) n = len(segments) if p and n: h, w, c = im.shape # height, width, channels im_new = np.zeros(im.shape, np.uint8) # calculate ioa first then select indexes randomly boxes = np.stack([w - labels[:, 3], labels[:, 2], w 
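# --- editor's note: illustrative sketch, not part of the original file ---------------------
# random_perspective above folds center (C), perspective (P), rotation/scale (R), shear (S)
# and translation (T) into a single 3x3 matrix M = T @ S @ R @ P @ C, then warps image and
# labels once. With identity middle terms, T and C cancel exactly:
import numpy as np

C = np.eye(3); C[:2, 2] = (-320, -240)  # center a 640x480 image at the origin
T = np.eye(3); T[:2, 2] = (320, 240)    # translate back
M = T @ np.eye(3) @ np.eye(3) @ np.eye(3) @ C
assert np.allclose(M, np.eye(3))        # identity R/P/S -> the round trip is exact
# --------------------------------------------------------------------------------------------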
- labels[:, 1], labels[:, 4]], axis=-1)  # (n, 4)
        ioa = bbox_ioa(boxes, labels[:, 1:5])  # intersection over area
        indexes = np.nonzero((ioa < 0.30).all(1))[0]  # (N, )
        n = len(indexes)
        for j in random.sample(list(indexes), k=round(p * n)):
            l, box, s = labels[j], boxes[j], segments[j]
            labels = np.concatenate((labels, [[l[0], *box]]), 0)
            segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
            cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)

        result = cv2.flip(im, 1)  # augment segments (flip left-right)
        i = cv2.flip(im_new, 1).astype(bool)
        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug

    return im, labels, segments


def cutout(im, labels, p=0.5):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    if random.random() < p:
        h, w = im.shape[:2]
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            mask_h = random.randint(1, int(h * s))  # create random masks
            mask_w = random.randint(1, int(w * s))

            # box
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels
            if len(labels) and s > 0.03:
                box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32)
                ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0]  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels


def mixup(im, labels, im2, labels2):
    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels


def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates


def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        ratio=(0.75, 1.0 / 0.75),  # 0.75, 1.33
        hflip=0.5,
        vflip=0.0,
        jitter=0.4,
        mean=IMAGENET_MEAN,
        std=IMAGENET_STD,
        auto_aug=False):
    # YOLOv5 classification Albumentations (optional, only used if package is installed)
    prefix = colorstr('albumentations: ')
    try:
        import albumentations as A
        from albumentations.pytorch import ToTensorV2
        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
        if augment:  # Resize and crop
            T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)]
            if auto_aug:
                # TODO: implement AugMix, AutoAug & RandAug in albumentations
                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
            else:
                if hflip > 0:
                    T += [A.HorizontalFlip(p=hflip)]
                if vflip > 0:
                    T += [A.VerticalFlip(p=vflip)]
                if jitter > 0:
                    color_jitter = (float(jitter),) * 3  # repeat value for brightness, contrast, saturation, 0 hue
                    T += [A.ColorJitter(*color_jitter, 0)]
        else:  # Use fixed crop for eval set (reproducibility)
            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
        return A.Compose(T)

    except ImportError:  # package not installed, skip
        LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)')
    except Exception as e:
        LOGGER.info(f'{prefix}{e}')


def classify_transforms(size=224):
    # Transforms to apply if albumentations not installed
    assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)'
    # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
    return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])


class LetterBox:
    # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
    def __init__(self, size=(640, 640), auto=False, stride=32):
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

    def __call__(self, im):  # im = np.array HWC
        imh, imw = im.shape[:2]
        r = min(self.h / imh, self.w / imw)  # ratio of new/old
        h, w = round(imh * r), round(imw * r)  # resized image
        # the else-branch tuple must be parenthesized; otherwise `hs` would be bound to the
        # generator expression itself whenever auto=True and the arithmetic below would fail
        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
        im_out = np.full((hs, ws, 3), 114, dtype=im.dtype)  # canvas matches the (possibly stride-adjusted) output
        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
        return im_out


class CenterCrop:
    # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
    def __init__(self, size=640):
        super().__init__()
        self.h, self.w = (size, size) if isinstance(size, int) else size

    def __call__(self, im):  # im = np.array HWC
        imh, imw = im.shape[:2]
        m = min(imh, imw)  # min dimension
        top, left = (imh - m) // 2, (imw - m) // 2
        return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR)


class ToTensor:
    # YOLOv5 ToTensor class for image preprocessing, i.e.
T.Compose([LetterBox(size), ToTensor()]) def __init__(self, half=False): super().__init__() self.half = half def __call__(self, im): # im = np.array HWC in BGR order im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous im = torch.from_numpy(im) # to torch im = im.half() if self.half else im.float() # uint8 to fp16/32 im /= 255.0 # 0-255 to 0.0-1.0 return im ================================================ FILE: yolo/data/dataloaders/v5loader.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Dataloaders and dataset utils """ import contextlib import glob import hashlib import json import math import os import random import shutil import time from itertools import repeat from multiprocessing.pool import Pool, ThreadPool from pathlib import Path from threading import Thread from urllib.parse import urlparse import cv2 import numpy as np import psutil import torch import torchvision import yaml from PIL import ExifTags, Image, ImageOps from torch.utils.data import DataLoader, Dataset, dataloader, distributed from tqdm import tqdm from ultralytics.yolo.data.utils import check_dataset, unzip_file from ultralytics.yolo.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, is_colab, is_kaggle from ultralytics.yolo.utils.checks import check_requirements, check_yaml from ultralytics.yolo.utils.ops import clean_str, segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn from ultralytics.yolo.utils.torch_utils import torch_distributed_zero_first from .v5augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, letterbox, mixup, random_perspective) # Parameters HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html RANK = int(os.getenv('RANK', -1)) PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders # Get orientation exif tag for orientation in ExifTags.TAGS.keys(): if ExifTags.TAGS[orientation] == 'Orientation': break def get_hash(paths): # Returns a single hash value of a list of paths (files or dirs) size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes h = hashlib.md5(str(size).encode()) # hash sizes h.update(''.join(paths).encode()) # hash paths return h.hexdigest() # return hash def exif_size(img): # Returns exif-corrected PIL size s = img.size # (width, height) with contextlib.suppress(Exception): rotation = dict(img._getexif().items())[orientation] if rotation in [6, 8]: # rotation 270 or 90 s = (s[1], s[0]) return s def exif_transpose(image): """ Transpose a PIL image accordingly if it has an EXIF Orientation tag. Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() :param image: The image to transpose. :return: An image. 
""" exif = image.getexif() orientation = exif.get(0x0112, 1) # default 1 if orientation > 1: method = { 2: Image.FLIP_LEFT_RIGHT, 3: Image.ROTATE_180, 4: Image.FLIP_TOP_BOTTOM, 5: Image.TRANSPOSE, 6: Image.ROTATE_270, 7: Image.TRANSVERSE, 8: Image.ROTATE_90}.get(orientation) if method is not None: image = image.transpose(method) del exif[0x0112] image.info["exif"] = exif.tobytes() return image def seed_worker(worker_id): # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader worker_seed = torch.initial_seed() % 2 ** 32 np.random.seed(worker_seed) random.seed(worker_seed) def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=None, augment=False, cache=False, pad=0.0, rect=False, rank=-1, workers=8, image_weights=False, close_mosaic=False, min_items=0, prefix='', shuffle=False, seed=0): if rect and shuffle: LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') shuffle = False with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP dataset = LoadImagesAndLabels( path, imgsz, batch_size, augment=augment, # augmentation hyp=hyp, # hyperparameters rect=rect, # rectangular batches cache_images=cache, single_cls=single_cls, stride=int(stride), pad=pad, image_weights=image_weights, min_items=min_items, prefix=prefix) batch_size = min(batch_size, len(dataset)) nd = torch.cuda.device_count() # number of CUDA devices nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader # DataLoader allows attribute updates generator = torch.Generator() generator.manual_seed(6148914691236517205 + seed + RANK) return loader(dataset, batch_size=batch_size, shuffle=shuffle and sampler is None, num_workers=nw, sampler=sampler, pin_memory=PIN_MEMORY, collate_fn=LoadImagesAndLabels.collate_fn, worker_init_fn=seed_worker, generator=generator), dataset class InfiniteDataLoader(dataloader.DataLoader): """ Dataloader that reuses workers Uses same syntax as vanilla DataLoader """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) self.iterator = super().__iter__() def __len__(self): return len(self.batch_sampler.sampler) def __iter__(self): for _ in range(len(self)): yield next(self.iterator) class _RepeatSampler: """ Sampler that repeats forever Args: sampler (Sampler) """ def __init__(self, sampler): self.sampler = sampler def __iter__(self): while True: yield from iter(self.sampler) class LoadScreenshots: # YOLOv5 screenshot dataloader, i.e. 
`python detect.py --source "screen 0 100 100 512 256"` def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): # source = [screen_number left top width height] (pixels) check_requirements('mss') import mss source, *params = source.split() self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 if len(params) == 1: self.screen = int(params[0]) elif len(params) == 4: left, top, width, height = (int(x) for x in params) elif len(params) == 5: self.screen, left, top, width, height = (int(x) for x in params) self.img_size = img_size self.stride = stride self.transforms = transforms self.auto = auto self.mode = 'stream' self.frame = 0 self.sct = mss.mss() # Parse monitor shape monitor = self.sct.monitors[self.screen] self.top = monitor["top"] if top is None else (monitor["top"] + top) self.left = monitor["left"] if left is None else (monitor["left"] + left) self.width = width or monitor["width"] self.height = height or monitor["height"] self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} def __iter__(self): return self def __next__(self): # mss screen capture: get raw pixels from the screen as np array im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " if self.transforms: im = self.transforms(im0) # transforms else: im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB im = np.ascontiguousarray(im) # contiguous self.frame += 1 return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s class LoadImages: # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line path = Path(path).read_text().rsplit() files = [] for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: p = str(Path(p).resolve()) if '*' in p: files.extend(sorted(glob.glob(p, recursive=True))) # glob elif os.path.isdir(p): files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir elif os.path.isfile(p): files.append(p) # files else: raise FileNotFoundError(f'{p} does not exist') images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] ni, nv = len(images), len(videos) self.img_size = img_size self.stride = stride self.files = images + videos self.nf = ni + nv # number of files self.video_flag = [False] * ni + [True] * nv self.mode = 'image' self.auto = auto self.transforms = transforms # optional self.vid_stride = vid_stride # video frame-rate stride if any(videos): self._new_video(videos[0]) # new video else: self.cap = None assert self.nf > 0, f'No images or videos found in {p}. 
' \ f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' def __iter__(self): self.count = 0 return self def __next__(self): if self.count == self.nf: raise StopIteration path = self.files[self.count] if self.video_flag[self.count]: # Read video self.mode = 'video' for _ in range(self.vid_stride): self.cap.grab() ret_val, im0 = self.cap.retrieve() while not ret_val: self.count += 1 self.cap.release() if self.count == self.nf: # last video raise StopIteration path = self.files[self.count] self._new_video(path) ret_val, im0 = self.cap.read() self.frame += 1 # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' else: # Read image self.count += 1 im0 = cv2.imread(path) # BGR assert im0 is not None, f'Image Not Found {path}' s = f'image {self.count}/{self.nf} {path}: ' if self.transforms: im = self.transforms(im0) # transforms else: im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB im = np.ascontiguousarray(im) # contiguous return path, im, im0, self.cap, s def _new_video(self, path): # Create a new video capture object self.frame = 0 self.cap = cv2.VideoCapture(path) self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 def _cv2_rotate(self, im): # Rotate a cv2 video manually if self.orientation == 0: return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) elif self.orientation == 180: return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) elif self.orientation == 90: return cv2.rotate(im, cv2.ROTATE_180) return im def __len__(self): return self.nf # number of files class LoadStreams: # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` def __init__(self, sources='file.streams', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): torch.backends.cudnn.benchmark = True # faster for fixed-size inference self.mode = 'stream' self.img_size = img_size self.stride = stride self.vid_stride = vid_stride # video frame-rate stride sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] n = len(sources) self.sources = [clean_str(x) for x in sources] # clean source names for later self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n for i, s in enumerate(sources): # index, source # Start thread to read frames from video stream st = f'{i + 1}/{n}: {s}... ' if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc' check_requirements(('pafy', 'youtube_dl==2020.12.2')) import pafy s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam if s == 0: assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.' assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.' 
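            # The `sources` argument parsed above accepts either a single source string or a
            # *.streams text file with one source per line; illustrative examples (values assumed,
            # drawn from the comments in this class):
            #   '0'                             -> local webcam ('0'.isnumeric(), so eval() yields 0)
            #   'rtsp://example.com/media.mp4'  -> RTSP/RTMP/HTTP stream
            #   'https://youtu.be/Zgi9g1ksQHc'  -> YouTube URL, resolved via pafy above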
cap = cv2.VideoCapture(s) assert cap.isOpened(), f'{st}Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback _, self.imgs[i] = cap.read() # guarantee first frame self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)") self.threads[i].start() LOGGER.info('') # newline # check for common shapes s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs]) self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal self.auto = auto and self.rect self.transforms = transforms # optional if not self.rect: LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') def update(self, i, cap, stream): # Read stream `i` frames in daemon thread n, f = 0, self.frames[i] # frame number, frame array while cap.isOpened() and n < f: n += 1 cap.grab() # .read() = .grab() followed by .retrieve() if n % self.vid_stride == 0: success, im = cap.retrieve() if success: self.imgs[i] = im else: LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') self.imgs[i] = np.zeros_like(self.imgs[i]) cap.open(stream) # re-open stream if signal was lost time.sleep(0.0) # wait time def __iter__(self): self.count = -1 return self def __next__(self): self.count += 1 if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit cv2.destroyAllWindows() raise StopIteration im0 = self.imgs.copy() if self.transforms: im = np.stack([self.transforms(x) for x in im0]) # transforms else: im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW im = np.ascontiguousarray(im) # contiguous return self.sources, im, im0, None, '' def __len__(self): return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years def img2label_paths(img_paths): # Define label paths as a function of image paths sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] class LoadImagesAndLabels(Dataset): # YOLOv5 train_loader/val_loader, loads images and labels for training and validation cache_version = 0.6 # dataset labels *.cache version rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0, min_items=0, prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp self.image_weights = image_weights self.rect = False if image_weights else rect self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path self.albumentations = Albumentations(size=img_size) if augment else None try: f = [] # image files for p in path if isinstance(path, list) else [path]: p = Path(p) # 
os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / '**' / '*.*'), recursive=True) # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib) else: raise FileNotFoundError(f'{prefix}{p} does not exist') self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert self.im_files, f'{prefix}No images found' except Exception as e: raise FileNotFoundError(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e # Check cache self.label_files = img2label_paths(self.im_files) # labels cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') try: cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict assert cache['version'] == self.cache_version # matches current version assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash except Exception: cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops # Display cache nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total if exists and LOCAL_RANK in {-1, 0}: d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results if cache['msgs']: LOGGER.info('\n'.join(cache['msgs'])) # display warnings assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' # Read cache [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items labels, shapes, self.segments = zip(*cache.values()) nl = len(np.concatenate(labels, 0)) # number of labels assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. 
{HELP_URL}' self.labels = list(labels) self.shapes = np.array(shapes) self.im_files = list(cache.keys()) # update self.label_files = img2label_paths(cache.keys()) # update # Filter images if min_items: include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int) LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset') self.im_files = [self.im_files[i] for i in include] self.label_files = [self.label_files[i] for i in include] self.labels = [self.labels[i] for i in include] self.segments = [self.segments[i] for i in include] self.shapes = self.shapes[include] # wh # Create indices n = len(self.shapes) # number of images bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index nb = bi[-1] + 1 # number of batches self.batch = bi # batch index of image self.n = n self.indices = range(n) # Update labels include_class = [] # filter labels to include only these classes (optional) include_class_array = np.array(include_class).reshape(1, -1) for i, (label, segment) in enumerate(zip(self.labels, self.segments)): if include_class: j = (label[:, 0:1] == include_class_array).any(1) self.labels[i] = label[j] if segment: self.segments[i] = segment[j] if single_cls: # single-class training, merge all classes into 0 self.labels[i][:, 0] = 0 # Rectangular Training if self.rect: # Sort by aspect ratio s = self.shapes # wh ar = s[:, 1] / s[:, 0] # aspect ratio irect = ar.argsort() self.im_files = [self.im_files[i] for i in irect] self.label_files = [self.label_files[i] for i in irect] self.labels = [self.labels[i] for i in irect] self.segments = [self.segments[i] for i in irect] self.shapes = s[irect] # wh ar = ar[irect] # Set training image shapes shapes = [[1, 1]] * nb for i in range(nb): ari = ar[bi == i] mini, maxi = ari.min(), ari.max() if maxi < 1: shapes[i] = [maxi, 1] elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride # Cache images into RAM/disk for faster training if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix): cache_images = False self.ims = [None] * n self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] if cache_images: b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes self.im_hw0, self.im_hw = [None] * n, [None] * n fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image results = ThreadPool(NUM_THREADS).imap(fcn, range(n)) pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0) for i, x in pbar: if cache_images == 'disk': b += self.npy_files[i].stat().st_size else: # 'ram' self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) b += self.ims[i].nbytes pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})' pbar.close() def check_cache_ram(self, safety_margin=0.1, prefix=''): # Check image caching requirements vs available memory b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes n = min(self.n, 30) # extrapolate from 30 random images for _ in range(n): im = cv2.imread(random.choice(self.im_files)) # sample image ratio = self.img_size / max(im.shape[0], im.shape[1]) # max(h, w) # ratio b += im.nbytes * ratio ** 2 mem_required = b * self.n / n # GB required to cache dataset into RAM mem = psutil.virtual_memory() cache = mem_required * (1 + safety_margin) < mem.available # to cache or not to cache, that is the question if not cache: LOGGER.info(f"{prefix}{mem_required / gb:.1f}GB RAM 
required, " f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, " f"{'caching images ✅' if cache else 'not caching images ⚠️'}") return cache def cache_labels(self, path=Path('./labels.cache'), prefix=''): # Cache dataset labels, check images and read shapes x = {} # dict nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages desc = f"{prefix}Scanning {path.parent / path.stem}..." with Pool(NUM_THREADS) as pool: pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))), desc=desc, total=len(self.im_files), bar_format=TQDM_BAR_FORMAT) for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f nf += nf_f ne += ne_f nc += nc_f if im_file: x[im_file] = [lb, shape, segments] if msg: msgs.append(msg) pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt" pbar.close() if msgs: LOGGER.info('\n'.join(msgs)) if nf == 0: LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}') x['hash'] = get_hash(self.label_files + self.im_files) x['results'] = nf, nm, ne, nc, len(self.im_files) x['msgs'] = msgs # warnings x['version'] = self.cache_version # cache version try: np.save(path, x) # save cache for next time path.with_suffix('.cache.npy').rename(path) # remove .npy suffix LOGGER.info(f'{prefix}New cache created: {path}') except Exception as e: LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable return x def __len__(self): return len(self.im_files) # def __iter__(self): # self.count = -1 # print('ran dataset iter') # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) # return self def __getitem__(self, index): index = self.indices[index] # linear, shuffled, or image_weights hyp = self.hyp mosaic = self.mosaic and random.random() < hyp['mosaic'] if mosaic: # Load mosaic img, labels = self.load_mosaic(index) shapes = None # MixUp augmentation if random.random() < hyp['mixup']: img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) else: # Load image img, (h0, w0), (h, w) = self.load_image(index) # Letterbox shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) if self.augment: img, labels = random_perspective(img, labels, degrees=hyp['degrees'], translate=hyp['translate'], scale=hyp['scale'], shear=hyp['shear'], perspective=hyp['perspective']) nl = len(labels) # number of labels if nl: labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) if self.augment: # Albumentations img, labels = self.albumentations(img, labels) nl = len(labels) # update after albumentations # HSV color-space augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) # Flip up-down if random.random() < hyp['flipud']: img = np.flipud(img) if nl: labels[:, 2] = 1 - labels[:, 2] # Flip left-right if random.random() < hyp['fliplr']: img = np.fliplr(img) if nl: labels[:, 1] = 1 - labels[:, 1] # Cutouts # labels = cutout(img, labels, p=0.5) # nl = len(labels) # update after cutout labels_out = torch.zeros((nl, 6)) if nl: labels_out[:, 1:] = 
torch.from_numpy(labels) # Convert img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB img = np.ascontiguousarray(img) return torch.from_numpy(img), labels_out, self.im_files[index], shapes def load_image(self, i): # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw) im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i], if im is None: # not cached in RAM if fn.exists(): # load npy im = np.load(fn) else: # read image im = cv2.imread(f) # BGR assert im is not None, f'Image Not Found {f}' h0, w0 = im.shape[:2] # orig hw r = self.img_size / max(h0, w0) # ratio if r != 1: # if sizes are not equal interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA im = cv2.resize(im, (math.ceil(w0 * r), math.ceil(h0 * r)), interpolation=interp) return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized def cache_images_to_disk(self, i): # Saves an image as an *.npy file for faster loading f = self.npy_files[i] if not f.exists(): np.save(f.as_posix(), cv2.imread(self.im_files[i])) def load_mosaic(self, index): # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic labels4, segments4 = [], [] s = self.img_size yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices random.shuffle(indices) for i, index in enumerate(indices): # Load image img, _, (h, w) = self.load_image(index) # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) # Concat/clip labels labels4 = np.concatenate(labels4, 0) for x in (labels4[:, 1:], *segments4): np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() # img4, labels4 = replicate(img4, labels4) # replicate # Augment img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) img4, labels4 = random_perspective(img4, labels4, segments4, degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border) # border to remove return img4, labels4 def load_mosaic9(self, index): # YOLOv5 9-mosaic loader. 
Loads 1 image + 8 random images into a 9-image mosaic labels9, segments9 = [], [] s = self.img_size indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices random.shuffle(indices) hp, wp = -1, -1 # height, width previous for i, index in enumerate(indices): # Load image img, _, (h, w) = self.load_image(index) # place img in img9 if i == 0: # center img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top c = s, s - h, s + w, s elif i == 2: # top right c = s + wp, s - h, s + wp + w, s elif i == 3: # right c = s + w0, s, s + w0 + w, s + h elif i == 4: # bottom right c = s + w0, s + hp, s + w0 + w, s + hp + h elif i == 5: # bottom c = s + w0 - w, s + h0, s + w0, s + h0 + h elif i == 6: # bottom left c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h elif i == 7: # left c = s - w, s + h0 - h, s, s + h0 elif i == 8: # top left c = s - w, s + h0 - hp - h, s, s + h0 - hp padx, pady = c[:2] x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords # Labels labels, segments = self.labels[index].copy(), self.segments[index].copy() if labels.size: labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] # Concat/clip labels labels9 = np.concatenate(labels9, 0) labels9[:, [1, 3]] -= xc labels9[:, [2, 4]] -= yc c = np.array([xc, yc]) # centers segments9 = [x - c for x in segments9] for x in (labels9[:, 1:], *segments9): np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() # img9, labels9 = replicate(img9, labels9) # replicate # Augment img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste']) img9, labels9 = random_perspective(img9, labels9, segments9, degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], perspective=self.hyp['perspective'], border=self.mosaic_border) # border to remove return img9, labels9 @staticmethod def collate_fn(batch): # YOLOv8 collate function, outputs dict im, label, path, shapes = zip(*batch) # transposed for i, lb in enumerate(label): lb[:, 0] = i # add target image index for build_targets() batch_idx, cls, bboxes = torch.cat(label, 0).split((1, 1, 4), dim=1) return { 'ori_shape': tuple((x[0] if x else None) for x in shapes), 'ratio_pad': tuple((x[1] if x else None) for x in shapes), 'im_file': path, 'img': torch.stack(im, 0), 'cls': cls, 'bboxes': bboxes, 'batch_idx': batch_idx.view(-1)} @staticmethod def collate_fn_old(batch): # YOLOv5 original collate function im, label, path, shapes = zip(*batch) # transposed for i, lb in enumerate(label): lb[:, 0] = i # add target image index for build_targets() return torch.stack(im, 0), torch.cat(label, 0), path, shapes # Ancillary functions -------------------------------------------------------------------------------------------------- def flatten_recursive(path=DATASETS_DIR / 'coco128'): # Flatten a recursive directory by bringing all files to top level new_path = Path(f'{str(path)}_flat') if os.path.exists(new_path): shutil.rmtree(new_path) # delete output 
folder
    os.makedirs(new_path)  # make new output folder
    for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)):
        shutil.copyfile(file, new_path / Path(file).name)


def extract_boxes(path=DATASETS_DIR / 'coco128'):  # from utils.dataloaders import *; extract_boxes()
    # Convert detection dataset into classification dataset, with one directory per class
    path = Path(path)  # images dir
    shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in IMG_FORMATS:
            # image
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if Path(lb_file).exists():
                with open(lb_file) as f:
                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    f = (path / 'classification') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename (same 'classification' tree as the rmtree above)
                    if not f.parent.is_dir():
                        f.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'


def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
    """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
    Usage: from utils.dataloaders import *; autosplit()
    Arguments
        path:            Path to images directory
        weights:         Train, val, test weights (list, tuple)
        annotated_only:  Only use images with an annotated txt file
    """
    path = Path(path)  # images dir
    files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS)  # image files only
    n = len(files)  # number of files
    random.seed(0)  # for reproducibility
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split

    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    for x in txt:
        if (path.parent / x).exists():
            (path.parent / x).unlink()  # remove existing

    print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
    for i, img in tqdm(zip(indices, files), total=n):
        if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
            with open(path.parent / txt[i], 'a') as f:
                f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n')  # add image to txt file


def verify_image_label(args):
    # Verify one image-label pair
    im_file, lb_file, prefix = args
    nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', []  # number (missing, found, empty, corrupt), message, segments
    try:
        # verify images
        im = Image.open(im_file)
        im.verify()  # PIL verify
        shape = exif_size(im)  # image size
        assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
        assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}'
        if im.format.lower() in ('jpg', 'jpeg'):
            with open(im_file, 'rb') as f:
                f.seek(-2, 2)
                if f.read() != b'\xff\xd9':  # corrupt JPEG
                    ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100)
                    msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved'

        # verify labels
        if os.path.isfile(lb_file):
            nf = 1  # label found
            with open(lb_file) as f:
                lb = [x.split() for x in
f.read().strip().splitlines() if len(x)] if any(len(x) > 6 for x in lb): # is segment classes = np.array([x[0] for x in lb], dtype=np.float32) segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) lb = np.array(lb, dtype=np.float32) nl = len(lb) if nl: assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' _, i = np.unique(lb, axis=0, return_index=True) if len(i) < nl: # duplicate row check lb = lb[i] # remove duplicates if segments: segments = [segments[x] for x in i] msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' else: ne = 1 # label empty lb = np.zeros((0, 5), dtype=np.float32) else: nm = 1 # label missing lb = np.zeros((0, 5), dtype=np.float32) return im_file, lb, shape, segments, nm, nf, ne, nc, msg except Exception as e: nc = 1 msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' return [None, None, None, None, nm, nf, ne, nc, msg] class HUBDatasetStats(): """ Class for generating HUB dataset JSON and `-hub` dataset directory Arguments path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally Usage from utils.dataloaders import HUBDatasetStats stats = HUBDatasetStats('coco128.yaml', autodownload=True) # usage 1 stats = HUBDatasetStats('path/to/coco128.zip') # usage 2 stats.get_json(save=False) stats.process_images() """ def __init__(self, path='coco128.yaml', autodownload=False): # Initialize class zipped, data_dir, yaml_path = self._unzip(Path(path)) try: with open(check_yaml(yaml_path), errors='ignore') as f: data = yaml.safe_load(f) # data dict if zipped: data['path'] = data_dir except Exception as e: raise Exception("error/HUB/dataset_stats/yaml_load") from e check_dataset(data, autodownload) # download dataset if missing self.hub_dir = Path(data['path'] + '-hub') self.im_dir = self.hub_dir / 'images' self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images self.stats = {'nc': data['nc'], 'names': list(data['names'].values())} # statistics dictionary self.data = data @staticmethod def _find_yaml(dir): # Return data.yaml file files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive assert files, f'No *.yaml file found in {dir}' if len(files) > 1: files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed' assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' return files[0] def _unzip(self, path): # Unzip data.zip if not str(path).endswith('.zip'): # path is data.yaml return False, None, path assert Path(path).is_file(), f'Error unzipping {path}, file not found' unzip_file(path, path=path.parent) dir = path.with_suffix('') # dataset directory == zip name assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. 
path/to/abc.zip MUST unzip to path/to/abc/'
        return True, str(dir), self._find_yaml(dir)  # zipped, data_dir, yaml_path

    def _hub_ops(self, f, max_dim=1920):
        # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
        f_new = self.im_dir / Path(f).name  # dataset-hub image filename
        try:  # use PIL
            im = Image.open(f)
            r = max_dim / max(im.height, im.width)  # ratio
            if r < 1.0:  # image too large
                im = im.resize((int(im.width * r), int(im.height * r)))
            im.save(f_new, 'JPEG', quality=50, optimize=True)  # save
        except Exception as e:  # use OpenCV
            LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}')
            im = cv2.imread(f)
            im_height, im_width = im.shape[:2]
            r = max_dim / max(im_height, im_width)  # ratio
            if r < 1.0:  # image too large
                im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA)
            cv2.imwrite(str(f_new), im)

    def get_json(self, save=False, verbose=False):
        # Return dataset JSON for Ultralytics HUB
        def _round(labels):
            # Update labels to integer class and 4 decimal place floats (matches round(x, 4) below)
            return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]

        for split in 'train', 'val', 'test':
            if self.data.get(split) is None:
                self.stats[split] = None  # i.e. no test set
                continue
            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
            x = np.array([
                np.bincount(label[:, 0].astype(int), minlength=self.data['nc'])
                for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')])  # shape(128x80)
            self.stats[split] = {
                'instance_stats': {
                    'total': int(x.sum()),
                    'per_class': x.sum(0).tolist()},
                'image_stats': {
                    'total': dataset.n,
                    'unlabelled': int(np.all(x == 0, 1).sum()),
                    'per_class': (x > 0).sum(0).tolist()},
                'labels': [{
                    str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]}

        # Save, print and return
        if save:
            stats_path = self.hub_dir / 'stats.json'
            print(f'Saving {stats_path.resolve()}...')
            with open(stats_path, 'w') as f:
                json.dump(self.stats, f)  # save stats.json
        if verbose:
            print(json.dumps(self.stats, indent=2, sort_keys=False))
        return self.stats

    def process_images(self):
        # Compress images for Ultralytics HUB
        for split in 'train', 'val', 'test':
            if self.data.get(split) is None:
                continue
            dataset = LoadImagesAndLabels(self.data[split])  # load dataset
            desc = f'{split} images'
            for _ in tqdm(ThreadPool(NUM_THREADS).imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc):
                pass
        print(f'Done. All images saved to {self.im_dir}')
        return self.im_dir


# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    YOLOv5 Classification Dataset.
    Arguments
        root:     Dataset path
        augment:  Apply Albumentations augmentation pipeline (if installed)
        imgsz:    Target image size
        cache:    Cache images in RAM (True or 'ram') or on disk ('disk')
    """

    def __init__(self, root, augment, imgsz, cache=False):
        super().__init__(root=root)
        self.torch_transforms = classify_transforms(imgsz)
        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
        self.cache_ram = cache is True or cache == 'ram'
        self.cache_disk = cache == 'disk'
        self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples]  # file, index, npy, im

    def __getitem__(self, i):
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # load npy
                np.save(fn.as_posix(), cv2.imread(f))
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        if self.album_transforms:
            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"]
        else:
            sample = self.torch_transforms(im)
        return sample, j


def create_classification_dataloader(path,
                                     imgsz=224,
                                     batch_size=16,
                                     augment=True,
                                     cache=False,
                                     rank=-1,
                                     workers=8,
                                     shuffle=True):
    # Returns Dataloader object to be used with YOLOv5 Classifier
    with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
        dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache)
    batch_size = min(batch_size, len(dataset))
    nd = torch.cuda.device_count()
    nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers])
    sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
    generator = torch.Generator()
    generator.manual_seed(6148914691236517205 + RANK)
    return InfiniteDataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=shuffle and sampler is None,
                              num_workers=nw,
                              sampler=sampler,
                              pin_memory=PIN_MEMORY,
                              worker_init_fn=seed_worker,
                              generator=generator)  # or DataLoader(persistent_workers=True)


================================================
FILE: yolo/data/dataset.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license

from itertools import repeat
from multiprocessing.pool import Pool
from pathlib import Path

import torchvision
from tqdm import tqdm

from ..utils import NUM_THREADS, TQDM_BAR_FORMAT
from .augment import *
from .base import BaseDataset
from .utils import HELP_URL, LOCAL_RANK, get_hash, img2label_paths, verify_image_label


class YOLODataset(BaseDataset):
    """YOLO Dataset.

    Args:
        img_path (str): image path.
        prefix (str): prefix for log messages.
    """

    cache_version = 1.0  # dataset labels *.cache version, >= 1.0 for YOLOv8
    rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4]

    def __init__(
        self,
        img_path,
        imgsz=640,
        label_path=None,
        cache=False,
        augment=True,
        hyp=None,
        prefix="",
        rect=False,
        batch_size=None,
        stride=32,
        pad=0.0,
        single_cls=False,
        use_segments=False,
        use_keypoints=False,
    ):
        self.use_segments = use_segments
        self.use_keypoints = use_keypoints
        assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
        super().__init__(img_path, imgsz, label_path, cache, augment, hyp, prefix, rect, batch_size, stride, pad,
                         single_cls)

    def cache_labels(self, path=Path("./labels.cache")):
        # Cache dataset labels, check images and read shapes
        x = {"labels": []}
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
        desc = f"{self.prefix}Scanning {path.parent / path.stem}..."
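        # Sketch of the cache dict assembled below (keys taken from this method; shapes are
        # illustrative assumptions):
        #   x = {"labels": [{"im_file": str, "shape": (h, w), "cls": ndarray(n, 1),
        #                    "bboxes": ndarray(n, 4), "segments": list, "keypoints": array or None,
        #                    "normalized": True, "bbox_format": "xywh"}, ...],
        #        "hash": str, "results": (nf, nm, ne, nc, total), "msgs": [...], "version": 1.0}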
with Pool(NUM_THREADS) as pool: pbar = tqdm( pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(self.prefix), repeat(self.use_keypoints))), desc=desc, total=len(self.im_files), bar_format=TQDM_BAR_FORMAT, ) for im_file, lb, shape, segments, keypoint, nm_f, nf_f, ne_f, nc_f, msg in pbar: nm += nm_f nf += nf_f ne += ne_f nc += nc_f if im_file: x["labels"].append( dict( im_file=im_file, shape=shape, cls=lb[:, 0:1], # n, 1 bboxes=lb[:, 1:], # n, 4 segments=segments, keypoints=keypoint, normalized=True, bbox_format="xywh", )) if msg: msgs.append(msg) pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt" pbar.close() if msgs: LOGGER.info("\n".join(msgs)) if nf == 0: LOGGER.warning(f"{self.prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}") x["hash"] = get_hash(self.label_files + self.im_files) x["results"] = nf, nm, ne, nc, len(self.im_files) x["msgs"] = msgs # warnings x["version"] = self.cache_version # cache version try: np.save(path, x) # save cache for next time path.with_suffix(".cache.npy").rename(path) # remove .npy suffix LOGGER.info(f"{self.prefix}New cache created: {path}") except Exception as e: LOGGER.warning( f"{self.prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}") # not writeable return x def get_labels(self): self.label_files = img2label_paths(self.im_files) cache_path = Path(self.label_files[0]).parent.with_suffix(".cache") try: cache, exists = np.load(str(cache_path), allow_pickle=True).item(), True # load dict assert cache["version"] == self.cache_version # matches current version assert cache["hash"] == get_hash(self.label_files + self.im_files) # identical hash except Exception: cache, exists = self.cache_labels(cache_path), False # run cache ops # Display cache nf, nm, ne, nc, n = cache.pop("results") # found, missing, empty, corrupt, total if exists and LOCAL_RANK in {-1, 0}: d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" tqdm(None, desc=self.prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results if cache["msgs"]: LOGGER.info("\n".join(cache["msgs"])) # display warnings assert nf > 0, f"{self.prefix}No labels found in {cache_path}, can not start training. {HELP_URL}" # Read cache [cache.pop(k) for k in ("hash", "version", "msgs")] # remove items labels = cache["labels"] nl = len(np.concatenate([label["cls"] for label in labels], 0)) # number of labels assert nl > 0, f"{self.prefix}All labels empty in {cache_path}, can not start training. 
{HELP_URL}"
        return labels

    # TODO: use hyp config to set all these augmentations
    def build_transforms(self, hyp=None):
        if self.augment:
            mosaic = self.augment and not self.rect
            transforms = mosaic_transforms(self, self.imgsz, hyp) if mosaic else affine_transforms(self.imgsz, hyp)
        else:
            transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
        transforms.append(
            Format(bbox_format="xywh",
                   normalize=True,
                   return_mask=self.use_segments,
                   return_keypoint=self.use_keypoints,
                   batch_idx=True))
        return transforms

    def close_mosaic(self, hyp):
        self.transforms = affine_transforms(self.imgsz, hyp)
        self.transforms.append(
            Format(bbox_format="xywh",
                   normalize=True,
                   return_mask=self.use_segments,
                   return_keypoint=self.use_keypoints,
                   batch_idx=True))

    def update_labels_info(self, label):
        """Customize your label format here."""
        # NOTE: cls is not with bboxes now, classification and semantic segmentation need an independent cls label
        # We could also support classification and semantic segmentation by adding or removing some dict keys here.
        bboxes = label.pop("bboxes")
        segments = label.pop("segments")
        keypoints = label.pop("keypoints", None)
        bbox_format = label.pop("bbox_format")
        normalized = label.pop("normalized")
        label["instances"] = Instances(bboxes, segments, keypoints, bbox_format=bbox_format, normalized=normalized)
        return label

    @staticmethod
    def collate_fn(batch):
        # TODO: returning a dict makes things easier and cleaner when using the dataset in training,
        # but it is not yet clear whether this slows things down slightly.
        new_batch = {}
        keys = batch[0].keys()
        values = list(zip(*[list(b.values()) for b in batch]))
        for i, k in enumerate(keys):
            value = values[i]
            if k == "img":
                value = torch.stack(value, 0)
            if k in ["masks", "keypoints", "bboxes", "cls"]:
                value = torch.cat(value, 0)
            new_batch[k] = value
        new_batch["batch_idx"] = list(new_batch["batch_idx"])
        for i in range(len(new_batch["batch_idx"])):
            new_batch["batch_idx"][i] += i  # add target image index for build_targets()
        new_batch["batch_idx"] = torch.cat(new_batch["batch_idx"], 0)
        return new_batch


# Classification dataloaders -------------------------------------------------------------------------------------------
class ClassificationDataset(torchvision.datasets.ImageFolder):
    """
    YOLOv5 Classification Dataset.
    Arguments
        root:     Dataset path
        augment:  Apply Albumentations augmentation pipeline (if installed)
        imgsz:    Target image size
        cache:    Cache images in RAM (True or "ram") or on disk ("disk")
    """

    def __init__(self, root, augment, imgsz, cache=False):
        super().__init__(root=root)
        self.torch_transforms = classify_transforms(imgsz)
        self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
        self.cache_ram = cache is True or cache == "ram"
        self.cache_disk = cache == "disk"
        self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im

    def __getitem__(self, i):
        f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
        if self.cache_ram and im is None:
            im = self.samples[i][3] = cv2.imread(f)
        elif self.cache_disk:
            if not fn.exists():  # load npy
                np.save(fn.as_posix(), cv2.imread(f))
            im = np.load(fn)
        else:  # read image
            im = cv2.imread(f)  # BGR
        if self.album_transforms:
            sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"]
        else:
            sample = self.torch_transforms(im)
        return {'img': sample, 'cls': j}

    def __len__(self) -> int:
        return len(self.samples)


# TODO: support semantic segmentation
class SemanticDataset(BaseDataset):

    def __init__(self):
        pass


================================================
FILE: yolo/data/dataset_wrappers.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license

import collections
from copy import deepcopy

from .augment import LetterBox


class MixAndRectDataset:
    """A dataset wrapper that applies mixed-image transforms (e.g. mosaic, MixUp) and
    rectangular-batch LetterBox shapes to a base dataset.

    Args:
        dataset (:obj:`BaseDataset`): The dataset to be mixed.
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.imgsz = dataset.imgsz

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        labels = deepcopy(self.dataset[index])
        for transform in self.dataset.transforms.tolist():
            # mosaic and mixup
            if hasattr(transform, "get_indexes"):
                indexes = transform.get_indexes(self.dataset)
                if not isinstance(indexes, collections.abc.Sequence):
                    indexes = [indexes]
                mix_labels = [deepcopy(self.dataset[index]) for index in indexes]
                labels["mix_labels"] = mix_labels
            if self.dataset.rect and isinstance(transform, LetterBox):
                transform.new_shape = self.dataset.batch_shapes[self.dataset.batch[index]]
            labels = transform(labels)
            if "mix_labels" in labels:
                labels.pop("mix_labels")
        return labels


================================================
FILE: yolo/data/datasets/Argoverse.yaml
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license
# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
# Example usage: python train.py --data Argoverse.yaml
# parent
# ├── yolov5
# └── datasets
#     └── Argoverse ← downloads here (31.3 GB)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
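# Label rows written by the download script below follow the standard YOLO txt format, one
# `cls x_center y_center width height` row per box, normalized here by the 1920x1200
# Argoverse-HD frame size; illustrative row (made-up values): 2 0.5104 0.5417 0.0729 0.1250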
path: ../datasets/Argoverse  # dataset root dir
train: Argoverse-1.1/images/train/  # train images (relative to 'path') 39384 images
val: Argoverse-1.1/images/val/  # val images (relative to 'path') 15062 images
test: Argoverse-1.1/images/test/  # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview

# Classes
names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: bus
  5: truck
  6: traffic_light
  7: stop_sign


# Download script/URL (optional) ---------------------------------------------------------------------------------------
download: |
  import json

  from tqdm import tqdm
  from utils.general import download, Path


  def argoverse2yolo(set):
      labels = {}
      a = json.load(open(set, "rb"))
      for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
          img_id = annot['image_id']
          img_name = a['images'][img_id]['name']
          img_label_name = f'{img_name[:-3]}txt'

          cls = annot['category_id']  # instance class id
          x_center, y_center, width, height = annot['bbox']
          x_center = (x_center + width / 2) / 1920.0  # offset and scale
          y_center = (y_center + height / 2) / 1200.0  # offset and scale
          width /= 1920.0  # scale
          height /= 1200.0  # scale

          img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
          if not img_dir.exists():
              img_dir.mkdir(parents=True, exist_ok=True)

          k = str(img_dir / img_label_name)
          if k not in labels:
              labels[k] = []
          labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")

      for k in labels:
          with open(k, "w") as f:
              f.writelines(labels[k])


  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip']
  download(urls, dir=dir, delete=False)

  # Convert
  annotations_dir = 'Argoverse-HD/annotations/'
  (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images')  # rename 'tracking' to 'images'
  for d in "train.json", "val.json":
      argoverse2yolo(dir / annotations_dir / d)  # convert Argoverse annotations to YOLO labels


================================================
FILE: yolo/data/datasets/GlobalWheat2020.yaml
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license
# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
# Example usage: python train.py --data GlobalWheat2020.yaml
# parent
# ├── yolov5
# └── datasets
#     └── GlobalWheat2020 ← downloads here (7.0 GB)


# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
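# Note: unlike Argoverse above, the train/val/test splits below use form 3), a YAML list of
# image directories relative to 'path' (e.g. train spans images/arvalis_1 .. images/usask_1).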
path: ../datasets/GlobalWheat2020 # dataset root dir train: # train images (relative to 'path') 3422 images - images/arvalis_1 - images/arvalis_2 - images/arvalis_3 - images/ethz_1 - images/rres_1 - images/inrae_1 - images/usask_1 val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1) - images/ethz_1 test: # test images (optional) 1276 images - images/utokyo_1 - images/utokyo_2 - images/nau_1 - images/uq_1 # Classes names: 0: wheat_head # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from utils.general import download, Path # Download dir = Path(yaml['path']) # dataset root dir urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip'] download(urls, dir=dir) # Make Directories for p in 'annotations', 'images', 'labels': (dir / p).mkdir(parents=True, exist_ok=True) # Move for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \ 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1': (dir / p).rename(dir / 'images' / p) # move to /images f = (dir / p).with_suffix('.json') # json file if f.exists(): f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations ================================================ FILE: yolo/data/datasets/ImageNet.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels # Example usage: python classify/train.py --data imagenet # parent # ├── yolov5 # └── datasets # └── imagenet ← downloads here (144 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
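The `names` block that follows maps contiguous class indices to labels. Older YAMLs stored names as a plain list, and `check_dataset_yaml` later in this dump normalizes that form with `dict(enumerate(...))`. A tiny sketch of the same normalization, using a three-class toy list rather than the full 1000-class table:

names = ["tench", "goldfish", "great white shark"]  # old array format
if isinstance(names, (list, tuple)):
    names = dict(enumerate(names))  # {0: 'tench', 1: 'goldfish', 2: 'great white shark'}
nc = len(names)  # number of classes
print(nc, names[0])  # 3 tench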
path: ../datasets/imagenet # dataset root dir train: train # train images (relative to 'path') 1281167 images val: val # val images (relative to 'path') 50000 images test: # test images (optional) # Classes names: 0: tench 1: goldfish 2: great white shark 3: tiger shark 4: hammerhead shark 5: electric ray 6: stingray 7: cock 8: hen 9: ostrich 10: brambling 11: goldfinch 12: house finch 13: junco 14: indigo bunting 15: American robin 16: bulbul 17: jay 18: magpie 19: chickadee 20: American dipper 21: kite 22: bald eagle 23: vulture 24: great grey owl 25: fire salamander 26: smooth newt 27: newt 28: spotted salamander 29: axolotl 30: American bullfrog 31: tree frog 32: tailed frog 33: loggerhead sea turtle 34: leatherback sea turtle 35: mud turtle 36: terrapin 37: box turtle 38: banded gecko 39: green iguana 40: Carolina anole 41: desert grassland whiptail lizard 42: agama 43: frilled-necked lizard 44: alligator lizard 45: Gila monster 46: European green lizard 47: chameleon 48: Komodo dragon 49: Nile crocodile 50: American alligator 51: triceratops 52: worm snake 53: ring-necked snake 54: eastern hog-nosed snake 55: smooth green snake 56: kingsnake 57: garter snake 58: water snake 59: vine snake 60: night snake 61: boa constrictor 62: African rock python 63: Indian cobra 64: green mamba 65: sea snake 66: Saharan horned viper 67: eastern diamondback rattlesnake 68: sidewinder 69: trilobite 70: harvestman 71: scorpion 72: yellow garden spider 73: barn spider 74: European garden spider 75: southern black widow 76: tarantula 77: wolf spider 78: tick 79: centipede 80: black grouse 81: ptarmigan 82: ruffed grouse 83: prairie grouse 84: peacock 85: quail 86: partridge 87: grey parrot 88: macaw 89: sulphur-crested cockatoo 90: lorikeet 91: coucal 92: bee eater 93: hornbill 94: hummingbird 95: jacamar 96: toucan 97: duck 98: red-breasted merganser 99: goose 100: black swan 101: tusker 102: echidna 103: platypus 104: wallaby 105: koala 106: wombat 107: jellyfish 108: sea anemone 109: brain coral 110: flatworm 111: nematode 112: conch 113: snail 114: slug 115: sea slug 116: chiton 117: chambered nautilus 118: Dungeness crab 119: rock crab 120: fiddler crab 121: red king crab 122: American lobster 123: spiny lobster 124: crayfish 125: hermit crab 126: isopod 127: white stork 128: black stork 129: spoonbill 130: flamingo 131: little blue heron 132: great egret 133: bittern 134: crane (bird) 135: limpkin 136: common gallinule 137: American coot 138: bustard 139: ruddy turnstone 140: dunlin 141: common redshank 142: dowitcher 143: oystercatcher 144: pelican 145: king penguin 146: albatross 147: grey whale 148: killer whale 149: dugong 150: sea lion 151: Chihuahua 152: Japanese Chin 153: Maltese 154: Pekingese 155: Shih Tzu 156: King Charles Spaniel 157: Papillon 158: toy terrier 159: Rhodesian Ridgeback 160: Afghan Hound 161: Basset Hound 162: Beagle 163: Bloodhound 164: Bluetick Coonhound 165: Black and Tan Coonhound 166: Treeing Walker Coonhound 167: English foxhound 168: Redbone Coonhound 169: borzoi 170: Irish Wolfhound 171: Italian Greyhound 172: Whippet 173: Ibizan Hound 174: Norwegian Elkhound 175: Otterhound 176: Saluki 177: Scottish Deerhound 178: Weimaraner 179: Staffordshire Bull Terrier 180: American Staffordshire Terrier 181: Bedlington Terrier 182: Border Terrier 183: Kerry Blue Terrier 184: Irish Terrier 185: Norfolk Terrier 186: Norwich Terrier 187: Yorkshire Terrier 188: Wire Fox Terrier 189: Lakeland Terrier 190: Sealyham Terrier 191: Airedale Terrier 192: Cairn Terrier 193: Australian 
Terrier 194: Dandie Dinmont Terrier 195: Boston Terrier 196: Miniature Schnauzer 197: Giant Schnauzer 198: Standard Schnauzer 199: Scottish Terrier 200: Tibetan Terrier 201: Australian Silky Terrier 202: Soft-coated Wheaten Terrier 203: West Highland White Terrier 204: Lhasa Apso 205: Flat-Coated Retriever 206: Curly-coated Retriever 207: Golden Retriever 208: Labrador Retriever 209: Chesapeake Bay Retriever 210: German Shorthaired Pointer 211: Vizsla 212: English Setter 213: Irish Setter 214: Gordon Setter 215: Brittany 216: Clumber Spaniel 217: English Springer Spaniel 218: Welsh Springer Spaniel 219: Cocker Spaniels 220: Sussex Spaniel 221: Irish Water Spaniel 222: Kuvasz 223: Schipperke 224: Groenendael 225: Malinois 226: Briard 227: Australian Kelpie 228: Komondor 229: Old English Sheepdog 230: Shetland Sheepdog 231: collie 232: Border Collie 233: Bouvier des Flandres 234: Rottweiler 235: German Shepherd Dog 236: Dobermann 237: Miniature Pinscher 238: Greater Swiss Mountain Dog 239: Bernese Mountain Dog 240: Appenzeller Sennenhund 241: Entlebucher Sennenhund 242: Boxer 243: Bullmastiff 244: Tibetan Mastiff 245: French Bulldog 246: Great Dane 247: St. Bernard 248: husky 249: Alaskan Malamute 250: Siberian Husky 251: Dalmatian 252: Affenpinscher 253: Basenji 254: pug 255: Leonberger 256: Newfoundland 257: Pyrenean Mountain Dog 258: Samoyed 259: Pomeranian 260: Chow Chow 261: Keeshond 262: Griffon Bruxellois 263: Pembroke Welsh Corgi 264: Cardigan Welsh Corgi 265: Toy Poodle 266: Miniature Poodle 267: Standard Poodle 268: Mexican hairless dog 269: grey wolf 270: Alaskan tundra wolf 271: red wolf 272: coyote 273: dingo 274: dhole 275: African wild dog 276: hyena 277: red fox 278: kit fox 279: Arctic fox 280: grey fox 281: tabby cat 282: tiger cat 283: Persian cat 284: Siamese cat 285: Egyptian Mau 286: cougar 287: lynx 288: leopard 289: snow leopard 290: jaguar 291: lion 292: tiger 293: cheetah 294: brown bear 295: American black bear 296: polar bear 297: sloth bear 298: mongoose 299: meerkat 300: tiger beetle 301: ladybug 302: ground beetle 303: longhorn beetle 304: leaf beetle 305: dung beetle 306: rhinoceros beetle 307: weevil 308: fly 309: bee 310: ant 311: grasshopper 312: cricket 313: stick insect 314: cockroach 315: mantis 316: cicada 317: leafhopper 318: lacewing 319: dragonfly 320: damselfly 321: red admiral 322: ringlet 323: monarch butterfly 324: small white 325: sulphur butterfly 326: gossamer-winged butterfly 327: starfish 328: sea urchin 329: sea cucumber 330: cottontail rabbit 331: hare 332: Angora rabbit 333: hamster 334: porcupine 335: fox squirrel 336: marmot 337: beaver 338: guinea pig 339: common sorrel 340: zebra 341: pig 342: wild boar 343: warthog 344: hippopotamus 345: ox 346: water buffalo 347: bison 348: ram 349: bighorn sheep 350: Alpine ibex 351: hartebeest 352: impala 353: gazelle 354: dromedary 355: llama 356: weasel 357: mink 358: European polecat 359: black-footed ferret 360: otter 361: skunk 362: badger 363: armadillo 364: three-toed sloth 365: orangutan 366: gorilla 367: chimpanzee 368: gibbon 369: siamang 370: guenon 371: patas monkey 372: baboon 373: macaque 374: langur 375: black-and-white colobus 376: proboscis monkey 377: marmoset 378: white-headed capuchin 379: howler monkey 380: titi 381: Geoffroy's spider monkey 382: common squirrel monkey 383: ring-tailed lemur 384: indri 385: Asian elephant 386: African bush elephant 387: red panda 388: giant panda 389: snoek 390: eel 391: coho salmon 392: rock beauty 393: clownfish 394: sturgeon 395: garfish 
396: lionfish 397: pufferfish 398: abacus 399: abaya 400: academic gown 401: accordion 402: acoustic guitar 403: aircraft carrier 404: airliner 405: airship 406: altar 407: ambulance 408: amphibious vehicle 409: analog clock 410: apiary 411: apron 412: waste container 413: assault rifle 414: backpack 415: bakery 416: balance beam 417: balloon 418: ballpoint pen 419: Band-Aid 420: banjo 421: baluster 422: barbell 423: barber chair 424: barbershop 425: barn 426: barometer 427: barrel 428: wheelbarrow 429: baseball 430: basketball 431: bassinet 432: bassoon 433: swimming cap 434: bath towel 435: bathtub 436: station wagon 437: lighthouse 438: beaker 439: military cap 440: beer bottle 441: beer glass 442: bell-cot 443: bib 444: tandem bicycle 445: bikini 446: ring binder 447: binoculars 448: birdhouse 449: boathouse 450: bobsleigh 451: bolo tie 452: poke bonnet 453: bookcase 454: bookstore 455: bottle cap 456: bow 457: bow tie 458: brass 459: bra 460: breakwater 461: breastplate 462: broom 463: bucket 464: buckle 465: bulletproof vest 466: high-speed train 467: butcher shop 468: taxicab 469: cauldron 470: candle 471: cannon 472: canoe 473: can opener 474: cardigan 475: car mirror 476: carousel 477: tool kit 478: carton 479: car wheel 480: automated teller machine 481: cassette 482: cassette player 483: castle 484: catamaran 485: CD player 486: cello 487: mobile phone 488: chain 489: chain-link fence 490: chain mail 491: chainsaw 492: chest 493: chiffonier 494: chime 495: china cabinet 496: Christmas stocking 497: church 498: movie theater 499: cleaver 500: cliff dwelling 501: cloak 502: clogs 503: cocktail shaker 504: coffee mug 505: coffeemaker 506: coil 507: combination lock 508: computer keyboard 509: confectionery store 510: container ship 511: convertible 512: corkscrew 513: cornet 514: cowboy boot 515: cowboy hat 516: cradle 517: crane (machine) 518: crash helmet 519: crate 520: infant bed 521: Crock Pot 522: croquet ball 523: crutch 524: cuirass 525: dam 526: desk 527: desktop computer 528: rotary dial telephone 529: diaper 530: digital clock 531: digital watch 532: dining table 533: dishcloth 534: dishwasher 535: disc brake 536: dock 537: dog sled 538: dome 539: doormat 540: drilling rig 541: drum 542: drumstick 543: dumbbell 544: Dutch oven 545: electric fan 546: electric guitar 547: electric locomotive 548: entertainment center 549: envelope 550: espresso machine 551: face powder 552: feather boa 553: filing cabinet 554: fireboat 555: fire engine 556: fire screen sheet 557: flagpole 558: flute 559: folding chair 560: football helmet 561: forklift 562: fountain 563: fountain pen 564: four-poster bed 565: freight car 566: French horn 567: frying pan 568: fur coat 569: garbage truck 570: gas mask 571: gas pump 572: goblet 573: go-kart 574: golf ball 575: golf cart 576: gondola 577: gong 578: gown 579: grand piano 580: greenhouse 581: grille 582: grocery store 583: guillotine 584: barrette 585: hair spray 586: half-track 587: hammer 588: hamper 589: hair dryer 590: hand-held computer 591: handkerchief 592: hard disk drive 593: harmonica 594: harp 595: harvester 596: hatchet 597: holster 598: home theater 599: honeycomb 600: hook 601: hoop skirt 602: horizontal bar 603: horse-drawn vehicle 604: hourglass 605: iPod 606: clothes iron 607: jack-o'-lantern 608: jeans 609: jeep 610: T-shirt 611: jigsaw puzzle 612: pulled rickshaw 613: joystick 614: kimono 615: knee pad 616: knot 617: lab coat 618: ladle 619: lampshade 620: laptop computer 621: lawn mower 622: lens cap 623: paper knife 624: 
library 625: lifeboat 626: lighter 627: limousine 628: ocean liner 629: lipstick 630: slip-on shoe 631: lotion 632: speaker 633: loupe 634: sawmill 635: magnetic compass 636: mail bag 637: mailbox 638: tights 639: tank suit 640: manhole cover 641: maraca 642: marimba 643: mask 644: match 645: maypole 646: maze 647: measuring cup 648: medicine chest 649: megalith 650: microphone 651: microwave oven 652: military uniform 653: milk can 654: minibus 655: miniskirt 656: minivan 657: missile 658: mitten 659: mixing bowl 660: mobile home 661: Model T 662: modem 663: monastery 664: monitor 665: moped 666: mortar 667: square academic cap 668: mosque 669: mosquito net 670: scooter 671: mountain bike 672: tent 673: computer mouse 674: mousetrap 675: moving van 676: muzzle 677: nail 678: neck brace 679: necklace 680: nipple 681: notebook computer 682: obelisk 683: oboe 684: ocarina 685: odometer 686: oil filter 687: organ 688: oscilloscope 689: overskirt 690: bullock cart 691: oxygen mask 692: packet 693: paddle 694: paddle wheel 695: padlock 696: paintbrush 697: pajamas 698: palace 699: pan flute 700: paper towel 701: parachute 702: parallel bars 703: park bench 704: parking meter 705: passenger car 706: patio 707: payphone 708: pedestal 709: pencil case 710: pencil sharpener 711: perfume 712: Petri dish 713: photocopier 714: plectrum 715: Pickelhaube 716: picket fence 717: pickup truck 718: pier 719: piggy bank 720: pill bottle 721: pillow 722: ping-pong ball 723: pinwheel 724: pirate ship 725: pitcher 726: hand plane 727: planetarium 728: plastic bag 729: plate rack 730: plow 731: plunger 732: Polaroid camera 733: pole 734: police van 735: poncho 736: billiard table 737: soda bottle 738: pot 739: potter's wheel 740: power drill 741: prayer rug 742: printer 743: prison 744: projectile 745: projector 746: hockey puck 747: punching bag 748: purse 749: quill 750: quilt 751: race car 752: racket 753: radiator 754: radio 755: radio telescope 756: rain barrel 757: recreational vehicle 758: reel 759: reflex camera 760: refrigerator 761: remote control 762: restaurant 763: revolver 764: rifle 765: rocking chair 766: rotisserie 767: eraser 768: rugby ball 769: ruler 770: running shoe 771: safe 772: safety pin 773: salt shaker 774: sandal 775: sarong 776: saxophone 777: scabbard 778: weighing scale 779: school bus 780: schooner 781: scoreboard 782: CRT screen 783: screw 784: screwdriver 785: seat belt 786: sewing machine 787: shield 788: shoe store 789: shoji 790: shopping basket 791: shopping cart 792: shovel 793: shower cap 794: shower curtain 795: ski 796: ski mask 797: sleeping bag 798: slide rule 799: sliding door 800: slot machine 801: snorkel 802: snowmobile 803: snowplow 804: soap dispenser 805: soccer ball 806: sock 807: solar thermal collector 808: sombrero 809: soup bowl 810: space bar 811: space heater 812: space shuttle 813: spatula 814: motorboat 815: spider web 816: spindle 817: sports car 818: spotlight 819: stage 820: steam locomotive 821: through arch bridge 822: steel drum 823: stethoscope 824: scarf 825: stone wall 826: stopwatch 827: stove 828: strainer 829: tram 830: stretcher 831: couch 832: stupa 833: submarine 834: suit 835: sundial 836: sunglass 837: sunglasses 838: sunscreen 839: suspension bridge 840: mop 841: sweatshirt 842: swimsuit 843: swing 844: switch 845: syringe 846: table lamp 847: tank 848: tape player 849: teapot 850: teddy bear 851: television 852: tennis ball 853: thatched roof 854: front curtain 855: thimble 856: threshing machine 857: throne 858: tile roof 859: 
toaster 860: tobacco shop 861: toilet seat 862: torch 863: totem pole 864: tow truck 865: toy store 866: tractor 867: semi-trailer truck 868: tray 869: trench coat 870: tricycle 871: trimaran 872: tripod 873: triumphal arch 874: trolleybus 875: trombone 876: tub 877: turnstile 878: typewriter keyboard 879: umbrella 880: unicycle 881: upright piano 882: vacuum cleaner 883: vase 884: vault 885: velvet 886: vending machine 887: vestment 888: viaduct 889: violin 890: volleyball 891: waffle iron 892: wall clock 893: wallet 894: wardrobe 895: military aircraft 896: sink 897: washing machine 898: water bottle 899: water jug 900: water tower 901: whiskey jug 902: whistle 903: wig 904: window screen 905: window shade 906: Windsor tie 907: wine bottle 908: wing 909: wok 910: wooden spoon 911: wool 912: split-rail fence 913: shipwreck 914: yawl 915: yurt 916: website 917: comic book 918: crossword 919: traffic sign 920: traffic light 921: dust jacket 922: menu 923: plate 924: guacamole 925: consomme 926: hot pot 927: trifle 928: ice cream 929: ice pop 930: baguette 931: bagel 932: pretzel 933: cheeseburger 934: hot dog 935: mashed potato 936: cabbage 937: broccoli 938: cauliflower 939: zucchini 940: spaghetti squash 941: acorn squash 942: butternut squash 943: cucumber 944: artichoke 945: bell pepper 946: cardoon 947: mushroom 948: Granny Smith 949: strawberry 950: orange 951: lemon 952: fig 953: pineapple 954: banana 955: jackfruit 956: custard apple 957: pomegranate 958: hay 959: carbonara 960: chocolate syrup 961: dough 962: meatloaf 963: pizza 964: pot pie 965: burrito 966: red wine 967: espresso 968: cup 969: eggnog 970: alp 971: bubble 972: cliff 973: coral reef 974: geyser 975: lakeshore 976: promontory 977: shoal 978: seashore 979: valley 980: volcano 981: baseball player 982: bridegroom 983: scuba diver 984: rapeseed 985: daisy 986: yellow lady's slipper 987: corn 988: acorn 989: rose hip 990: horse chestnut seed 991: coral fungus 992: agaric 993: gyromitra 994: stinkhorn mushroom 995: earth star 996: hen-of-the-woods 997: bolete 998: ear 999: toilet paper # Download script/URL (optional) download: data/scripts/get_imagenet.sh ================================================ FILE: yolo/data/datasets/Objects365.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # Objects365 dataset https://www.objects365.org/ by Megvii # Example usage: python train.py --data Objects365.yaml # parent # ├── yolov5 # └── datasets # └── Objects365 ← downloads here (712 GB = 367G data + 345G zips) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
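The Objects365 download script below converts corner-format boxes with `xyxy2xywhn` before writing labels. A minimal NumPy sketch of that conversion under the same convention; this is an illustration, not the repository's `yolo.utils.ops` implementation:

import numpy as np

def xyxy2xywhn_sketch(xyxy, w, h, clip=False):
    # Convert Nx4 [x1, y1, x2, y2] pixel boxes to normalized [xc, yc, w, h]
    xyxy = np.asarray(xyxy, dtype=np.float64)
    if clip:
        xyxy[:, [0, 2]] = xyxy[:, [0, 2]].clip(0, w)  # clip x to image width
        xyxy[:, [1, 3]] = xyxy[:, [1, 3]].clip(0, h)  # clip y to image height
    out = np.empty_like(xyxy)
    out[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2 / w  # x center
    out[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2 / h  # y center
    out[:, 2] = (xyxy[:, 2] - xyxy[:, 0]) / w      # width
    out[:, 3] = (xyxy[:, 3] - xyxy[:, 1]) / h      # height
    return out

print(xyxy2xywhn_sketch([[100, 50, 300, 250]], w=400, h=400))
# values: [0.5, 0.375, 0.5, 0.5]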
path: ../datasets/Objects365 # dataset root dir train: images/train # train images (relative to 'path') 1742289 images val: images/val # val images (relative to 'path') 80000 images test: # test images (optional) # Classes names: 0: Person 1: Sneakers 2: Chair 3: Other Shoes 4: Hat 5: Car 6: Lamp 7: Glasses 8: Bottle 9: Desk 10: Cup 11: Street Lights 12: Cabinet/shelf 13: Handbag/Satchel 14: Bracelet 15: Plate 16: Picture/Frame 17: Helmet 18: Book 19: Gloves 20: Storage box 21: Boat 22: Leather Shoes 23: Flower 24: Bench 25: Potted Plant 26: Bowl/Basin 27: Flag 28: Pillow 29: Boots 30: Vase 31: Microphone 32: Necklace 33: Ring 34: SUV 35: Wine Glass 36: Belt 37: Monitor/TV 38: Backpack 39: Umbrella 40: Traffic Light 41: Speaker 42: Watch 43: Tie 44: Trash bin Can 45: Slippers 46: Bicycle 47: Stool 48: Barrel/bucket 49: Van 50: Couch 51: Sandals 52: Basket 53: Drum 54: Pen/Pencil 55: Bus 56: Wild Bird 57: High Heels 58: Motorcycle 59: Guitar 60: Carpet 61: Cell Phone 62: Bread 63: Camera 64: Canned 65: Truck 66: Traffic cone 67: Cymbal 68: Lifesaver 69: Towel 70: Stuffed Toy 71: Candle 72: Sailboat 73: Laptop 74: Awning 75: Bed 76: Faucet 77: Tent 78: Horse 79: Mirror 80: Power outlet 81: Sink 82: Apple 83: Air Conditioner 84: Knife 85: Hockey Stick 86: Paddle 87: Pickup Truck 88: Fork 89: Traffic Sign 90: Balloon 91: Tripod 92: Dog 93: Spoon 94: Clock 95: Pot 96: Cow 97: Cake 98: Dinning Table 99: Sheep 100: Hanger 101: Blackboard/Whiteboard 102: Napkin 103: Other Fish 104: Orange/Tangerine 105: Toiletry 106: Keyboard 107: Tomato 108: Lantern 109: Machinery Vehicle 110: Fan 111: Green Vegetables 112: Banana 113: Baseball Glove 114: Airplane 115: Mouse 116: Train 117: Pumpkin 118: Soccer 119: Skiboard 120: Luggage 121: Nightstand 122: Tea pot 123: Telephone 124: Trolley 125: Head Phone 126: Sports Car 127: Stop Sign 128: Dessert 129: Scooter 130: Stroller 131: Crane 132: Remote 133: Refrigerator 134: Oven 135: Lemon 136: Duck 137: Baseball Bat 138: Surveillance Camera 139: Cat 140: Jug 141: Broccoli 142: Piano 143: Pizza 144: Elephant 145: Skateboard 146: Surfboard 147: Gun 148: Skating and Skiing shoes 149: Gas stove 150: Donut 151: Bow Tie 152: Carrot 153: Toilet 154: Kite 155: Strawberry 156: Other Balls 157: Shovel 158: Pepper 159: Computer Box 160: Toilet Paper 161: Cleaning Products 162: Chopsticks 163: Microwave 164: Pigeon 165: Baseball 166: Cutting/chopping Board 167: Coffee Table 168: Side Table 169: Scissors 170: Marker 171: Pie 172: Ladder 173: Snowboard 174: Cookies 175: Radiator 176: Fire Hydrant 177: Basketball 178: Zebra 179: Grape 180: Giraffe 181: Potato 182: Sausage 183: Tricycle 184: Violin 185: Egg 186: Fire Extinguisher 187: Candy 188: Fire Truck 189: Billiards 190: Converter 191: Bathtub 192: Wheelchair 193: Golf Club 194: Briefcase 195: Cucumber 196: Cigar/Cigarette 197: Paint Brush 198: Pear 199: Heavy Truck 200: Hamburger 201: Extractor 202: Extension Cord 203: Tong 204: Tennis Racket 205: Folder 206: American Football 207: earphone 208: Mask 209: Kettle 210: Tennis 211: Ship 212: Swing 213: Coffee Machine 214: Slide 215: Carriage 216: Onion 217: Green beans 218: Projector 219: Frisbee 220: Washing Machine/Drying Machine 221: Chicken 222: Printer 223: Watermelon 224: Saxophone 225: Tissue 226: Toothbrush 227: Ice cream 228: Hot-air balloon 229: Cello 230: French Fries 231: Scale 232: Trophy 233: Cabbage 234: Hot dog 235: Blender 236: Peach 237: Rice 238: Wallet/Purse 239: Volleyball 240: Deer 241: Goose 242: Tape 243: Tablet 244: Cosmetics 245: Trumpet 246: 
Pineapple 247: Golf Ball 248: Ambulance 249: Parking meter 250: Mango 251: Key 252: Hurdle 253: Fishing Rod 254: Medal 255: Flute 256: Brush 257: Penguin 258: Megaphone 259: Corn 260: Lettuce 261: Garlic 262: Swan 263: Helicopter 264: Green Onion 265: Sandwich 266: Nuts 267: Speed Limit Sign 268: Induction Cooker 269: Broom 270: Trombone 271: Plum 272: Rickshaw 273: Goldfish 274: Kiwi fruit 275: Router/modem 276: Poker Card 277: Toaster 278: Shrimp 279: Sushi 280: Cheese 281: Notepaper 282: Cherry 283: Pliers 284: CD 285: Pasta 286: Hammer 287: Cue 288: Avocado 289: Hamimelon 290: Flask 291: Mushroom 292: Screwdriver 293: Soap 294: Recorder 295: Bear 296: Eggplant 297: Board Eraser 298: Coconut 299: Tape Measure/Ruler 300: Pig 301: Showerhead 302: Globe 303: Chips 304: Steak 305: Crosswalk Sign 306: Stapler 307: Camel 308: Formula 1 309: Pomegranate 310: Dishwasher 311: Crab 312: Hoverboard 313: Meat ball 314: Rice Cooker 315: Tuba 316: Calculator 317: Papaya 318: Antelope 319: Parrot 320: Seal 321: Butterfly 322: Dumbbell 323: Donkey 324: Lion 325: Urinal 326: Dolphin 327: Electric Drill 328: Hair Dryer 329: Egg tart 330: Jellyfish 331: Treadmill 332: Lighter 333: Grapefruit 334: Game board 335: Mop 336: Radish 337: Baozi 338: Target 339: French 340: Spring Rolls 341: Monkey 342: Rabbit 343: Pencil Case 344: Yak 345: Red Cabbage 346: Binoculars 347: Asparagus 348: Barbell 349: Scallop 350: Noddles 351: Comb 352: Dumpling 353: Oyster 354: Table Tennis paddle 355: Cosmetics Brush/Eyeliner Pencil 356: Chainsaw 357: Eraser 358: Lobster 359: Durian 360: Okra 361: Lipstick 362: Cosmetics Mirror 363: Curling 364: Table Tennis # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from tqdm import tqdm from utils.general import Path, check_requirements, download, np, xyxy2xywhn check_requirements(('pycocotools>=2.0',)) from pycocotools.coco import COCO # Make Directories dir = Path(yaml['path']) # dataset root dir for p in 'images', 'labels': (dir / p).mkdir(parents=True, exist_ok=True) for q in 'train', 'val': (dir / p / q).mkdir(parents=True, exist_ok=True) # Train, Val Splits for split, patches in [('train', 50 + 1), ('val', 43 + 1)]: print(f"Processing {split} in {patches} patches ...") images, labels = dir / 'images' / split, dir / 'labels' / split # Download url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/" if split == 'train': download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir, delete=False) # annotations json download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, delete=False, threads=8) elif split == 'val': download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir, delete=False) # annotations json download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, delete=False, threads=8) download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, delete=False, threads=8) # Move for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'): f.rename(images / f.name) # move to /images/{split} # Labels coco = COCO(dir / f'zhiyuan_objv2_{split}.json') names = [x["name"] for x in coco.loadCats(coco.getCatIds())] for cid, cat in enumerate(names): catIds = coco.getCatIds(catNms=[cat]) imgIds = coco.getImgIds(catIds=catIds) for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'): width, height = im["width"], im["height"] path = 
Path(im["file_name"]) # image filename try: with open(labels / path.with_suffix('.txt').name, 'a') as file: annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) for a in coco.loadAnns(annIds): x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4) x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n") except Exception as e: print(e) ================================================ FILE: yolo/data/datasets/SKU-110K.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail # Example usage: python train.py --data SKU-110K.yaml # parent # ├── yolov5 # └── datasets # └── SKU-110K ← downloads here (13.6 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] path: ../datasets/SKU-110K # dataset root dir train: train.txt # train images (relative to 'path') 8219 images val: val.txt # val images (relative to 'path') 588 images test: test.txt # test images (optional) 2936 images # Classes names: 0: object # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | import shutil from tqdm import tqdm from utils.general import np, pd, Path, download, xyxy2xywh # Download dir = Path(yaml['path']) # dataset root dir parent = Path(dir.parent) # download dir urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'] download(urls, dir=parent, delete=False) # Rename directories if dir.exists(): shutil.rmtree(dir) (parent / 'SKU110K_fixed').rename(dir) # rename dir (dir / 'labels').mkdir(parents=True, exist_ok=True) # create labels dir # Convert labels names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height' # column names for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv': x = pd.read_csv(dir / 'annotations' / d, names=names).values # annotations images, unique_images = x[:, 0], np.unique(x[:, 0]) with open((dir / d).with_suffix('.txt').__str__().replace('annotations_', ''), 'w') as f: f.writelines(f'./images/{s}\n' for s in unique_images) for im in tqdm(unique_images, desc=f'Converting {dir / d}'): cls = 0 # single-class dataset with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f: for r in x[images == im]: w, h = r[6], r[7] # image width, height xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0] # instance f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n") # write label ================================================ FILE: yolo/data/datasets/VOC.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford # Example usage: python train.py --data VOC.yaml # parent # ├── yolov5 # └── datasets # └── VOC ← downloads here (2.8 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
path: ../datasets/VOC train: # train images (relative to 'path') 16551 images - images/train2012 - images/train2007 - images/val2012 - images/val2007 val: # val images (relative to 'path') 4952 images - images/test2007 test: # test images (optional) - images/test2007 # Classes names: 0: aeroplane 1: bicycle 2: bird 3: boat 4: bottle 5: bus 6: car 7: cat 8: chair 9: cow 10: diningtable 11: dog 12: horse 13: motorbike 14: person 15: pottedplant 16: sheep 17: sofa 18: train 19: tvmonitor # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | import xml.etree.ElementTree as ET from tqdm import tqdm from utils.general import download, Path def convert_label(path, lb_path, year, image_id): def convert_box(size, box): dw, dh = 1. / size[0], 1. / size[1] x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2] return x * dw, y * dh, w * dw, h * dh in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml') out_file = open(lb_path, 'w') tree = ET.parse(in_file) root = tree.getroot() size = root.find('size') w = int(size.find('width').text) h = int(size.find('height').text) names = list(yaml['names'].values()) # names list for obj in root.iter('object'): cls = obj.find('name').text if cls in names and int(obj.find('difficult').text) != 1: xmlbox = obj.find('bndbox') bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')]) cls_id = names.index(cls) # class id out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n') # Download dir = Path(yaml['path']) # dataset root dir url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' urls = [f'{url}VOCtrainval_06-Nov-2007.zip', # 446MB, 5012 images f'{url}VOCtest_06-Nov-2007.zip', # 438MB, 4953 images f'{url}VOCtrainval_11-May-2012.zip'] # 1.95GB, 17126 images download(urls, dir=dir / 'images', delete=False, curl=True, threads=3) # Convert path = dir / 'images/VOCdevkit' for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'): imgs_path = dir / 'images' / f'{image_set}{year}' lbs_path = dir / 'labels' / f'{image_set}{year}' imgs_path.mkdir(exist_ok=True, parents=True) lbs_path.mkdir(exist_ok=True, parents=True) with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f: image_ids = f.read().strip().split() for id in tqdm(image_ids, desc=f'{image_set}{year}'): f = path / f'VOC{year}/JPEGImages/{id}.jpg' # old img path lb_path = (lbs_path / f.name).with_suffix('.txt') # new label path f.rename(imgs_path / f.name) # move image convert_label(path, lb_path, year, id) # convert labels to YOLO format ================================================ FILE: yolo/data/datasets/VisDrone.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University # Example usage: python train.py --data VisDrone.yaml # parent # ├── yolov5 # └── datasets # └── VisDrone ← downloads here (2.3 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
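The VisDrone converter below reads comma-separated annotation rows, skips rows whose score field marks an 'ignored region', and shifts the 1-based category id down by one so VisDrone class 1 ('pedestrian') becomes YOLO class 0. A toy parse of one such row (the values are invented):

# Hypothetical VisDrone row: x, y, w, h, score, category, truncation, occlusion
row = "684,8,273,116,1,1,0,0".split(",")

if row[4] != "0":  # '0' marks VisDrone 'ignored regions'
    cls = int(row[5]) - 1           # 1-based VisDrone id -> 0-based YOLO id
    x, y, w, h = map(int, row[:4])  # top-left corner and size in pixels
    print(cls, x, y, w, h)          # 0 684 8 273 116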
path: ../datasets/VisDrone # dataset root dir train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images # Classes names: 0: pedestrian 1: people 2: bicycle 3: car 4: van 5: truck 6: tricycle 7: awning-tricycle 8: bus 9: motor # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | from utils.general import download, os, Path def visdrone2yolo(dir): from PIL import Image from tqdm import tqdm def convert_box(size, box): # Convert VisDrone box to YOLO xywh box dw = 1. / size[0] dh = 1. / size[1] return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh (dir / 'labels').mkdir(parents=True, exist_ok=True) # make labels directory pbar = tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}') for f in pbar: img_size = Image.open((dir / 'images' / f.name).with_suffix('.jpg')).size lines = [] with open(f, 'r') as file: # read annotation.txt for row in [x.split(',') for x in file.read().strip().splitlines()]: if row[4] == '0': # VisDrone 'ignored regions' class 0 continue cls = int(row[5]) - 1 box = convert_box(img_size, tuple(map(int, row[:4]))) lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n") with open(str(f).replace(os.sep + 'annotations' + os.sep, os.sep + 'labels' + os.sep), 'w') as fl: fl.writelines(lines) # write label.txt # Download dir = Path(yaml['path']) # dataset root dir urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip', 'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip'] download(urls, dir=dir, curl=True, threads=4) # Convert for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev': visdrone2yolo(dir / d) # convert VisDrone annotations to YOLO labels ================================================ FILE: yolo/data/datasets/coco.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # COCO 2017 dataset http://cocodataset.org by Microsoft # Example usage: python train.py --data coco.yaml # parent # ├── yolov5 # └── datasets # └── coco ← downloads here (20.1 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
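`train2017.txt`-style split files, used by the COCO config below, store one image path per line relative to the dataset root; the repo's loaders resolve each line against that root when they build the file list. A small sketch of that resolution (the file name and contents here are hypothetical):

from pathlib import Path

root = Path("../datasets/coco")  # the YAML's 'path' root
# Hypothetical lines from a train2017.txt-style split file
lines = ["./images/train2017/000000000009.jpg",
         "./images/train2017/000000000025.jpg"]

imgs = [(root / p).resolve() for p in lines]  # one absolute path per image
print(imgs[0].name)  # 000000000009.jpg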
path: ../datasets/coco # dataset root dir train: train2017.txt # train images (relative to 'path') 118287 images val: val2017.txt # val images (relative to 'path') 5000 images test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 # Classes names: 0: person 1: bicycle 2: car 3: motorcycle 4: airplane 5: bus 6: train 7: truck 8: boat 9: traffic light 10: fire hydrant 11: stop sign 12: parking meter 13: bench 14: bird 15: cat 16: dog 17: horse 18: sheep 19: cow 20: elephant 21: bear 22: zebra 23: giraffe 24: backpack 25: umbrella 26: handbag 27: tie 28: suitcase 29: frisbee 30: skis 31: snowboard 32: sports ball 33: kite 34: baseball bat 35: baseball glove 36: skateboard 37: surfboard 38: tennis racket 39: bottle 40: wine glass 41: cup 42: fork 43: knife 44: spoon 45: bowl 46: banana 47: apple 48: sandwich 49: orange 50: broccoli 51: carrot 52: hot dog 53: pizza 54: donut 55: cake 56: chair 57: couch 58: potted plant 59: bed 60: dining table 61: toilet 62: tv 63: laptop 64: mouse 65: remote 66: keyboard 67: cell phone 68: microwave 69: oven 70: toaster 71: sink 72: refrigerator 73: book 74: clock 75: vase 76: scissors 77: teddy bear 78: hair drier 79: toothbrush # Download script/URL (optional) download: | from utils.general import download, Path # Download labels segments = True # segment or box labels dir = Path(yaml['path']) # dataset root dir url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels download(urls, dir=dir.parent) # Download data urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) download(urls, dir=dir / 'images', threads=3) ================================================ FILE: yolo/data/datasets/coco128-seg.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets # └── coco128-seg ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
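Segmentation labels like those used by coco128-seg below store a class id followed by a flattened polygon of normalized xy pairs; `verify_image_label` later in this dump parses exactly this layout with `reshape(-1, 2)`. A toy decode of one such line (coordinates invented):

import numpy as np

# Hypothetical segment label line: class id, then x1 y1 x2 y2 x3 y3 ...
line = "0 0.1 0.1 0.9 0.1 0.5 0.8".split()

cls = int(line[0])                                             # class id
polygon = np.array(line[1:], dtype=np.float32).reshape(-1, 2)  # (points, xy)
print(cls, polygon.shape)  # 0 (3, 2)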
path: ../datasets/coco128-seg # dataset root dir train: images/train2017 # train images (relative to 'path') 128 images val: images/train2017 # val images (relative to 'path') 128 images test: # test images (optional) # Classes names: 0: person 1: bicycle 2: car 3: motorcycle 4: airplane 5: bus 6: train 7: truck 8: boat 9: traffic light 10: fire hydrant 11: stop sign 12: parking meter 13: bench 14: bird 15: cat 16: dog 17: horse 18: sheep 19: cow 20: elephant 21: bear 22: zebra 23: giraffe 24: backpack 25: umbrella 26: handbag 27: tie 28: suitcase 29: frisbee 30: skis 31: snowboard 32: sports ball 33: kite 34: baseball bat 35: baseball glove 36: skateboard 37: surfboard 38: tennis racket 39: bottle 40: wine glass 41: cup 42: fork 43: knife 44: spoon 45: bowl 46: banana 47: apple 48: sandwich 49: orange 50: broccoli 51: carrot 52: hot dog 53: pizza 54: donut 55: cake 56: chair 57: couch 58: potted plant 59: bed 60: dining table 61: toilet 62: tv 63: laptop 64: mouse 65: remote 66: keyboard 67: cell phone 68: microwave 69: oven 70: toaster 71: sink 72: refrigerator 73: book 74: clock 75: vase 76: scissors 77: teddy bear 78: hair drier 79: toothbrush # Download script/URL (optional) download: https://ultralytics.com/assets/coco128-seg.zip ================================================ FILE: yolo/data/datasets/coco128.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics # Example usage: python train.py --data coco128.yaml # parent # ├── yolov5 # └── datasets # └── coco128 ← downloads here (7 MB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] path: ../datasets/coco128 # dataset root dir train: images/train2017 # train images (relative to 'path') 128 images val: images/train2017 # val images (relative to 'path') 128 images test: # test images (optional) # Classes names: 0: person 1: bicycle 2: car 3: motorcycle 4: airplane 5: bus 6: train 7: truck 8: boat 9: traffic light 10: fire hydrant 11: stop sign 12: parking meter 13: bench 14: bird 15: cat 16: dog 17: horse 18: sheep 19: cow 20: elephant 21: bear 22: zebra 23: giraffe 24: backpack 25: umbrella 26: handbag 27: tie 28: suitcase 29: frisbee 30: skis 31: snowboard 32: sports ball 33: kite 34: baseball bat 35: baseball glove 36: skateboard 37: surfboard 38: tennis racket 39: bottle 40: wine glass 41: cup 42: fork 43: knife 44: spoon 45: bowl 46: banana 47: apple 48: sandwich 49: orange 50: broccoli 51: carrot 52: hot dog 53: pizza 54: donut 55: cake 56: chair 57: couch 58: potted plant 59: bed 60: dining table 61: toilet 62: tv 63: laptop 64: mouse 65: remote 66: keyboard 67: cell phone 68: microwave 69: oven 70: toaster 71: sink 72: refrigerator 73: book 74: clock 75: vase 76: scissors 77: teddy bear 78: hair drier 79: toothbrush # Download script/URL (optional) download: https://ultralytics.com/assets/coco128.zip ================================================ FILE: yolo/data/datasets/xView.yaml ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA) # -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! 
-------- # Example usage: python train.py --data xView.yaml # parent # ├── yolov5 # └── datasets # └── xView ← downloads here (20.7 GB) # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] path: ../datasets/xView # dataset root dir train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images # Classes names: 0: Fixed-wing Aircraft 1: Small Aircraft 2: Cargo Plane 3: Helicopter 4: Passenger Vehicle 5: Small Car 6: Bus 7: Pickup Truck 8: Utility Truck 9: Truck 10: Cargo Truck 11: Truck w/Box 12: Truck Tractor 13: Trailer 14: Truck w/Flatbed 15: Truck w/Liquid 16: Crane Truck 17: Railway Vehicle 18: Passenger Car 19: Cargo Car 20: Flat Car 21: Tank car 22: Locomotive 23: Maritime Vessel 24: Motorboat 25: Sailboat 26: Tugboat 27: Barge 28: Fishing Vessel 29: Ferry 30: Yacht 31: Container Ship 32: Oil Tanker 33: Engineering Vehicle 34: Tower crane 35: Container Crane 36: Reach Stacker 37: Straddle Carrier 38: Mobile Crane 39: Dump Truck 40: Haul Truck 41: Scraper/Tractor 42: Front loader/Bulldozer 43: Excavator 44: Cement Mixer 45: Ground Grader 46: Hut/Tent 47: Shed 48: Building 49: Aircraft Hangar 50: Damaged Building 51: Facility 52: Construction Site 53: Vehicle Lot 54: Helipad 55: Storage Tank 56: Shipping container lot 57: Shipping Container 58: Pylon 59: Tower # Download script/URL (optional) --------------------------------------------------------------------------------------- download: | import json import os from pathlib import Path import numpy as np from PIL import Image from tqdm import tqdm from utils.dataloaders import autosplit from utils.general import download, xyxy2xywhn def convert_labels(fname=Path('xView/xView_train.geojson')): # Convert xView geoJSON labels to YOLO format path = fname.parent with open(fname) as f: print(f'Loading {fname}...') data = json.load(f) # Make dirs labels = Path(path / 'labels' / 'train') os.system(f'rm -rf {labels}') labels.mkdir(parents=True, exist_ok=True) # xView classes 11-94 to 0-59 xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11, 12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1, 29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46, 47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59] shapes = {} for feature in tqdm(data['features'], desc=f'Converting {fname}'): p = feature['properties'] if p['bounds_imcoords']: id = p['image_id'] file = path / 'train_images' / id if file.exists(): # 1395.tif missing try: box = np.array([int(num) for num in p['bounds_imcoords'].split(",")]) assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}' cls = p['type_id'] cls = xview_class2index[int(cls)] # xView class to 0-59 assert 59 >= cls >= 0, f'incorrect class index {cls}' # Write YOLO label if id not in shapes: shapes[id] = Image.open(file).size box = xyxy2xywhn(box[None].astype(float), w=shapes[id][0], h=shapes[id][1], clip=True) with open((labels / id).with_suffix('.txt'), 'a') as f: f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt except Exception as e: print(f'WARNING: skipping one label for {file}: {e}') # Download manually from https://challenge.xviewdataset.org dir = Path(yaml['path']) # dataset root dir # urls =
['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels # 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images # 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels) # download(urls, dir=dir, delete=False) # Convert labels convert_labels(dir / 'xView_train.geojson') # Move images images = Path(dir / 'images') images.mkdir(parents=True, exist_ok=True) Path(dir / 'train_images').rename(dir / 'images' / 'train') Path(dir / 'val_images').rename(dir / 'images' / 'val') # Split autosplit(dir / 'images' / 'train') ================================================ FILE: yolo/data/scripts/download_weights.sh ================================================ #!/bin/bash # Ultralytics YOLO 🚀, GPL-3.0 license # Download latest models from https://github.com/ultralytics/yolov5/releases # Example usage: bash data/scripts/download_weights.sh # parent # └── yolov5 # ├── yolov5s.pt ← downloads here # ├── yolov5m.pt # └── ... python - <<EOF # (heredoc body not recovered from the archive) EOF ================================================ FILE: yolo/data/utils.py ================================================ def verify_image_label(args): # Verify one image-label pair im_file, lb_file, prefix, keypoint = args nm, nf, ne, nc, msg, segments, keypoints = 0, 0, 0, 0, "", [], None # number (missing, found, empty, corrupt), message, segments, keypoints try: # verify images im = Image.open(im_file) im.verify() # PIL verify shape = exif_size(im) # image size shape = (shape[1], shape[0]) # hw assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels" assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}" if im.format.lower() in ("jpg", "jpeg"): with open(im_file, "rb") as f: f.seek(-2, 2) if f.read() != b"\xff\xd9": # corrupt JPEG ImageOps.exif_transpose(Image.open(im_file)).save(im_file, "JPEG", subsampling=0, quality=100) msg = f"{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved" # verify labels if os.path.isfile(lb_file): nf = 1 # label found with open(lb_file) as f: lb = [x.split() for x in f.read().strip().splitlines() if len(x)] if any(len(x) > 6 for x in lb) and (not keypoint): # is segment classes = np.array([x[0] for x in lb], dtype=np.float32) segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...)
lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) lb = np.array(lb, dtype=np.float32) nl = len(lb) if nl: if keypoint: assert lb.shape[1] == 56, "labels require 56 columns each" assert (lb[:, 5::3] <= 1).all(), "non-normalized or out of bounds coordinate labels" assert (lb[:, 6::3] <= 1).all(), "non-normalized or out of bounds coordinate labels" kpts = np.zeros((lb.shape[0], 39)) for i in range(len(lb)): kpt = np.delete(lb[i, 5:], np.arange(2, lb.shape[1] - 5, 3)) # remove the occlusion parameter from the GT kpts[i] = np.hstack((lb[i, :5], kpt)) lb = kpts assert lb.shape[1] == 39, "labels require 39 columns each after removing occlusion parameter" else: assert lb.shape[1] == 5, f"labels require 5 columns, {lb.shape[1]} columns detected" assert (lb >= 0).all(), f"negative label values {lb[lb < 0]}" assert (lb[:, 1:] <= 1).all(), f"non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}" _, i = np.unique(lb, axis=0, return_index=True) if len(i) < nl: # duplicate row check lb = lb[i] # remove duplicates if segments: segments = [segments[x] for x in i] msg = f"{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed" else: ne = 1 # label empty lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) else: nm = 1 # label missing lb = np.zeros((0, 39), dtype=np.float32) if keypoint else np.zeros((0, 5), dtype=np.float32) if keypoint: keypoints = lb[:, 5:].reshape(-1, 17, 2) lb = lb[:, :5] return im_file, lb, shape, segments, keypoints, nm, nf, ne, nc, msg except Exception as e: nc = 1 msg = f"{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}" return [None, None, None, None, None, nm, nf, ne, nc, msg] def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1): """ Args: imgsz (tuple): The image size. polygons (np.ndarray): [N, M], N is the number of polygons, M is the number of coordinate values (two per point, so M must be even). color (int): color downsample_ratio (int): downsample ratio """ mask = np.zeros(imgsz, dtype=np.uint8) polygons = np.asarray(polygons) polygons = polygons.astype(np.int32) shape = polygons.shape polygons = polygons.reshape(shape[0], -1, 2) cv2.fillPoly(mask, polygons, color=color) nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio) # NOTE: fillPoly first, then resize, to keep the loss calculation consistent with mask_ratio=1 mask = cv2.resize(mask, (nw, nh)) return mask def polygons2masks(imgsz, polygons, color, downsample_ratio=1): """ Args: imgsz (tuple): The image size.
polygons (list[np.ndarray]): one flattened polygon per instance, each with an even number of coordinate values (xy pairs) color (int): color downsample_ratio (int): downsample ratio """ masks = [] for si in range(len(polygons)): mask = polygon2mask(imgsz, [polygons[si].reshape(-1)], color, downsample_ratio) masks.append(mask) return np.array(masks) def polygons2masks_overlap(imgsz, segments, downsample_ratio=1): """Return one overlap mask of shape (imgsz // downsample_ratio), with each instance encoded as a distinct integer value.""" masks = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), dtype=np.int32 if len(segments) > 255 else np.uint8) areas = [] ms = [] for si in range(len(segments)): mask = polygon2mask( imgsz, [segments[si].reshape(-1)], downsample_ratio=downsample_ratio, color=1, ) ms.append(mask) areas.append(mask.sum()) areas = np.asarray(areas) index = np.argsort(-areas) ms = np.array(ms)[index] for i in range(len(segments)): mask = ms[i] * (i + 1) masks = masks + mask masks = np.clip(masks, a_min=0, a_max=i + 1) return masks, index def check_dataset_yaml(data, autodownload=True): # Download, check and/or unzip dataset if not found locally data = check_file(data) DATASETS_DIR = (Path.cwd() / "../datasets").resolve() # TODO: handle global dataset dir # Download (optional) extract_dir = '' if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1) data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml')) extract_dir, autodownload = data.parent, False # Read yaml (optional) if isinstance(data, (str, Path)): data = yaml_load(data, append_filename=True) # dictionary # Checks for k in 'train', 'val', 'names': assert k in data, f"data.yaml '{k}:' field missing ❌" if isinstance(data['names'], (list, tuple)): # old array format data['names'] = dict(enumerate(data['names'])) # convert to dict data['nc'] = len(data['names']) # Resolve paths path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
if not path.is_absolute(): path = (Path.cwd() / path).resolve() data['path'] = path # download scripts for k in 'train', 'val', 'test': if data.get(k): # prepend path if isinstance(data[k], str): x = (path / data[k]).resolve() if not x.exists() and data[k].startswith('../'): x = (path / data[k][3:]).resolve() data[k] = str(x) else: data[k] = [str((path / x).resolve()) for x in data[k]] # Parse yaml train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) if val: val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path if not all(x.exists() for x in val): LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]) if not s or not autodownload: raise FileNotFoundError('Dataset not found ❌') t = time.time() if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename LOGGER.info(f'Downloading {s} to {f}...') torch.hub.download_url_to_file(s, f) Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root unzip_file(f, path=DATASETS_DIR) # unzip Path(f).unlink() # remove zip r = None # success elif s.startswith('bash '): # bash script LOGGER.info(f'Running {s} ...') r = os.system(s) else: # python script r = exec(s, {'yaml': data}) # return None dt = f'({round(time.time() - t, 1)}s)' s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌" LOGGER.info(f"Dataset download {s}") check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts return data # dictionary def check_dataset(dataset: str): """ Check a classification dataset such as Imagenet. This function takes a `dataset` name as input and returns a dictionary containing information about the dataset. If the dataset is not found, it attempts to download the dataset from the internet and save it to the local file system. Args: dataset (str): Name of the dataset.
Returns: data (dict): A dictionary containing the following keys and values: 'train': Path object for the directory containing the training set of the dataset 'val': Path object for the directory containing the validation set of the dataset 'nc': Number of classes in the dataset 'names': List of class names in the dataset """ data_dir = (Path.cwd() / "datasets" / dataset).resolve() if not data_dir.is_dir(): LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...') t = time.time() if dataset == 'imagenet': subprocess.run(f"bash {ROOT / 'data/scripts/get_imagenet.sh'}", shell=True, check=True) else: url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{dataset}.zip' download(url, dir=data_dir.parent) s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n" LOGGER.info(s) train_set = data_dir / "train" test_set = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val' # data/test or data/val nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()]) # number of classes names = [x.name for x in (data_dir / 'train').iterdir() if x.is_dir()] # class names list names = dict(enumerate(sorted(names))) return {"train": train_set, "val": test_set, "nc": nc, "names": names} ================================================ FILE: yolo/engine/__init__.py ================================================ ================================================ FILE: yolo/engine/exporter.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit Format | `format=argument` | Model --- | --- | --- PyTorch | - | yolov8n.pt TorchScript | `torchscript` | yolov8n.torchscript ONNX | `onnx` | yolov8n.onnx OpenVINO | `openvino` | yolov8n_openvino_model/ TensorRT | `engine` | yolov8n.engine CoreML | `coreml` | yolov8n.mlmodel TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ TensorFlow GraphDef | `pb` | yolov8n.pb TensorFlow Lite | `tflite` | yolov8n.tflite TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite TensorFlow.js | `tfjs` | yolov8n_web_model/ PaddlePaddle | `paddle` | yolov8n_paddle_model/ Requirements: $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu # CPU $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow # GPU Python: from ultralytics import YOLO model = YOLO('yolov8n.yaml') results = model.export(format='onnx') CLI: $ yolo mode=export model=yolov8n.pt format=onnx Inference: $ python detect.py --weights yolov8n.pt # PyTorch yolov8n.torchscript # TorchScript yolov8n.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov8n_openvino_model # OpenVINO yolov8n.engine # TensorRT yolov8n.mlmodel # CoreML (macOS-only) yolov8n_saved_model # TensorFlow SavedModel yolov8n.pb # TensorFlow GraphDef yolov8n.tflite # TensorFlow Lite yolov8n_edgetpu.tflite # TensorFlow Edge TPU yolov8n_paddle_model # PaddlePaddle TensorFlow.js: $ cd .. 
&& git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example $ npm install $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model $ npm start """ import contextlib import json import os import platform import re import subprocess import time import warnings from collections import defaultdict from copy import deepcopy from pathlib import Path import hydra import numpy as np import pandas as pd import torch import ultralytics from nn.modules import Detect, Segment from nn.tasks import ClassificationModel, DetectionModel, SegmentationModel from yolo.configs import get_config from yolo.data.dataloaders.stream_loaders import LoadImages from yolo.data.utils import check_dataset from yolo.utils import DEFAULT_CONFIG, LOGGER, callbacks, colorstr, get_default_args, yaml_save from yolo.utils.checks import check_imgsz, check_requirements, check_version, check_yaml from yolo.utils.files import file_size from yolo.utils.ops import Profile from yolo.utils.torch_utils import guess_task_from_head, select_device, smart_inference_mode MACOS = platform.system() == 'Darwin' # macOS environment def export_formats(): # YOLOv5 export formats x = [ ['PyTorch', '-', '.pt', True, True], ['TorchScript', 'torchscript', '.torchscript', True, True], ['ONNX', 'onnx', '.onnx', True, True], ['OpenVINO', 'openvino', '_openvino_model', True, False], ['TensorRT', 'engine', '.engine', False, True], ['CoreML', 'coreml', '.mlmodel', True, False], ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True], ['TensorFlow GraphDef', 'pb', '.pb', True, True], ['TensorFlow Lite', 'tflite', '.tflite', True, False], ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False], ['TensorFlow.js', 'tfjs', '_web_model', False, False], ['PaddlePaddle', 'paddle', '_paddle_model', True, True],] return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU']) def try_export(inner_func): # YOLOv5 export decorator, i..e @try_export inner_args = get_default_args(inner_func) def outer_func(*args, **kwargs): prefix = inner_args['prefix'] try: with Profile() as dt: f, model = inner_func(*args, **kwargs) LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)') return f, model except Exception as e: LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}') return None, None return outer_func class Exporter: """ Exporter A class for exporting a model. Attributes: args (OmegaConf): Configuration for the exporter. save_dir (Path): Directory to save results. """ def __init__(self, config=DEFAULT_CONFIG, overrides=None): """ Initializes the Exporter class. Args: config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG. overrides (dict, optional): Configuration overrides. Defaults to None. 
""" if overrides is None: overrides = {} self.args = get_config(config, overrides) self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks callbacks.add_integration_callbacks(self) @smart_inference_mode() def __call__(self, model=None): self.run_callbacks("on_export_start") t = time.time() format = self.args.format.lower() # to lowercase fmts = tuple(export_formats()['Argument'][1:]) # available export formats flags = [x == format for x in fmts] assert sum(flags), f'ERROR: Invalid format={format}, valid formats are {fmts}' jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle = flags # export booleans # Load PyTorch model self.device = select_device('cpu' if self.args.device is None else self.args.device) if self.args.half: if self.device.type == 'cpu' and not coreml: LOGGER.info('half=True only compatible with GPU or CoreML export, i.e. use device=0 or format=coreml') self.args.half = False assert not self.args.dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic' # Checks # if self.args.batch == model.args['batch_size']: # user has not modified training batch_size self.args.batch = 1 self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size if self.args.optimize: assert self.device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu' # Input im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) file = Path(getattr(model, 'pt_path', None) or getattr(model, 'yaml_file', None) or model.yaml['yaml_file']) if file.suffix == '.yaml': file = Path(file.name) # Update model model = deepcopy(model).to(self.device) for p in model.parameters(): p.requires_grad = False model.eval() model = model.fuse() for k, m in model.named_modules(): if isinstance(m, (Detect, Segment)): m.dynamic = self.args.dynamic m.export = True y = None for _ in range(2): y = model(im) # dry runs if self.args.half and not coreml: im, model = im.half(), model.half() # to FP16 shape = tuple((y[0] if isinstance(y, tuple) else y).shape) # model output shape LOGGER.info( f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)") # Warnings warnings.filterwarnings('ignore', category=torch.jit.TracerWarning) # suppress TracerWarning warnings.filterwarnings('ignore', category=UserWarning) # suppress shape prim::Constant missing ONNX warning warnings.filterwarnings('ignore', category=DeprecationWarning) # suppress CoreML np.bool deprecation warning # Assign self.im = im self.model = model self.file = file self.output_shape = tuple(y.shape) if isinstance(y, torch.Tensor) else (x.shape for x in y) self.metadata = {'stride': int(max(model.stride)), 'names': model.names} # model metadata self.pretty_name = self.file.stem.replace('yolo', 'YOLO') # Exports f = [''] * len(fmts) # exported filenames if jit: # TorchScript f[0], _ = self._export_torchscript() if engine: # TensorRT required before ONNX f[1], _ = self._export_engine() if onnx or xml: # OpenVINO requires ONNX f[2], _ = self._export_onnx() if xml: # OpenVINO f[3], _ = self._export_openvino() if coreml: # CoreML f[4], _ = self._export_coreml() if any((saved_model, pb, tflite, edgetpu, tfjs)): # TensorFlow formats raise NotImplementedError('YOLOv8 TensorFlow export support is still under development. ' 'Please consider contributing to the effort if you have TF expertise. 
Thank you!') assert not isinstance(model, ClassificationModel), 'ClassificationModel TF exports not yet supported.' nms = False f[5], s_model = self._export_saved_model(nms=nms or self.args.agnostic_nms or tfjs, agnostic_nms=self.args.agnostic_nms or tfjs) if pb or tfjs: # pb prerequisite to tfjs f[6], _ = self._export_pb(s_model) if tflite or edgetpu: f[7], _ = self._export_tflite(s_model, int8=self.args.int8 or edgetpu, data=self.args.data, nms=nms, agnostic_nms=self.args.agnostic_nms) if edgetpu: f[8], _ = self._export_edgetpu() self._add_tflite_metadata(f[8] or f[7], num_outputs=len(s_model.outputs)) if tfjs: f[9], _ = self._export_tfjs() if paddle: # PaddlePaddle f[10], _ = self._export_paddle() # Finish f = [str(x) for x in f if x] # filter out '' and None if any(f): task = guess_task_from_head(model.yaml["head"][-1][-2]) s = "-WARNING ⚠️ not yet supported for YOLOv8 exported models" LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)' f"\nResults saved to {colorstr('bold', file.parent.resolve())}" f"\nPredict: yolo task={task} mode=predict model={f[-1]} {s}" f"\nValidate: yolo task={task} mode=val model={f[-1]} {s}" f"\nVisualize: https://netron.app") self.run_callbacks("on_export_end") return f # return list of exported files/dirs @try_export def _export_torchscript(self, prefix=colorstr('TorchScript:')): # YOLOv8 TorchScript model export LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...') f = self.file.with_suffix('.torchscript') ts = torch.jit.trace(self.model, self.im, strict=False) d = {"shape": self.im.shape, "stride": int(max(self.model.stride)), "names": self.model.names} extra_files = {'config.txt': json.dumps(d)} # torch._C.ExtraFilesMap() if self.args.optimize: # https://pytorch.org/tutorials/recipes/mobile_interpreter.html LOGGER.info(f'{prefix} optimizing for mobile...') from torch.utils.mobile_optimizer import optimize_for_mobile optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files) else: ts.save(str(f), _extra_files=extra_files) return f, None @try_export def _export_onnx(self, prefix=colorstr('ONNX:')): # YOLOv8 ONNX export check_requirements('onnx>=1.12.0') import onnx # noqa LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...') f = str(self.file.with_suffix('.onnx')) output_names = ['output0', 'output1'] if isinstance(self.model, SegmentationModel) else ['output0'] dynamic = self.args.dynamic if dynamic: dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}} # shape(1,3,640,640) if isinstance(self.model, SegmentationModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'} # shape(1,32,160,160) elif isinstance(self.model, DetectionModel): dynamic['output0'] = {0: 'batch', 1: 'anchors'} # shape(1,25200,85) torch.onnx.export( self.model.cpu() if dynamic else self.model, # --dynamic only compatible with cpu self.im.cpu() if dynamic else self.im, f, verbose=False, opset_version=self.args.opset, do_constant_folding=True, # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False input_names=['images'], output_names=output_names, dynamic_axes=dynamic or None) # Checks model_onnx = onnx.load(f) # load onnx model onnx.checker.check_model(model_onnx) # check onnx model # Metadata d = {'stride': int(max(self.model.stride)), 'names': self.model.names} for k, v in d.items(): meta = model_onnx.metadata_props.add() meta.key, meta.value = k, str(v) onnx.save(model_onnx, f) # Simplify if 
self.args.simplify: try: check_requirements('onnxsim') import onnxsim LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...') subprocess.run(f'onnxsim {f} {f}', shell=True) except Exception as e: LOGGER.info(f'{prefix} simplifier failure: {e}') return f, model_onnx @try_export def _export_openvino(self, prefix=colorstr('OpenVINO:')): # YOLOv8 OpenVINO export check_requirements('openvino-dev') # requires openvino-dev: https://pypi.org/project/openvino-dev/ import openvino.inference_engine as ie # noqa LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...') f = str(self.file).replace(self.file.suffix, f'_openvino_model{os.sep}') f_onnx = self.file.with_suffix('.onnx') cmd = f"mo --input_model {f_onnx} --output_dir {f} --data_type {'FP16' if self.args.half else 'FP32'}" subprocess.run(cmd.split(), check=True, env=os.environ) # export yaml_save(Path(f) / self.file.with_suffix('.yaml').name, self.metadata) # add metadata.yaml return f, None @try_export def _export_paddle(self, prefix=colorstr('PaddlePaddle:')): # YOLOv8 Paddle export check_requirements(('paddlepaddle', 'x2paddle')) import x2paddle # noqa from x2paddle.convert import pytorch2paddle # noqa LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...') f = str(self.file).replace(self.file.suffix, f'_paddle_model{os.sep}') pytorch2paddle(module=self.model, save_dir=f, jit_type='trace', input_examples=[self.im]) # export yaml_save(Path(f) / self.file.with_suffix('.yaml').name, self.metadata) # add metadata.yaml return f, None @try_export def _export_coreml(self, prefix=colorstr('CoreML:')): # YOLOv8 CoreML export check_requirements('coremltools>=6.0') import coremltools as ct # noqa class iOSModel(torch.nn.Module): # Wrap an Ultralytics YOLO model for iOS export def __init__(self, model, im): super().__init__() b, c, h, w = im.shape # batch, channel, height, width self.model = model self.nc = len(model.names) # number of classes if w == h: self.normalize = 1.0 / w # scalar else: self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h]) # broadcast (slower, smaller) def forward(self, x): xywh, cls = self.model(x)[0].transpose(0, 1).split((4, self.nc), 1) return cls, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4) LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...') f = self.file.with_suffix('.mlmodel') model = iOSModel(self.model, self.im) if self.args.nms else self.model ts = torch.jit.trace(model, self.im, strict=False) # TorchScript model ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=self.im.shape, scale=1 / 255, bias=[0, 0, 0])]) bits, mode = (8, 'kmeans_lut') if self.args.int8 else (16, 'linear') if self.args.half else (32, None) if bits < 32: if MACOS: # quantization only supported on macOS ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode) else: LOGGER.info(f'{prefix} quantization only supported on macOS, skipping...') if self.args.nms: ct_model = self._pipeline_coreml(ct_model) ct_model.save(str(f)) return f, ct_model @try_export def _export_engine(self, workspace=4, verbose=False, prefix=colorstr('TensorRT:')): # YOLOv8 TensorRT export https://developer.nvidia.com/tensorrt assert self.im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. 
`device==0`' try: import tensorrt as trt # noqa except ImportError: if platform.system() == 'Linux': check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com') import tensorrt as trt # noqa check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=8.0.0 self._export_onnx() onnx = self.file.with_suffix('.onnx') LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...') assert onnx.exists(), f'failed to export ONNX file: {onnx}' f = self.file.with_suffix('.engine') # TensorRT engine file logger = trt.Logger(trt.Logger.INFO) if verbose: logger.min_severity = trt.Logger.Severity.VERBOSE builder = trt.Builder(logger) config = builder.create_builder_config() config.max_workspace_size = workspace * 1 << 30 # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30) # fix TRT 8.4 deprecation notice flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) network = builder.create_network(flag) parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(str(onnx)): raise RuntimeError(f'failed to load ONNX file: {onnx}') inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] for inp in inputs: LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}') for out in outputs: LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}') if self.args.dynamic: shape = self.im.shape if shape[0] <= 1: LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument") profile = builder.create_optimization_profile() for inp in inputs: profile.set_shape(inp.name, (1, *shape[1:]), (max(1, shape[0] // 2), *shape[1:]), shape) config.add_optimization_profile(profile) LOGGER.info( f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and self.args.half else 32} engine as {f}') if builder.platform_has_fast_fp16 and self.args.half: config.set_flag(trt.BuilderFlag.FP16) with builder.build_engine(network, config) as engine, open(f, 'wb') as t: t.write(engine.serialize()) return f, None @try_export def _export_saved_model(self, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, prefix=colorstr('TensorFlow SavedModel:')): # YOLOv8 TensorFlow SavedModel export try: import tensorflow as tf # noqa except ImportError: check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}") import tensorflow as tf # noqa check_requirements(("onnx", "onnx2tf", "sng4onnx", "onnxsim", "onnx_graphsurgeon"), cmds="--extra-index-url https://pypi.ngc.nvidia.com ") LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = str(self.file).replace(self.file.suffix, '_saved_model') # Export to ONNX self._export_onnx() onnx = self.file.with_suffix('.onnx') # Export to TF SavedModel subprocess.run(f'onnx2tf -i {onnx} --output_signaturedefs -o {f}', shell=True) # Load saved_model keras_model = tf.saved_model.load(f, tags=None, options=None) return f, keras_model @try_export def _export_saved_model_OLD(self, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, prefix=colorstr('TensorFlow SavedModel:')): # YOLOv8 TensorFlow SavedModel export try: import tensorflow as tf # noqa except ImportError: check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}") import tensorflow as tf # noqa # from models.tf import 
TFModel from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = str(self.file).replace(self.file.suffix, '_saved_model') batch_size, ch, *imgsz = list(self.im.shape) # BCHW tf_models = None # TODO: no TF modules available tf_model = tf_models.TFModel(cfg=self.model.yaml, model=self.model.cpu(), nc=self.model.nc, imgsz=imgsz) im = tf.zeros((batch_size, *imgsz, ch)) # BHWC order for TensorFlow _ = tf_model.predict(im, nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if self.args.dynamic else batch_size) outputs = tf_model.predict(inputs, nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres) keras_model = tf.keras.Model(inputs=inputs, outputs=outputs) keras_model.trainable = False keras_model.summary() if self.args.keras: keras_model.save(f, save_format='tf') else: spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype) m = tf.function(lambda x: keras_model(x)) # full model m = m.get_concrete_function(spec) frozen_func = convert_variables_to_constants_v2(m) tfm = tf.Module() tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if nms else frozen_func(x), [spec]) tfm.__call__(im) tf.saved_model.save(tfm, f, options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(tf.__version__, '2.6') else tf.saved_model.SaveOptions()) return f, keras_model @try_export def _export_pb(self, keras_model, file, prefix=colorstr('TensorFlow GraphDef:')): # YOLOv8 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow import tensorflow as tf # noqa from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 # noqa LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') f = file.with_suffix('.pb') m = tf.function(lambda x: keras_model(x)) # full model m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)) frozen_func = convert_variables_to_constants_v2(m) frozen_func.graph.as_graph_def() tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False) return f, None @try_export def _export_tflite(self, keras_model, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')): # YOLOv8 TensorFlow Lite export import tensorflow as tf # noqa LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...') batch_size, ch, *imgsz = list(self.im.shape) # BCHW f = str(self.file).replace(self.file.suffix, '-fp16.tflite') converter = tf.lite.TFLiteConverter.from_keras_model(keras_model) converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] converter.target_spec.supported_types = [tf.float16] converter.optimizations = [tf.lite.Optimize.DEFAULT] if int8: def representative_dataset_gen(dataset, n_images=100): # Dataset generator for use with converter.representative_dataset, returns a generator of np arrays for n, (path, img, im0s, vid_cap, string) in enumerate(dataset): im = np.transpose(img, [1, 2, 0]) im = np.expand_dims(im, axis=0).astype(np.float32) im /= 255 yield [im] if n >= n_images: break dataset = LoadImages(check_dataset(check_yaml(data))['train'], imgsz=imgsz, auto=False) converter.representative_dataset = lambda: representative_dataset_gen(dataset, n_images=100) converter.target_spec.supported_ops = 
[tf.lite.OpsSet.TFLITE_BUILTINS_INT8] converter.target_spec.supported_types = [] converter.inference_input_type = tf.uint8 # or tf.int8 converter.inference_output_type = tf.uint8 # or tf.int8 converter.experimental_new_quantizer = True f = str(self.file).replace(self.file.suffix, '-int8.tflite') if nms or agnostic_nms: converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS) tflite_model = converter.convert() open(f, "wb").write(tflite_model) return f, None @try_export def _export_edgetpu(self, prefix=colorstr('Edge TPU:')): # YOLOv8 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/ cmd = 'edgetpu_compiler --version' help_url = 'https://coral.ai/docs/edgetpu/compiler/' assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}' if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0: LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}') sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0 # sudo installed on system for c in ( 'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -', 'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | ' # no comma 'sudo tee /etc/apt/sources.list.d/coral-edgetpu.list', 'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'): subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True) ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1] LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...') f = str(self.file).replace(self.file.suffix, '-int8_edgetpu.tflite') # Edge TPU model f_tfl = str(self.file).replace(self.file.suffix, '-int8.tflite') # TFLite model cmd = f"edgetpu_compiler -s -d -k 10 --out_dir {self.file.parent} {f_tfl}" subprocess.run(cmd.split(), check=True) return f, None @try_export def _export_tfjs(self, prefix=colorstr('TensorFlow.js:')): # YOLOv8 TensorFlow.js export check_requirements('tensorflowjs') import tensorflowjs as tfjs # noqa LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...') f = str(self.file).replace(self.file.suffix, '_web_model') # js dir f_pb = self.file.with_suffix('.pb') # *.pb path f_json = Path(f) / 'model.json' # *.json path cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \ f'--output_node_names=Identity,Identity_1,Identity_2,Identity_3 {f_pb} {f}' subprocess.run(cmd.split()) with open(f_json, 'w') as j: # sort JSON Identity_* in ascending order subst = re.sub( r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}, ' r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, ' r'"Identity_1": {"name": "Identity_1"}, ' r'"Identity_2": {"name": "Identity_2"}, ' r'"Identity_3": {"name": "Identity_3"}}}', f_json.read_text()) j.write(subst) return f, None def _add_tflite_metadata(self, file, num_outputs): # Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata with contextlib.suppress(ImportError): # check_requirements('tflite_support') from tflite_support import flatbuffers # noqa from tflite_support import metadata as _metadata # noqa from tflite_support import metadata_schema_py_generated as _metadata_fb # noqa tmp_file = Path('/tmp/meta.txt') with open(tmp_file, 'w') as meta_f: meta_f.write(str(self.metadata)) model_meta = 
_metadata_fb.ModelMetadataT() label_file = _metadata_fb.AssociatedFileT() label_file.name = tmp_file.name model_meta.associatedFiles = [label_file] subgraph = _metadata_fb.SubGraphMetadataT() subgraph.inputTensorMetadata = [_metadata_fb.TensorMetadataT()] subgraph.outputTensorMetadata = [_metadata_fb.TensorMetadataT()] * num_outputs model_meta.subgraphMetadata = [subgraph] b = flatbuffers.Builder(0) b.Finish(model_meta.Pack(b), _metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER) metadata_buf = b.Output() populator = _metadata.MetadataPopulator.with_model_file(file) populator.load_metadata_buffer(metadata_buf) populator.load_associated_files([str(tmp_file)]) populator.populate() tmp_file.unlink() def _pipeline_coreml(self, model, prefix=colorstr('CoreML Pipeline:')): # YOLOv8 CoreML pipeline import coremltools as ct # noqa LOGGER.info(f'{prefix} starting pipeline with coremltools {ct.__version__}...') batch_size, ch, h, w = list(self.im.shape) # BCHW # Output shapes spec = model.get_spec() out0, out1 = iter(spec.description.output) if MACOS: from PIL import Image img = Image.new('RGB', (w, h)) # img(192 width, 320 height) # img = torch.zeros((*opt.img_size, 3)).numpy() # img size(320,192,3) iDetection out = model.predict({'image': img}) out0_shape = out[out0.name].shape out1_shape = out[out1.name].shape else: # linux and windows can not run model.predict(), get sizes from pytorch output y out0_shape = self.output_shape[1], self.output_shape[2] - 5 # (3780, 80) out1_shape = self.output_shape[1], 4 # (3780, 4) # Checks names = self.metadata['names'] nx, ny = spec.description.input[0].type.imageType.width, spec.description.input[0].type.imageType.height na, nc = out0_shape # na, nc = out0.type.multiArrayType.shape # number anchors, classes assert len(names) == nc, f'{len(names)} names found for nc={nc}' # check # Define output shapes (missing) out0.type.multiArrayType.shape[:] = out0_shape # (3780, 80) out1.type.multiArrayType.shape[:] = out1_shape # (3780, 4) # spec.neuralNetwork.preprocessing[0].featureName = '0' # Flexible input shapes # from coremltools.models.neural_network import flexible_shape_utils # s = [] # shapes # s.append(flexible_shape_utils.NeuralNetworkImageSize(320, 192)) # s.append(flexible_shape_utils.NeuralNetworkImageSize(640, 384)) # (height, width) # flexible_shape_utils.add_enumerated_image_sizes(spec, feature_name='image', sizes=s) # r = flexible_shape_utils.NeuralNetworkImageSizeRange() # shape ranges # r.add_height_range((192, 640)) # r.add_width_range((192, 640)) # flexible_shape_utils.update_image_size_range(spec, feature_name='image', size_range=r) # Print print(spec.description) # Model from spec model = ct.models.MLModel(spec) # 3. 
Create NMS protobuf nms_spec = ct.proto.Model_pb2.Model() nms_spec.specificationVersion = 5 for i in range(2): decoder_output = model._spec.description.output[i].SerializeToString() nms_spec.description.input.add() nms_spec.description.input[i].ParseFromString(decoder_output) nms_spec.description.output.add() nms_spec.description.output[i].ParseFromString(decoder_output) nms_spec.description.output[0].name = 'confidence' nms_spec.description.output[1].name = 'coordinates' output_sizes = [nc, 4] for i in range(2): ma_type = nms_spec.description.output[i].type.multiArrayType ma_type.shapeRange.sizeRanges.add() ma_type.shapeRange.sizeRanges[0].lowerBound = 0 ma_type.shapeRange.sizeRanges[0].upperBound = -1 ma_type.shapeRange.sizeRanges.add() ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i] ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i] del ma_type.shape[:] nms = nms_spec.nonMaximumSuppression nms.confidenceInputFeatureName = out0.name # 1x507x80 nms.coordinatesInputFeatureName = out1.name # 1x507x4 nms.confidenceOutputFeatureName = 'confidence' nms.coordinatesOutputFeatureName = 'coordinates' nms.iouThresholdInputFeatureName = 'iouThreshold' nms.confidenceThresholdInputFeatureName = 'confidenceThreshold' nms.iouThreshold = 0.45 nms.confidenceThreshold = 0.25 nms.pickTop.perClass = True nms.stringClassLabels.vector.extend(names.values()) nms_model = ct.models.MLModel(nms_spec) # 4. Pipeline models together pipeline = ct.models.pipeline.Pipeline(input_features=[('image', ct.models.datatypes.Array(3, ny, nx)), ('iouThreshold', ct.models.datatypes.Double()), ('confidenceThreshold', ct.models.datatypes.Double())], output_features=['confidence', 'coordinates']) pipeline.add_model(model) pipeline.add_model(nms_model) # Correct datatypes pipeline.spec.description.input[0].ParseFromString(model._spec.description.input[0].SerializeToString()) pipeline.spec.description.output[0].ParseFromString(nms_model._spec.description.output[0].SerializeToString()) pipeline.spec.description.output[1].ParseFromString(nms_model._spec.description.output[1].SerializeToString()) # Update metadata pipeline.spec.specificationVersion = 5 pipeline.spec.description.metadata.versionString = f'Ultralytics YOLOv{ultralytics.__version__}' pipeline.spec.description.metadata.shortDescription = f'Ultralytics {self.pretty_name} CoreML model' pipeline.spec.description.metadata.author = 'Ultralytics (https://ultralytics.com)' pipeline.spec.description.metadata.license = 'GPL-3.0 license (https://ultralytics.com/license)' pipeline.spec.description.metadata.userDefined.update({ 'IoU threshold': str(nms.iouThreshold), 'Confidence threshold': str(nms.confidenceThreshold)}) # Save the model model = ct.models.MLModel(pipeline.spec) model.input_description['image'] = 'Input image' model.input_description['iouThreshold'] = f'(optional) IOU threshold override (default: {nms.iouThreshold})' model.input_description['confidenceThreshold'] = \ f'(optional) Confidence threshold override (default: {nms.confidenceThreshold})' model.output_description['confidence'] = 'Boxes × Class confidence (see user-defined metadata "classes")' model.output_description['coordinates'] = 'Boxes × [x, y, width, height] (relative to image size)' LOGGER.info(f'{prefix} pipeline success') return model def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) @hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name) def export(cfg): cfg.model = cfg.model or "yolov8n.yaml" cfg.format = cfg.format
or "torchscript" # exporter = Exporter(cfg) # # model = None # if isinstance(cfg.model, (str, Path)): # if Path(cfg.model).suffix == '.yaml': # model = DetectionModel(cfg.model) # elif Path(cfg.model).suffix == '.pt': # model = attempt_load_weights(cfg.model, fuse=True) # else: # TypeError(f'Unsupported model type {cfg.model}') # exporter(model=model) from ultralytics import YOLO model = YOLO(cfg.model) model.export(**cfg) if __name__ == "__main__": """ CLI: yolo mode=export model=yolov8n.yaml format=onnx """ export() ================================================ FILE: yolo/engine/model.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from pathlib import Path from ultralytics import yolo # noqa from nn.tasks import ClassificationModel, DetectionModel, SegmentationModel, attempt_load_one_weight from yolo.configs import get_config from yolo.engine.exporter import Exporter from yolo.utils import DEFAULT_CONFIG, LOGGER, yaml_load from yolo.utils.checks import check_imgsz, check_yaml from yolo.utils.torch_utils import guess_task_from_head, smart_inference_mode # Map head to model, trainer, validator, and predictor classes MODEL_MAP = { "classify": [ ClassificationModel, 'yolo.TYPE.classify.ClassificationTrainer', 'yolo.TYPE.classify.ClassificationValidator', 'yolo.TYPE.classify.ClassificationPredictor'], "detect": [ DetectionModel, 'yolo.TYPE.detect.DetectionTrainer', 'yolo.TYPE.detect.DetectionValidator', 'yolo.TYPE.detect.DetectionPredictor'], "segment": [ SegmentationModel, 'yolo.TYPE.segment.SegmentationTrainer', 'yolo.TYPE.segment.SegmentationValidator', 'yolo.TYPE.segment.SegmentationPredictor']} class YOLO: """ YOLO A python interface which emulates a model-like behaviour by wrapping trainers. """ def __init__(self, model='yolov8n.yaml', type="v8") -> None: """ > Initializes the YOLO object. Args: model (str, Path): model to load or create type (str): Type/version of models to use. Defaults to "v8". """ self.type = type self.ModelClass = None # model class self.TrainerClass = None # trainer class self.ValidatorClass = None # validator class self.PredictorClass = None # predictor class self.model = None # model object self.trainer = None # trainer object self.task = None # task type self.ckpt = None # if loaded from *.pt self.cfg = None # if loaded from *.yaml self.ckpt_path = None self.overrides = {} # overrides for trainer object # Load or create new YOLO model {'.pt': self._load, '.yaml': self._new}[Path(model).suffix](model) def __call__(self, source, **kwargs): return self.predict(source, **kwargs) def _new(self, cfg: str, verbose=True): """ > Initializes a new model and infers the task type from the model definitions. Args: cfg (str): model configuration file verbose (bool): display model info on load """ cfg = check_yaml(cfg) # check YAML cfg_dict = yaml_load(cfg, append_filename=True) # model dict self.task = guess_task_from_head(cfg_dict["head"][-1][-2]) self.ModelClass, self.TrainerClass, self.ValidatorClass, self.PredictorClass = \ self._guess_ops_from_task(self.task) self.model = self.ModelClass(cfg_dict, verbose=verbose) # initialize self.cfg = cfg def _load(self, weights: str): """ > Initializes a new model and infers the task type from the model head. 
Args: weights (str): model checkpoint to be loaded """ self.model, self.ckpt = attempt_load_one_weight(weights) self.ckpt_path = weights self.task = self.model.args["task"] self.overrides = self.model.args self._reset_ckpt_args(self.overrides) self.ModelClass, self.TrainerClass, self.ValidatorClass, self.PredictorClass = \ self._guess_ops_from_task(self.task) def reset(self): """ > Resets the model modules. """ for m in self.model.modules(): if hasattr(m, 'reset_parameters'): m.reset_parameters() for p in self.model.parameters(): p.requires_grad = True def info(self, verbose=False): """ > Logs model info. Args: verbose (bool): Controls verbosity. """ self.model.info(verbose=verbose) def fuse(self): self.model.fuse() @smart_inference_mode() def predict(self, source, **kwargs): """ Visualize prediction. Args: source (str): Accepts all source types accepted by yolo **kwargs : Any other args accepted by the predictors. To see all args check 'configuration' section in docs """ overrides = self.overrides.copy() overrides["conf"] = 0.25 overrides.update(kwargs) overrides["mode"] = "predict" overrides["save"] = kwargs.get("save", False) # not save files by default predictor = self.PredictorClass(overrides=overrides) predictor.args.imgsz = check_imgsz(predictor.args.imgsz, min_dim=2) # check image size predictor.setup(model=self.model, source=source) return predictor() @smart_inference_mode() def val(self, data=None, **kwargs): """ > Validate a model on a given dataset . Args: data (str): The dataset to validate on. Accepts all formats accepted by yolo **kwargs : Any other args accepted by the validators. To see all args check 'configuration' section in docs """ overrides = self.overrides.copy() overrides.update(kwargs) overrides["mode"] = "val" args = get_config(config=DEFAULT_CONFIG, overrides=overrides) args.data = data or args.data args.task = self.task validator = self.ValidatorClass(args=args) validator(model=self.model) @smart_inference_mode() def export(self, **kwargs): """ > Export model. Args: **kwargs : Any other args accepted by the predictors. To see all args check 'configuration' section in docs """ overrides = self.overrides.copy() overrides.update(kwargs) args = get_config(config=DEFAULT_CONFIG, overrides=overrides) args.task = self.task exporter = Exporter(overrides=args) exporter(model=self.model) def train(self, **kwargs): """ > Trains the model on a given dataset. Args: **kwargs (Any): Any number of arguments representing the training configuration. List of all args can be found in 'config' section. You can pass all arguments as a yaml file in `cfg`. Other args are ignored if `cfg` file is passed """ overrides = self.overrides.copy() overrides.update(kwargs) if kwargs.get("cfg"): LOGGER.info(f"cfg file passed. Overriding default params with {kwargs['cfg']}.") overrides = yaml_load(check_yaml(kwargs["cfg"]), append_filename=True) overrides["task"] = self.task overrides["mode"] = "train" if not overrides.get("data"): raise AttributeError("dataset not provided! Please define `data` in config.yaml or pass as an argument.") if overrides.get("resume"): overrides["resume"] = self.ckpt_path self.trainer = self.TrainerClass(overrides=overrides) if not overrides.get("resume"): # manually set model only if not resuming self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml) self.model = self.trainer.model self.trainer.train() def to(self, device): """ > Sends the model to the given device. 
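        For example:

            model = YOLO("yolov8n.pt")
            model.to("cuda:0")  # any torch device string, e.g. "cpu"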
Args: device (str): device """ self.model.to(device) def _guess_ops_from_task(self, task): model_class, train_lit, val_lit, pred_lit = MODEL_MAP[task] # warning: eval is unsafe. Use with caution trainer_class = eval(train_lit.replace("TYPE", f"{self.type}")) validator_class = eval(val_lit.replace("TYPE", f"{self.type}")) predictor_class = eval(pred_lit.replace("TYPE", f"{self.type}")) return model_class, trainer_class, validator_class, predictor_class @staticmethod def _reset_ckpt_args(args): args.pop("device", None) args.pop("project", None) args.pop("name", None) args.pop("batch", None) args.pop("epochs", None) args.pop("cache", None) args.pop("save_json", None) ================================================ FILE: yolo/engine/predictor.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Run prediction on images, videos, directories, globs, YouTube, webcam, streams, etc. Usage - sources: $ yolo task=... mode=predict model=s.pt --source 0 # webcam img.jpg # image vid.mp4 # video screen # screenshot path/ # directory list.txt # list of images list.streams # list of streams 'path/*.jpg' # glob 'https://youtu.be/Zgi9g1ksQHc' # YouTube 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream Usage - formats: $ yolo task=... mode=predict --weights yolov8n.pt # PyTorch yolov8n.torchscript # TorchScript yolov8n.onnx # ONNX Runtime or OpenCV DNN with --dnn yolov8n_openvino_model # OpenVINO yolov8n.engine # TensorRT yolov8n.mlmodel # CoreML (macOS-only) yolov8n_saved_model # TensorFlow SavedModel yolov8n.pb # TensorFlow GraphDef yolov8n.tflite # TensorFlow Lite yolov8n_edgetpu.tflite # TensorFlow Edge TPU yolov8n_paddle_model # PaddlePaddle """ import platform from collections import defaultdict from pathlib import Path import cv2 from sort import * from nn.autobackend import AutoBackend from yolo.configs import get_config from yolo.data.dataloaders.stream_loaders import LoadImages, LoadScreenshots, LoadStreams from yolo.data.utils import IMG_FORMATS, VID_FORMATS from yolo.utils import DEFAULT_CONFIG, LOGGER, SETTINGS, callbacks, colorstr, ops from yolo.utils.checks import check_file, check_imgsz, check_imshow from yolo.utils.files import increment_path from yolo.utils.torch_utils import select_device, smart_inference_mode class BasePredictor: """ BasePredictor A base class for creating predictors. Attributes: args (OmegaConf): Configuration for the predictor. save_dir (Path): Directory to save results. done_setup (bool): Whether the predictor has finished setup. model (nn.Module): Model used for prediction. data (dict): Data configuration. device (torch.device): Device used for prediction. dataset (Dataset): Dataset used for prediction. vid_path (str): Path to video file. vid_writer (cv2.VideoWriter): Video writer for saving video output. annotator (Annotator): Annotator used for prediction. data_path (str): Path to data. """ def __init__(self, config=DEFAULT_CONFIG, overrides=None): """ Initializes the BasePredictor class. Args: config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG. overrides (dict, optional): Configuration overrides. Defaults to None.
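        Example:
            A sketch using a concrete subclass (DetectionPredictor in
            yolo/v8/detect/predict.py supplies preprocess/write_results;
            the argument values here are illustrative):

                predictor = DetectionPredictor(overrides={'imgsz': 640})
                predictor.setup(source='vid.mp4', model='yolov8n.pt')
                outputs = predictor()  # runs the prediction loop in __call__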
""" print("This is a tracker",tracker) if overrides is None: overrides = {} self.args = get_config(config, overrides) project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task name = self.args.name or f"{self.args.mode}" self.save_dir = increment_path(Path(project) / name, exist_ok=self.args.exist_ok) if self.args.save: (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) if self.args.conf is None: self.args.conf = 0.25 # default conf=0.25 self.done_setup = False # Usable if setup is done self.model = None self.data = self.args.data # data_dict self.device = None self.dataset = None self.vid_path, self.vid_writer = None, None self.annotator = None self.data_path = None self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks callbacks.add_integration_callbacks(self) def preprocess(self, img): pass def get_annotator(self, img): raise NotImplementedError("get_annotator function needs to be implemented") def get_tracker(self,img): def write_results(self, pred, batch, print_string): raise NotImplementedError("print_results function needs to be implemented") def postprocess(self, preds, img, orig_img): return preds def setup(self, source=None, model=None): # source source = str(source if source is not None else self.args.source) is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file) screenshot = source.lower().startswith('screen') if is_url and is_file: source = check_file(source) # download # model device = select_device(self.args.device) model = model or self.args.model self.args.half &= device.type != 'cpu' # half precision only supported on CUDA model = AutoBackend(model, device=device, dnn=self.args.dnn, fp16=self.args.half) stride, pt = model.stride, model.pt imgsz = check_imgsz(self.args.imgsz, stride=stride) # check image size # Dataloader bs = 1 # batch_size if self.args.show: self.args.show = check_imshow(warn=True) if webcam: self.dataset = LoadStreams(source, imgsz=imgsz, stride=stride, auto=pt, transforms=getattr(model.model, 'transforms', None), vid_stride=self.args.vid_stride) bs = len(self.dataset) elif screenshot: self.dataset = LoadScreenshots(source, imgsz=imgsz, stride=stride, auto=pt, transforms=getattr(model.model, 'transforms', None)) else: self.dataset = LoadImages(source, imgsz=imgsz, stride=stride, auto=pt, transforms=getattr(model.model, 'transforms', None), vid_stride=self.args.vid_stride) self.vid_path, self.vid_writer = [None] * bs, [None] * bs model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup self.model = model self.webcam = webcam self.screenshot = screenshot self.imgsz = imgsz self.done_setup = True self.device = device return model @smart_inference_mode() def __call__(self, source=None, model=None): self.run_callbacks("on_predict_start") model= self.model if self.done_setup else self.setup(source, model) model.eval() print(tracker) self.seen, self.windows, self.dt = 0, [], (ops.Profile(), ops.Profile(), ops.Profile()) self.all_outputs = [] for batch in self.dataset: self.run_callbacks("on_predict_batch_start") path, im, im0s, vid_cap, s = batch visualize = increment_path(self.save_dir / Path(path).stem, mkdir=True) if self.args.visualize else False with self.dt[0]: im = self.preprocess(im) if len(im.shape) == 3: im = im[None] # Inference with 
self.dt[1]: preds = model(im, augment=self.args.augment, visualize=visualize) # postprocess with self.dt[2]: preds = self.postprocess(preds, im, im0s) for i in range(len(im)): if self.webcam: path, im0s = path[i], im0s[i] p = Path(path) s += self.write_results(i, preds, (p, im, im0s)) if self.args.show: self.show(p) if self.args.save: self.save_preds(vid_cap, i, str(self.save_dir / p.name)) # Print time (inference-only) LOGGER.info(f"{s}{'' if len(preds) else '(no detections), '}{self.dt[1].dt * 1E3:.1f}ms") self.run_callbacks("on_predict_batch_end") # Print results t = tuple(x.t / self.seen * 1E3 for x in self.dt) # speeds per image LOGGER.info( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms postprocess per image at shape {(1, 3, *self.imgsz)}' % t) if self.args.save_txt or self.args.save: s = f"\n{len(list(self.save_dir.glob('labels/*.txt')))} labels saved to {self.save_dir / 'labels'}" if self.args.save_txt else '' LOGGER.info(f"Results saved to {colorstr('bold', self.save_dir)}{s}") self.run_callbacks("on_predict_end") return self.all_outputs def show(self, p): im0 = self.annotator.result() if platform.system() == 'Linux' and p not in self.windows: self.windows.append(p) cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux) cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0]) cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond def save_preds(self, vid_cap, idx, save_path): im0 = self.annotator.result() # save imgs if self.dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if self.vid_path[idx] != save_path: # new video self.vid_path[idx] = save_path if isinstance(self.vid_writer[idx], cv2.VideoWriter): self.vid_writer[idx].release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos self.vid_writer[idx] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) self.vid_writer[idx].write(im0) def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) ================================================ FILE: yolo/engine/sort.py ================================================ from __future__ import print_function import os import numpy as np import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import matplotlib.patches as patches from skimage import io import glob import time import argparse from filterpy.kalman import KalmanFilter np.random.seed(0) def linear_assignment(cost_matrix): try: import lap #linear assignment problem solver _, x, y = lap.lapjv(cost_matrix, extend_cost = True) return np.array([[y[i],i] for i in x if i>=0]) except ImportError: from scipy.optimize import linear_sum_assignment x,y = linear_sum_assignment(cost_matrix) return np.array(list(zip(x,y))) """From SORT: Computes IOU between two boxes in the form [x1,y1,x2,y2]""" def iou_batch(bb_test, bb_gt): bb_gt = np.expand_dims(bb_gt, 0) bb_test = np.expand_dims(bb_test, 1) xx1 = np.maximum(bb_test[...,0], bb_gt[..., 0]) yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) w = np.maximum(0., xx2 - xx1) h = np.maximum(0., yy2 - yy1) wh = w * h o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * 
(bb_test[..., 3] - bb_test[..., 1]) + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) return(o) """Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the center of the box and s is the scale/area and r is the aspect ratio""" def convert_bbox_to_z(bbox): w = bbox[2] - bbox[0] h = bbox[3] - bbox[1] x = bbox[0] + w/2. y = bbox[1] + h/2. s = w * h #scale is just area r = w / float(h) return np.array([x, y, s, r]).reshape((4, 1)) """Takes a bounding box in the centre form [x,y,s,r] and returns it in the form [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right""" def convert_x_to_bbox(x, score=None): w = np.sqrt(x[2] * x[3]) h = x[2] / w if(score==None): return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) else: return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) """This class represents the internal state of individual tracked objects observed as bbox.""" class KalmanBoxTracker(object): count = 0 def __init__(self, bbox): """ Initialize a tracker using initial bounding box Parameter 'bbox' must have 'detected class' int number at the -1 position. """ self.kf = KalmanFilter(dim_x=7, dim_z=4) self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],[0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) self.kf.R[2:,2:] *= 10. # R: Covariance matrix of measurement noise (set to high for noisy inputs -> more 'inertia' of boxes') self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities self.kf.P *= 10. self.kf.Q[-1,-1] *= 0.5 # Q: Covariance matrix of process noise (set to high for erratically moving things) self.kf.Q[4:,4:] *= 0.5 self.kf.x[:4] = convert_bbox_to_z(bbox) # STATE VECTOR self.time_since_update = 0 self.id = KalmanBoxTracker.count KalmanBoxTracker.count += 1 self.history = [] self.hits = 0 self.hit_streak = 0 self.age = 0 self.centroidarr = [] CX = (bbox[0]+bbox[2])//2 CY = (bbox[1]+bbox[3])//2 self.centroidarr.append((CX,CY)) #keep yolov5 detected class information self.detclass = bbox[5] # If we want to store bbox self.bbox_history = [bbox] def update(self, bbox): """ Updates the state vector with observed bbox """ self.time_since_update = 0 self.history = [] self.hits += 1 self.hit_streak += 1 self.kf.update(convert_bbox_to_z(bbox)) self.detclass = bbox[5] CX = (bbox[0]+bbox[2])//2 CY = (bbox[1]+bbox[3])//2 self.centroidarr.append((CX,CY)) self.bbox_history.append(bbox) def predict(self): """ Advances the state vector and returns the predicted bounding box estimate """ if((self.kf.x[6]+self.kf.x[2])<=0): self.kf.x[6] *= 0.0 self.kf.predict() self.age += 1 if(self.time_since_update>0): self.hit_streak = 0 self.time_since_update += 1 self.history.append(convert_x_to_bbox(self.kf.x)) # bbox=self.history[-1] # CX = (bbox[0]+bbox[2])/2 # CY = (bbox[1]+bbox[3])/2 # self.centroidarr.append((CX,CY)) return self.history[-1] def get_state(self): """ Returns the current bounding box estimate # test arr1 = np.array([[1,2,3,4]]) arr2 = np.array([0]) arr3 = np.expand_dims(arr2, 0) np.concatenate((arr1,arr3), axis=1) """ arr_detclass = np.expand_dims(np.array([self.detclass]), 0) arr_u_dot = np.expand_dims(self.kf.x[4],0) arr_v_dot = np.expand_dims(self.kf.x[5],0) arr_s_dot = np.expand_dims(self.kf.x[6],0) return np.concatenate((convert_x_to_bbox(self.kf.x), arr_detclass, arr_u_dot, arr_v_dot, arr_s_dot), axis=1) def 
associate_detections_to_trackers(detections, trackers, iou_threshold = 0.3): """ Assigns detections to tracked object (both represented as bounding boxes) Returns 3 lists of 1. matches, 2. unmatched_detections 3. unmatched_trackers """ if(len(trackers)==0): return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) iou_matrix = iou_batch(detections, trackers) if min(iou_matrix.shape) > 0: a = (iou_matrix > iou_threshold).astype(np.int32) if a.sum(1).max() == 1 and a.sum(0).max() ==1: matched_indices = np.stack(np.where(a), axis=1) else: matched_indices = linear_assignment(-iou_matrix) else: matched_indices = np.empty(shape=(0,2)) unmatched_detections = [] for d, det in enumerate(detections): if(d not in matched_indices[:,0]): unmatched_detections.append(d) unmatched_trackers = [] for t, trk in enumerate(trackers): if(t not in matched_indices[:,1]): unmatched_trackers.append(t) #filter out matched with low IOU matches = [] for m in matched_indices: if(iou_matrix[m[0], m[1]]<iou_threshold): unmatched_detections.append(m[0]) unmatched_trackers.append(m[1]) else: matches.append(m.reshape(1,2)) if(len(matches)==0): matches = np.empty((0,2),dtype=int) else: matches = np.concatenate(matches,axis=0) return matches, np.array(unmatched_detections), np.array(unmatched_trackers) class Sort(object): def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3): """ Sets key parameters for SORT """ self.max_age = max_age self.min_hits = min_hits self.iou_threshold = iou_threshold self.trackers = [] self.frame_count = 0 def getTrackers(self,): return self.trackers def update(self, dets=np.empty((0, 6))): """ Updates trackers from a numpy array of detections in the format [[x1,y1,x2,y2,score,class],...]; must be called once per frame, even for frames with no detections. Returns a similar array where the last column is the object ID. """ self.frame_count += 1 #get predicted locations from existing trackers trks = np.zeros((len(self.trackers), 6)) to_del = [] ret = [] for t, trk in enumerate(trks): pos = self.trackers[t].predict()[0] trk[:] = [pos[0], pos[1], pos[2], pos[3], 0, 0] if np.any(np.isnan(pos)): to_del.append(t) trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) for t in reversed(to_del): self.trackers.pop(t) matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold) #update matched trackers with assigned detections for m in matched: self.trackers[m[1]].update(dets[m[0], :]) #create and initialise new trackers for unmatched detections for i in unmatched_dets: trk = KalmanBoxTracker(np.hstack(dets[i,:])) self.trackers.append(trk) i = len(self.trackers) for trk in reversed(self.trackers): d = trk.get_state()[0] if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits): ret.append(np.concatenate((d, [trk.id+1])).reshape(1,-1)) #+1'd because MOT benchmark requires positive value i -= 1 #remove dead tracklet if(trk.time_since_update >self.max_age): self.trackers.pop(i) if(len(ret) > 0): return np.concatenate(ret) return np.empty((0,6)) def parse_args(): """Parse input arguments.""" parser = argparse.ArgumentParser(description='SORT demo') parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data') parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train') parser.add_argument("--max_age", help="Maximum number of frames to keep alive a track without associated detections.", type=int, default=1) parser.add_argument("--min_hits", help="Minimum number of associated detections before track is initialised.", type=int, default=3) parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3) args = parser.parse_args() return args if __name__ == '__main__': # all train args = parse_args() display = args.display phase = args.phase total_time = 0.0 total_frames = 0 colours = np.random.rand(32, 3) #used only for display if(display): if not os.path.exists('mot_benchmark'): print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download).
E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') exit() plt.ion() fig = plt.figure() ax1 = fig.add_subplot(111, aspect='equal') if not os.path.exists('output'): os.makedirs('output') pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt') for seq_dets_fn in glob.glob(pattern): mot_tracker = Sort(max_age=args.max_age, min_hits=args.min_hits, iou_threshold=args.iou_threshold) #create instance of the SORT tracker seq_dets = np.loadtxt(seq_dets_fn, delimiter=',') seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0] with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file: print("Processing %s."%(seq)) for frame in range(int(seq_dets[:,0].max())): frame += 1 #detection and frame numbers begin at 1 dets = seq_dets[seq_dets[:, 0]==frame, 2:7] dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] total_frames += 1 if(display): fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame)) im =io.imread(fn) ax1.imshow(im) plt.title(seq + ' Tracked Targets') start_time = time.time() trackers = mot_tracker.update(dets) cycle_time = time.time() - start_time total_time += cycle_time for d in trackers: print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) if(display): d = d.astype(np.int32) ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) if(display): fig.canvas.flush_events() plt.draw() ax1.cla() print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time)) if(display): print("Note: to get real runtime results run without the option: --display") ================================================ FILE: yolo/engine/trainer.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Simple training loop; Boilerplate that could apply to any arbitrary neural network, """ import os import subprocess import time from collections import defaultdict from copy import deepcopy from datetime import datetime from pathlib import Path import numpy as np import torch import torch.distributed as dist import torch.nn as nn from omegaconf import OmegaConf # noqa from omegaconf import open_dict from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP from torch.optim import lr_scheduler from tqdm import tqdm import yolo.utils as utils from ultralytics import __version__ from nn.tasks import attempt_load_one_weight from yolo.configs import get_config from yolo.data.utils import check_dataset, check_dataset_yaml from yolo.utils import (DEFAULT_CONFIG, LOGGER, RANK, SETTINGS, TQDM_BAR_FORMAT, callbacks, colorstr, yaml_save) from yolo.utils.autobatch import check_train_batch_size from yolo.utils.checks import check_file, print_args from yolo.utils.dist import ddp_cleanup, generate_ddp_command from yolo.utils.files import get_latest_run, increment_path from yolo.utils.torch_utils import ModelEMA, de_parallel, init_seeds, one_cycle, strip_optimizer class BaseTrainer: """ BaseTrainer > A base class for creating trainers. Attributes: args (OmegaConf): Configuration for the trainer. check_resume (method): Method to check if training should be resumed from a saved checkpoint. console (logging.Logger): Logger instance. validator (BaseValidator): Validator instance. model (nn.Module): Model instance. callbacks (defaultdict): Dictionary of callbacks. save_dir (Path): Directory to save results. 
wdir (Path): Directory to save weights. last (Path): Path to last checkpoint. best (Path): Path to best checkpoint. batch_size (int): Batch size for training. epochs (int): Number of epochs to train for. start_epoch (int): Starting epoch for training. device (torch.device): Device to use for training. amp (bool): Flag to enable AMP (Automatic Mixed Precision). scaler (amp.GradScaler): Gradient scaler for AMP. data (str): Path to data. trainset (torch.utils.data.Dataset): Training dataset. testset (torch.utils.data.Dataset): Testing dataset. ema (nn.Module): EMA (Exponential Moving Average) of the model. lf (nn.Module): Loss function. scheduler (torch.optim.lr_scheduler._LRScheduler): Learning rate scheduler. best_fitness (float): The best fitness value achieved. fitness (float): Current fitness value. loss (float): Current loss value. tloss (float): Total loss value. loss_names (list): List of loss names. csv (Path): Path to results CSV file. """ def __init__(self, config=DEFAULT_CONFIG, overrides=None): """ > Initializes the BaseTrainer class. Args: config (str, optional): Path to a configuration file. Defaults to DEFAULT_CONFIG. overrides (dict, optional): Configuration overrides. Defaults to None. """ if overrides is None: overrides = {} self.args = get_config(config, overrides) self.check_resume() self.console = LOGGER self.validator = None self.model = None self.callbacks = defaultdict(list) init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic) # Dirs project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task name = self.args.name or f"{self.args.mode}" self.save_dir = Path( self.args.get( "save_dir", increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True))) self.wdir = self.save_dir / 'weights' # weights dir if RANK in {-1, 0}: self.wdir.mkdir(parents=True, exist_ok=True) # make dir with open_dict(self.args): self.args.save_dir = str(self.save_dir) yaml_save(self.save_dir / 'args.yaml', OmegaConf.to_container(self.args, resolve=True)) # save run args self.last, self.best = self.wdir / 'last.pt', self.wdir / 'best.pt' # checkpoint paths self.batch_size = self.args.batch self.epochs = self.args.epochs self.start_epoch = 0 if RANK == -1: print_args(dict(self.args)) # Device self.device = utils.torch_utils.select_device(self.args.device, self.batch_size) self.amp = self.device.type != 'cpu' self.scaler = amp.GradScaler(enabled=self.amp) if self.device.type == 'cpu': self.args.workers = 0 # faster CPU training as time dominated by inference, not dataloading # Model and Dataloaders. self.model = self.args.model self.data = self.args.data if self.data.endswith(".yaml"): self.data = check_dataset_yaml(self.data) else: self.data = check_dataset(self.data) self.trainset, self.testset = self.get_dataset(self.data) self.ema = None # Optimization utils init self.lf = None self.scheduler = None # Epoch level metrics self.best_fitness = None self.fitness = None self.loss = None self.tloss = None self.loss_names = ['Loss'] self.csv = self.save_dir / 'results.csv' self.plot_idx = [0, 1, 2] # Callbacks self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks if RANK in {0, -1}: callbacks.add_integration_callbacks(self) def add_callback(self, event: str, callback): """ > Appends the given callback. """ self.callbacks[event].append(callback) def set_callback(self, event: str, callback): """ > Overrides the existing callbacks with the given callback. 
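        For example (a sketch; event names are the keys of
        callbacks.default_callbacks, e.g. "on_train_start"):

            def log_save_dir(trainer):
                print(trainer.save_dir)

            trainer.set_callback("on_train_start", log_save_dir)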
""" self.callbacks[event] = [callback] def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) def train(self): world_size = torch.cuda.device_count() if world_size > 1 and "LOCAL_RANK" not in os.environ: command = generate_ddp_command(world_size, self) try: subprocess.run(command) except Exception as e: self.console(e) finally: ddp_cleanup(command, self) else: self._do_train(int(os.getenv("RANK", -1)), world_size) def _setup_ddp(self, rank, world_size): # os.environ['MASTER_ADDR'] = 'localhost' # os.environ['MASTER_PORT'] = '9020' torch.cuda.set_device(rank) self.device = torch.device('cuda', rank) self.console.info(f"DDP settings: RANK {rank}, WORLD_SIZE {world_size}, DEVICE {self.device}") dist.init_process_group("nccl" if dist.is_nccl_available() else "gloo", rank=rank, world_size=world_size) def _setup_train(self, rank, world_size): """ > Builds dataloaders and optimizer on correct rank process. """ # model self.run_callbacks("on_pretrain_routine_start") ckpt = self.setup_model() self.model = self.model.to(self.device) self.set_model_attributes() if world_size > 1: self.model = DDP(self.model, device_ids=[rank]) # Batch size if self.batch_size == -1: if RANK == -1: # single-GPU only, estimate best batch size self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp) else: SyntaxError('batch=-1 to use AutoBatch is only available in Single-GPU training. ' 'Please pass a valid batch size value for Multi-GPU DDP training, i.e. batch=16') # Optimizer self.accumulate = max(round(self.args.nbs / self.batch_size), 1) # accumulate loss before optimizing self.args.weight_decay *= self.batch_size * self.accumulate / self.args.nbs # scale weight_decay self.optimizer = self.build_optimizer(model=self.model, name=self.args.optimizer, lr=self.args.lr0, momentum=self.args.momentum, decay=self.args.weight_decay) # Scheduler if self.args.cos_lr: self.lf = one_cycle(1, self.args.lrf, self.epochs) # cosine 1->hyp['lrf'] else: self.lf = lambda x: (1 - x / self.epochs) * (1.0 - self.args.lrf) + self.args.lrf # linear self.scheduler = lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf) self.scheduler.last_epoch = self.start_epoch - 1 # do not move # dataloaders batch_size = self.batch_size // world_size if world_size > 1 else self.batch_size self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=rank, mode="train") if rank in {0, -1}: self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode="val") self.validator = self.get_validator() metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix="val") self.metrics = dict(zip(metric_keys, [0] * len(metric_keys))) # TODO: init metrics for plot_results()? 
self.ema = ModelEMA(self.model) self.resume_training(ckpt) self.run_callbacks("on_pretrain_routine_end") def _do_train(self, rank=-1, world_size=1): if world_size > 1: self._setup_ddp(rank, world_size) self._setup_train(rank, world_size) self.epoch_time = None self.epoch_time_start = time.time() self.train_time_start = time.time() nb = len(self.train_loader) # number of batches nw = max(round(self.args.warmup_epochs * nb), 100) # number of warmup iterations last_opt_step = -1 self.run_callbacks("on_train_start") self.log(f"Image sizes {self.args.imgsz} train, {self.args.imgsz} val\n" f'Using {self.train_loader.num_workers * (world_size or 1)} dataloader workers\n' f"Logging results to {colorstr('bold', self.save_dir)}\n" f"Starting training for {self.epochs} epochs...") if self.args.close_mosaic: base_idx = (self.epochs - self.args.close_mosaic) * nb self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2]) for epoch in range(self.start_epoch, self.epochs): self.epoch = epoch self.run_callbacks("on_train_epoch_start") self.model.train() if rank != -1: self.train_loader.sampler.set_epoch(epoch) pbar = enumerate(self.train_loader) # Update dataloader attributes (optional) if epoch == (self.epochs - self.args.close_mosaic): self.console.info("Closing dataloader mosaic") if hasattr(self.train_loader.dataset, 'mosaic'): self.train_loader.dataset.mosaic = False if hasattr(self.train_loader.dataset, 'close_mosaic'): self.train_loader.dataset.close_mosaic(hyp=self.args) if rank in {-1, 0}: self.console.info(self.progress_string()) pbar = tqdm(enumerate(self.train_loader), total=nb, bar_format=TQDM_BAR_FORMAT) self.tloss = None self.optimizer.zero_grad() for i, batch in pbar: self.run_callbacks("on_train_batch_start") # Warmup ni = i + nb * epoch if ni <= nw: xi = [0, nw] # x interp self.accumulate = max(1, np.interp(ni, xi, [1, self.args.nbs / self.batch_size]).round()) for j, x in enumerate(self.optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp( ni, xi, [self.args.warmup_bias_lr if j == 0 else 0.0, x['initial_lr'] * self.lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [self.args.warmup_momentum, self.args.momentum]) # Forward with torch.cuda.amp.autocast(self.amp): batch = self.preprocess_batch(batch) preds = self.model(batch["img"]) self.loss, self.loss_items = self.criterion(preds, batch) if rank != -1: self.loss *= world_size self.tloss = (self.tloss * i + self.loss_items) / (i + 1) if self.tloss is not None \ else self.loss_items # Backward self.scaler.scale(self.loss).backward() # Optimize - https://pytorch.org/docs/master/notes/amp_examples.html if ni - last_opt_step >= self.accumulate: self.optimizer_step() last_opt_step = ni # Log mem = f'{torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0:.3g}G' # (GB) loss_len = self.tloss.shape[0] if len(self.tloss.size()) else 1 losses = self.tloss if loss_len > 1 else torch.unsqueeze(self.tloss, 0) if rank in {-1, 0}: pbar.set_description( ('%11s' * 2 + '%11.4g' * (2 + loss_len)) % (f'{epoch + 1}/{self.epochs}', mem, *losses, batch["cls"].shape[0], batch["img"].shape[-1])) self.run_callbacks('on_batch_end') if self.args.plots and ni in self.plot_idx: self.plot_training_samples(batch, ni) self.run_callbacks("on_train_batch_end") self.lr = {f"lr/pg{ir}": x['lr'] for ir, x in enumerate(self.optimizer.param_groups)} # for loggers self.scheduler.step() self.run_callbacks("on_train_epoch_end") if rank in {-1, 0}: # Validation 
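# Only the main process (rank -1 for single-GPU, rank 0 under DDP) performs the validation, checkpoint saving and metric logging below; other ranks skip straight to the next epoch.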
self.ema.update_attr(self.model, include=['yaml', 'nc', 'args', 'names', 'stride', 'class_weights']) final_epoch = (epoch + 1 == self.epochs) if self.args.val or final_epoch: self.metrics, self.fitness = self.validate() self.save_metrics(metrics={**self.label_loss_items(self.tloss), **self.metrics, **self.lr}) # Save model if self.args.save or (epoch + 1 == self.epochs): self.save_model() self.run_callbacks('on_model_save') tnow = time.time() self.epoch_time = tnow - self.epoch_time_start self.epoch_time_start = tnow self.run_callbacks("on_fit_epoch_end") # TODO: termination condition if rank in {-1, 0}: # Do final val with best.pt self.log(f'\n{epoch - self.start_epoch + 1} epochs completed in ' f'{(time.time() - self.train_time_start) / 3600:.3f} hours.') self.final_eval() if self.args.plots: self.plot_metrics() self.log(f"Results saved to {colorstr('bold', self.save_dir)}") self.run_callbacks('on_train_end') torch.cuda.empty_cache() self.run_callbacks('teardown') def save_model(self): ckpt = { 'epoch': self.epoch, 'best_fitness': self.best_fitness, 'model': deepcopy(de_parallel(self.model)).half(), 'ema': deepcopy(self.ema.ema).half(), 'updates': self.ema.updates, 'optimizer': self.optimizer.state_dict(), 'train_args': self.args, 'date': datetime.now().isoformat(), 'version': __version__} # Save last, best and delete torch.save(ckpt, self.last) if self.best_fitness == self.fitness: torch.save(ckpt, self.best) del ckpt def get_dataset(self, data): """ > Get train, val path from data dict if it exists. Returns None if data format is not recognized. """ return data["train"], data.get("val") or data.get("test") def setup_model(self): """ > load/create/download model for any task. """ if isinstance(self.model, torch.nn.Module): # if model is loaded beforehand. No setup needed return model, weights = self.model, None ckpt = None if str(model).endswith(".pt"): weights, ckpt = attempt_load_one_weight(model) cfg = ckpt["model"].yaml else: cfg = model self.model = self.get_model(cfg=cfg, weights=weights) # calls Model(cfg, weights) return ckpt def optimizer_step(self): self.scaler.unscale_(self.optimizer) # unscale gradients torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=10.0) # clip gradients self.scaler.step(self.optimizer) self.scaler.update() self.optimizer.zero_grad() if self.ema: self.ema.update(self.model) def preprocess_batch(self, batch): """ > Allows custom preprocessing of model inputs and ground truths depending on task type. """ return batch def validate(self): """ > Runs validation on test set using self.validator. The returned dict is expected to contain "fitness" key. """ metrics = self.validator(self) fitness = metrics.pop("fitness", -self.loss.detach().cpu().numpy()) # use loss as fitness measure if not found if not self.best_fitness or self.best_fitness < fitness: self.best_fitness = fitness return metrics, fitness def log(self, text, rank=-1): """ > Logs the given text to the given rank's process if provided, otherwise logs to all ranks. Args: text (str): text to log rank (int): process rank """ if rank in {-1, 0}: self.console.info(text) def get_model(self, cfg=None, weights=None, verbose=True): raise NotImplementedError("This task trainer doesn't support loading cfg files") def get_validator(self): raise NotImplementedError("get_validator function not implemented in trainer") def get_dataloader(self, dataset_path, batch_size=16, rank=0): """ > Returns dataloader derived from torch.utils.data.DataLoader.
""" raise NotImplementedError("get_dataloader function not implemented in trainer") def criterion(self, preds, batch): """ > Returns loss and individual loss items as Tensor. """ raise NotImplementedError("criterion function not implemented in trainer") def label_loss_items(self, loss_items=None, prefix="train"): """ Returns a loss dict with labelled training loss items tensor """ # Not needed for classification but necessary for segmentation & detection return {"loss": loss_items} if loss_items is not None else ["loss"] def set_model_attributes(self): """ To set or update model parameters before training. """ self.model.names = self.data["names"] def build_targets(self, preds, targets): pass def progress_string(self): return "" # TODO: may need to put these following functions into callback def plot_training_samples(self, batch, ni): pass def save_metrics(self, metrics): keys, vals = list(metrics.keys()), list(metrics.values()) n = len(metrics) + 1 # number of cols s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header with open(self.csv, 'a') as f: f.write(s + ('%23.5g,' * n % tuple([self.epoch] + vals)).rstrip(',') + '\n') def plot_metrics(self): pass def final_eval(self): for f in self.last, self.best: if f.exists(): strip_optimizer(f) # strip optimizers if f is self.best: self.console.info(f'\nValidating {f}...') self.validator.args.save_json = True self.metrics = self.validator(model=f) self.metrics.pop('fitness', None) self.run_callbacks('on_fit_epoch_end') def check_resume(self): resume = self.args.resume if resume: last = Path(check_file(resume) if isinstance(resume, str) else get_latest_run()) args_yaml = last.parent.parent / 'args.yaml' # train options yaml if args_yaml.is_file(): args = get_config(args_yaml) # replace args.model, resume = str(last), True # reinstate self.args = args self.resume = resume def resume_training(self, ckpt): if ckpt is None: return best_fitness = 0.0 start_epoch = ckpt['epoch'] + 1 if ckpt['optimizer'] is not None: self.optimizer.load_state_dict(ckpt['optimizer']) # optimizer best_fitness = ckpt['best_fitness'] if self.ema and ckpt.get('ema'): self.ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) # EMA self.ema.updates = ckpt['updates'] if self.resume: assert start_epoch > 0, \ f'{self.args.model} training to {self.epochs} epochs is finished, nothing to resume.\n' \ f"Start a new training without --resume, i.e. 'yolo task=... mode=train model={self.args.model}'" LOGGER.info( f'Resuming training from {self.args.model} from epoch {start_epoch} to {self.epochs} total epochs') if self.epochs < start_epoch: LOGGER.info( f"{self.model} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {self.epochs} more epochs.") self.epochs += ckpt['epoch'] # finetune additional epochs self.best_fitness = best_fitness self.start_epoch = start_epoch @staticmethod def build_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5): """ > Builds an optimizer with the specified parameters and parameter groups. Args: model (nn.Module): model to optimize name (str): name of the optimizer to use lr (float): learning rate momentum (float): momentum decay (float): weight decay Returns: optimizer (torch.optim.Optimizer): the built optimizer """ g = [], [], [] # optimizer parameter groups bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. 
BatchNorm2d() for v in model.modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay) g[2].append(v.bias) if isinstance(v, bn): # weight (no decay) g[1].append(v.weight) elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) g[0].append(v.weight) if name == 'Adam': optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum elif name == 'AdamW': optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0) elif name == 'RMSProp': optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum) elif name == 'SGD': optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True) else: raise NotImplementedError(f'Optimizer {name} not implemented.') optimizer.add_param_group({'params': g[0], 'weight_decay': decay}) # add g0 with weight_decay optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights) LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups " f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias") return optimizer ================================================ FILE: yolo/engine/validator.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import json from collections import defaultdict from pathlib import Path import torch from omegaconf import OmegaConf # noqa from tqdm import tqdm from nn.autobackend import AutoBackend from yolo.data.utils import check_dataset, check_dataset_yaml from yolo.utils import DEFAULT_CONFIG, LOGGER, RANK, SETTINGS, TQDM_BAR_FORMAT, callbacks from yolo.utils.checks import check_imgsz from yolo.utils.files import increment_path from yolo.utils.ops import Profile from yolo.utils.torch_utils import de_parallel, select_device, smart_inference_mode class BaseValidator: """ BaseValidator A base class for creating validators. Attributes: dataloader (DataLoader): Dataloader to use for validation. pbar (tqdm): Progress bar to update during validation. logger (logging.Logger): Logger to use for validation. args (OmegaConf): Configuration for the validator. model (nn.Module): Model to validate. data (dict): Data dictionary. device (torch.device): Device to use for validation. batch_i (int): Current batch index. training (bool): Whether the model is in training mode. speed (tuple): Per-image processing times in milliseconds for each stage. jdict (list): List to accumulate JSON-style validation results. save_dir (Path): Directory to save results. """ def __init__(self, dataloader=None, save_dir=None, pbar=None, logger=None, args=None): """ Initializes a BaseValidator instance. Args: dataloader (torch.utils.data.DataLoader): Dataloader to be used for validation. save_dir (Path): Directory to save results. pbar (tqdm.tqdm): Progress bar for displaying progress. logger (logging.Logger): Logger to log messages. args (OmegaConf): Configuration for the validator.
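Example (hypothetical subclass; BaseValidator itself leaves get_dataloader() etc. abstract): metrics = MyDetectionValidator(args=args)(model='yolov8n.pt') validates a standalone checkpoint, while calling with trainer= instead validates the in-training EMA model.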
""" self.dataloader = dataloader self.pbar = pbar self.logger = logger or LOGGER self.args = args or OmegaConf.load(DEFAULT_CONFIG) self.model = None self.data = None self.device = None self.batch_i = None self.training = True self.speed = None self.jdict = None project = self.args.project or Path(SETTINGS['runs_dir']) / self.args.task name = self.args.name or f"{self.args.mode}" self.save_dir = save_dir or increment_path(Path(project) / name, exist_ok=self.args.exist_ok if RANK in {-1, 0} else True) (self.save_dir / 'labels' if self.args.save_txt else self.save_dir).mkdir(parents=True, exist_ok=True) if self.args.conf is None: self.args.conf = 0.001 # default conf=0.001 self.callbacks = defaultdict(list, {k: [v] for k, v in callbacks.default_callbacks.items()}) # add callbacks @smart_inference_mode() def __call__(self, trainer=None, model=None): """ Supports validation of a pre-trained model if passed or a model being trained if trainer is passed (trainer gets priority). """ self.training = trainer is not None if self.training: self.device = trainer.device self.data = trainer.data model = trainer.ema.ema or trainer.model self.args.half = self.device.type != 'cpu' # force FP16 val during training model = model.half() if self.args.half else model.float() self.model = model self.loss = torch.zeros_like(trainer.loss_items, device=trainer.device) self.args.plots = trainer.epoch == trainer.epochs - 1 # always plot final epoch model.eval() else: callbacks.add_integration_callbacks(self) self.run_callbacks('on_val_start') assert model is not None, "Either trainer or model is needed for validation" self.device = select_device(self.args.device, self.args.batch) self.args.half &= self.device.type != 'cpu' model = AutoBackend(model, device=self.device, dnn=self.args.dnn, fp16=self.args.half) self.model = model stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine imgsz = check_imgsz(self.args.imgsz, stride=stride) if engine: self.args.batch = model.batch_size else: self.device = model.device if not pt and not jit: self.args.batch = 1 # export.py models default to batch-size 1 self.logger.info( f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') if isinstance(self.args.data, str) and self.args.data.endswith(".yaml"): self.data = check_dataset_yaml(self.args.data) else: self.data = check_dataset(self.args.data) if self.device.type == 'cpu': self.args.workers = 0 # faster CPU val as time dominated by inference, not dataloading self.dataloader = self.dataloader or \ self.get_dataloader(self.data.get("val") or self.data.set("test"), self.args.batch) model.eval() model.warmup(imgsz=(1 if pt else self.args.batch, 3, imgsz, imgsz)) # warmup dt = Profile(), Profile(), Profile(), Profile() n_batches = len(self.dataloader) desc = self.get_desc() # NOTE: keeping `not self.training` in tqdm will eliminate pbar after segmentation evaluation during training, # which may affect classification task since this arg is in yolov5/classify/val.py. 
# bar = tqdm(self.dataloader, desc, n_batches, not self.training, bar_format=TQDM_BAR_FORMAT) bar = tqdm(self.dataloader, desc, n_batches, bar_format=TQDM_BAR_FORMAT) self.init_metrics(de_parallel(model)) self.jdict = [] # empty before each val for batch_i, batch in enumerate(bar): self.run_callbacks('on_val_batch_start') self.batch_i = batch_i # pre-process with dt[0]: batch = self.preprocess(batch) # inference with dt[1]: preds = model(batch["img"]) # loss with dt[2]: if self.training: self.loss += trainer.criterion(preds, batch)[1] # post-process predictions with dt[3]: preds = self.postprocess(preds) self.update_metrics(preds, batch) if self.args.plots and batch_i < 3: self.plot_val_samples(batch, batch_i) self.plot_predictions(batch, preds, batch_i) self.run_callbacks('on_val_batch_end') stats = self.get_stats() self.check_stats(stats) self.print_results() self.speed = tuple(x.t / len(self.dataloader.dataset) * 1E3 for x in dt) # speeds per image self.run_callbacks('on_val_end') if self.training: model.float() results = {**stats, **trainer.label_loss_items(self.loss.cpu() / len(self.dataloader), prefix="val")} return {k: round(float(v), 5) for k, v in results.items()} # return results as 5 decimal place floats else: self.logger.info('Speed: %.1fms pre-process, %.1fms inference, %.1fms loss, %.1fms post-process per image' % self.speed) if self.args.save_json and self.jdict: with open(str(self.save_dir / "predictions.json"), 'w') as f: self.logger.info(f"Saving {f.name}...") json.dump(self.jdict, f) # flatten and save stats = self.eval_json(stats) # update stats return stats def run_callbacks(self, event: str): for callback in self.callbacks.get(event, []): callback(self) def get_dataloader(self, dataset_path, batch_size): raise NotImplementedError("get_dataloader function not implemented for this validator") def preprocess(self, batch): return batch def postprocess(self, preds): return preds def init_metrics(self, model): pass def update_metrics(self, preds, batch): pass def get_stats(self): return {} def check_stats(self, stats): pass def print_results(self): pass def get_desc(self): pass @property def metric_keys(self): return [] # TODO: may need to put these following functions into callback def plot_val_samples(self, batch, ni): pass def plot_predictions(self, batch, preds, ni): pass def pred_to_json(self, preds, batch): pass def eval_json(self, stats): pass ================================================ FILE: yolo/utils/__init__.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import contextlib import inspect import logging.config import os import platform import subprocess import sys import tempfile import threading import uuid from pathlib import Path import cv2 import numpy as np import pandas as pd import torch import yaml # Constants FILE = Path(__file__).resolve() ROOT = FILE.parents[2] # YOLO DEFAULT_CONFIG = ROOT / "yolo/configs/default.yaml" RANK = int(os.getenv('RANK', -1)) NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode TQDM_BAR_FORMAT = '{l_bar}{bar:10}{r_bar}' # tqdm bar format LOGGING_NAME = 'yolov5' HELP_MSG = \ """ Usage examples for running YOLOv8: 1. Install the ultralytics package: pip install ultralytics 2.
Use the Python SDK: from ultralytics import YOLO model = YOLO('yolov8n.yaml') # build a new model from scratch model = YOLO('yolov8n.pt') # load a pretrained model (recommended for best training results) results = model.train(data='coco128.yaml') # train the model results = model.val() # evaluate model performance on the validation set results = model.predict(source='bus.jpg') # predict on an image success = model.export(format='onnx') # export the model to ONNX format 3. Use the command line interface (CLI): yolo task=detect mode=train model=yolov8n.yaml args... classify predict yolov8n-cls.yaml args... segment val yolov8n-seg.yaml args... export yolov8n.pt format=onnx args... Docs: https://docs.ultralytics.com Community: https://community.ultralytics.com GitHub: https://github.com/ultralytics/ultralytics """ # Settings torch.set_printoptions(linewidth=320, precision=5, profile='long') np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 pd.options.display.max_columns = 10 cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' # for deterministic training # Default config dictionary with open(DEFAULT_CONFIG, errors='ignore') as f: DEFAULT_CONFIG_DICT = yaml.safe_load(f) DEFAULT_CONFIG_KEYS = DEFAULT_CONFIG_DICT.keys() def is_colab(): """ Check if the current script is running inside a Google Colab notebook. Returns: bool: True if running inside a Colab notebook, False otherwise. """ # Check if the google.colab module is present in sys.modules return 'google.colab' in sys.modules def is_kaggle(): """ Check if the current script is running inside a Kaggle kernel. Returns: bool: True if running inside a Kaggle kernel, False otherwise. """ return os.environ.get('PWD') == '/kaggle/working' and os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com' def is_jupyter_notebook(): """ Check if the current script is running inside a Jupyter Notebook. Verified on Colab, Jupyterlab, Kaggle, Paperspace. Returns: bool: True if running inside a Jupyter Notebook, False otherwise. """ # Check if the get_ipython function exists # (it does not exist when running as a standalone script) try: from IPython import get_ipython return get_ipython() is not None except ImportError: return False def is_docker() -> bool: """ Determine if the script is running inside a Docker container. Returns: bool: True if the script is running inside a Docker container, False otherwise. """ with open('/proc/self/cgroup') as f: return 'docker' in f.read() def is_git_directory() -> bool: """ Check if the current working directory is inside a git repository. Returns: bool: True if the current working directory is inside a git repository, False otherwise. """ from git import Repo try: # Check if the current working directory is a git repository Repo(search_parent_directories=True) return True except Exception: return False def is_pip_package(filepath: str = __name__) -> bool: """ Determines if the file at the given filepath is part of a pip package. Args: filepath (str): The filepath to check. Returns: bool: True if the file is part of a pip package, False otherwise.
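Example (illustrative): is_pip_package('numpy') returns True in an environment where numpy is installed, since importlib.util.find_spec('numpy') then yields a spec with a non-None origin.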
""" import importlib.util # Get the spec for the module spec = importlib.util.find_spec(filepath) # Return whether the spec is not None and the origin is not None (indicating it is a package) return spec is not None and spec.origin is not None def is_dir_writeable(dir_path: str) -> bool: """ Check if a directory is writeable. Args: dir_path (str): The path to the directory. Returns: bool: True if the directory is writeable, False otherwise. """ try: with tempfile.TemporaryFile(dir=dir_path): pass return True except OSError: return False def get_git_root_dir(): """ Determines whether the current file is part of a git repository and if so, returns the repository root directory. If the current file is not part of a git repository, returns None. """ try: output = subprocess.run(["git", "rev-parse", "--git-dir"], capture_output=True, check=True) return Path(output.stdout.strip().decode('utf-8')).parent # parent/.git except subprocess.CalledProcessError: return None def get_default_args(func): # Get func() default arguments signature = inspect.signature(func) return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty} def get_user_config_dir(sub_dir='Ultralytics'): """ Get the user config directory. Args: sub_dir (str): The name of the subdirectory to create. Returns: Path: The path to the user config directory. """ # Get the operating system name os_name = platform.system() # Return the appropriate config directory for each operating system if os_name == 'Windows': path = Path.home() / 'AppData' / 'Roaming' / sub_dir elif os_name == 'Darwin': # macOS path = Path.home() / 'Library' / 'Application Support' / sub_dir elif os_name == 'Linux': path = Path.home() / '.config' / sub_dir else: raise ValueError(f'Unsupported operating system: {os_name}') # GCP and AWS lambda fix, only /tmp is writeable if not is_dir_writeable(str(path.parent)): path = Path('/tmp') / sub_dir # Create the subdirectory if it does not exist path.mkdir(parents=True, exist_ok=True) return path USER_CONFIG_DIR = get_user_config_dir() # Ultralytics settings dir def emojis(string=''): # Return platform-dependent emoji-safe version of string return string.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else string def colorstr(*input): # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. 
colorstr('blue', 'hello world') *args, string = input if len(input) > 1 else ("blue", "bold", input[0]) # color arguments, string colors = { "black": "\033[30m", # basic colors "red": "\033[31m", "green": "\033[32m", "yellow": "\033[33m", "blue": "\033[34m", "magenta": "\033[35m", "cyan": "\033[36m", "white": "\033[37m", "bright_black": "\033[90m", # bright colors "bright_red": "\033[91m", "bright_green": "\033[92m", "bright_yellow": "\033[93m", "bright_blue": "\033[94m", "bright_magenta": "\033[95m", "bright_cyan": "\033[96m", "bright_white": "\033[97m", "end": "\033[0m", # misc "bold": "\033[1m", "underline": "\033[4m",} return "".join(colors[x] for x in args) + f"{string}" + colors["end"] def set_logging(name=LOGGING_NAME, verbose=True): # sets up logging for the given name rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR logging.config.dictConfig({ "version": 1, "disable_existing_loggers": False, "formatters": { name: { "format": "%(message)s"}}, "handlers": { name: { "class": "logging.StreamHandler", "formatter": name, "level": level,}}, "loggers": { name: { "level": level, "handlers": [name], "propagate": False,}}}) class TryExcept(contextlib.ContextDecorator): # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager def __init__(self, msg=''): self.msg = msg def __enter__(self): pass def __exit__(self, exc_type, value, traceback): if value: print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}")) return True def threaded(func): # Multi-threads a target function and returns thread. Usage: @threaded decorator def wrapper(*args, **kwargs): thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) thread.start() return thread return wrapper def yaml_save(file='data.yaml', data=None): """ Save YAML data to a file. Args: file (str, optional): File name. Default is 'data.yaml'. data (dict, optional): Data to save in YAML format. Default is None. Returns: None: Data is saved to the specified file. """ file = Path(file) if not file.parent.exists(): # Create parent directories if they don't exist file.parent.mkdir(parents=True, exist_ok=True) with open(file, 'w') as f: # Dump data to file in YAML format, converting Path objects to strings yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False) def yaml_load(file='data.yaml', append_filename=False): """ Load YAML data from a file. Args: file (str, optional): File name. Default is 'data.yaml'. append_filename (bool): Add the YAML filename to the YAML dictionary. Default is False. Returns: dict: YAML data and file name. """ with open(file, errors='ignore') as f: # Add YAML filename to dict and return return {**yaml.safe_load(f), 'yaml_file': str(file)} if append_filename else yaml.safe_load(f) def get_settings(file=USER_CONFIG_DIR / 'settings.yaml'): """ Loads a global settings YAML file or creates one with default values if it does not exist. Args: file (Path): Path to the settings YAML file. Defaults to 'settings.yaml' in the USER_CONFIG_DIR. Returns: dict: Dictionary of settings key-value pairs. """ from yolo.utils.torch_utils import torch_distributed_zero_first root = get_git_root_dir() or Path('') # not is_pip_package() defaults = { 'datasets_dir': str(root / 'datasets'), # default datasets directory. 'weights_dir': str(root / 'weights'), # default weights directory. 'runs_dir': str(root / 'runs'), # default runs directory. 
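# Illustrative first-run result (paths are assumptions, not real output): with root='/repo', the created settings.yaml would contain
# datasets_dir: /repo/datasets, weights_dir: /repo/weights, runs_dir: /repo/runs, plus the sync flag and numeric device uuid below.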
'sync': True, # sync analytics to help with YOLO development 'uuid': uuid.getnode()} # device UUID to align analytics with torch_distributed_zero_first(RANK): if not file.exists(): yaml_save(file, defaults) settings = yaml_load(file) # Check that settings keys and types match defaults correct = settings.keys() == defaults.keys() and \ all(type(a) == type(b) for a, b in zip(settings.values(), defaults.values())) if not correct: LOGGER.warning('WARNING ⚠️ Different global settings detected, resetting to defaults. ' 'This may be due to an ultralytics package update. ' f'View and update your global settings directly in {file}') settings = defaults # reset settings to defaults yaml_save(file, settings) # save updated defaults return settings # Run below code on utils init ----------------------------------------------------------------------------------------- # Set logger set_logging(LOGGING_NAME) # run before defining LOGGER LOGGER = logging.getLogger(LOGGING_NAME) # define globally (used in train.py, val.py, detect.py, etc.) if platform.system() == 'Windows': for fn in LOGGER.info, LOGGER.warning: setattr(LOGGER, fn.__name__, lambda x: fn(emojis(x))) # emoji safe logging # Check first-install steps SETTINGS = get_settings() DATASETS_DIR = Path(SETTINGS['datasets_dir']) # global datasets directory def set_settings(kwargs, file=USER_CONFIG_DIR / 'settings.yaml'): """ Function that runs on a first-time ultralytics package installation to set up global settings and create necessary directories. """ SETTINGS.update(kwargs) yaml_save(file, SETTINGS) ================================================ FILE: yolo/utils/autobatch.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Auto-batch utils """ from copy import deepcopy import numpy as np import torch from yolo.utils import LOGGER, colorstr from yolo.utils.torch_utils import profile def check_train_batch_size(model, imgsz=640, amp=True): # Check YOLOv5 training batch size with torch.cuda.amp.autocast(amp): return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size def autobatch(model, imgsz=640, fraction=0.7, batch_size=16): # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory # Usage: # import torch # from yolo.utils.autobatch import autobatch # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) # print(autobatch(model)) # Check device prefix = colorstr('AutoBatch: ') LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device if device.type == 'cpu': LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') return batch_size if torch.backends.cudnn.benchmark: LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}') return batch_size # Inspect CUDA memory gb = 1 << 30 # bytes to GiB (1024 ** 3) d = str(device).upper() # 'CUDA:0' properties = torch.cuda.get_device_properties(device) # device properties t = properties.total_memory / gb # GiB total r = torch.cuda.memory_reserved(device) / gb # GiB reserved a = torch.cuda.memory_allocated(device) / gb # GiB allocated f = t - (r + a) # GiB free LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free') # Profile batch sizes batch_sizes = [1, 2, 4, 8, 16] try: img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] results = profile(img, model,
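# profile() from yolo.utils.torch_utils runs n=3 timed passes per candidate batch size; each result row carries the measured CUDA memory at index [2], which feeds the linear fit below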
n=3, device=device) except Exception as e: LOGGER.warning(f'{prefix}{e}') return batch_size # profiling failed, fall back to the default batch size # Fit a solution y = [x[2] for x in results if x] # memory [2] p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) if None in results: # some sizes failed i = results.index(None) # first fail index if b >= batch_sizes[i]: # y intercept above failure point b = batch_sizes[max(i - 1, 0)] # select prior safe point if b < 1 or b > 1024: # b outside of safe range b = batch_size LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') return b ================================================ FILE: yolo/utils/callbacks/__init__.py ================================================ from .base import add_integration_callbacks, default_callbacks ================================================ FILE: yolo/utils/callbacks/base.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Base callbacks """ # Trainer callbacks ---------------------------------------------------------------------------------------------------- def on_pretrain_routine_start(trainer): pass def on_pretrain_routine_end(trainer): pass def on_train_start(trainer): pass def on_train_epoch_start(trainer): pass def on_train_batch_start(trainer): pass def optimizer_step(trainer): pass def on_before_zero_grad(trainer): pass def on_train_batch_end(trainer): pass def on_train_epoch_end(trainer): pass def on_fit_epoch_end(trainer): pass def on_model_save(trainer): pass def on_train_end(trainer): pass def on_params_update(trainer): pass def teardown(trainer): pass # Validator callbacks -------------------------------------------------------------------------------------------------- def on_val_start(validator): pass def on_val_batch_start(validator): pass def on_val_batch_end(validator): pass def on_val_end(validator): pass # Predictor callbacks -------------------------------------------------------------------------------------------------- def on_predict_start(predictor): pass def on_predict_batch_start(predictor): pass def on_predict_batch_end(predictor): pass def on_predict_end(predictor): pass # Exporter callbacks --------------------------------------------------------------------------------------------------- def on_export_start(exporter): pass def on_export_end(exporter): pass default_callbacks = { # Run in trainer 'on_pretrain_routine_start': on_pretrain_routine_start, 'on_pretrain_routine_end': on_pretrain_routine_end, 'on_train_start': on_train_start, 'on_train_epoch_start': on_train_epoch_start, 'on_train_batch_start': on_train_batch_start, 'optimizer_step': optimizer_step, 'on_before_zero_grad': on_before_zero_grad, 'on_train_batch_end': on_train_batch_end, 'on_train_epoch_end': on_train_epoch_end, 'on_fit_epoch_end': on_fit_epoch_end, # fit = train + val 'on_model_save': on_model_save, 'on_train_end': on_train_end, 'on_params_update': on_params_update, 'teardown': teardown, # Run in validator 'on_val_start': on_val_start, 'on_val_batch_start': on_val_batch_start, 'on_val_batch_end': on_val_batch_end, 'on_val_end': on_val_end, # Run in predictor 'on_predict_start': on_predict_start, 'on_predict_batch_start': on_predict_batch_start, 'on_predict_batch_end': on_predict_batch_end, 'on_predict_end':
on_predict_end, # Run in exporter 'on_export_start': on_export_start, 'on_export_end': on_export_end} def add_integration_callbacks(instance): from .clearml import callbacks as clearml_callbacks from .comet import callbacks as comet_callbacks from .hub import callbacks as hub_callbacks from .tensorboard import callbacks as tb_callbacks for x in clearml_callbacks, comet_callbacks, hub_callbacks, tb_callbacks: for k, v in x.items(): instance.callbacks[k].append(v) # callback[name].append(func) ================================================ FILE: yolo/utils/callbacks/clearml.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from yolo.utils.torch_utils import get_flops, get_num_params try: import clearml from clearml import Task assert hasattr(clearml, '__version__') except (ImportError, AssertionError): clearml = None def _log_images(imgs_dict, group="", step=0): task = Task.current_task() if task: for k, v in imgs_dict.items(): task.get_logger().report_image(group, k, step, v) def on_pretrain_routine_start(trainer): # TODO: reuse existing task task = Task.init(project_name=trainer.args.project or "YOLOv8", task_name=trainer.args.name, tags=['YOLOv8'], output_uri=True, reuse_last_task_id=False, auto_connect_frameworks={'pytorch': False}) task.connect(dict(trainer.args), name='General') def on_train_epoch_end(trainer): if trainer.epoch == 1: _log_images({f.stem: str(f) for f in trainer.save_dir.glob('train_batch*.jpg')}, "Mosaic", trainer.epoch) def on_fit_epoch_end(trainer): if trainer.epoch == 0: model_info = { "Parameters": get_num_params(trainer.model), "GFLOPs": round(get_flops(trainer.model), 3), "Inference speed (ms/img)": round(trainer.validator.speed[1], 3)} Task.current_task().connect(model_info, name='Model') def on_train_end(trainer): Task.current_task().update_output_model(model_path=str(trainer.best), model_name=trainer.args.name, auto_delete_file=False) callbacks = { "on_pretrain_routine_start": on_pretrain_routine_start, "on_train_epoch_end": on_train_epoch_end, "on_fit_epoch_end": on_fit_epoch_end, "on_train_end": on_train_end} if clearml else {} ================================================ FILE: yolo/utils/callbacks/comet.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from yolo.utils.torch_utils import get_flops, get_num_params try: import comet_ml except (ModuleNotFoundError, ImportError): comet_ml = None def on_pretrain_routine_start(trainer): experiment = comet_ml.Experiment(project_name=trainer.args.project or "YOLOv8",) experiment.log_parameters(dict(trainer.args)) def on_train_epoch_end(trainer): experiment = comet_ml.get_global_experiment() experiment.log_metrics(trainer.label_loss_items(trainer.tloss, prefix="train"), step=trainer.epoch + 1) if trainer.epoch == 1: for f in trainer.save_dir.glob('train_batch*.jpg'): experiment.log_image(f, name=f.stem, step=trainer.epoch + 1) def on_fit_epoch_end(trainer): experiment = comet_ml.get_global_experiment() experiment.log_metrics(trainer.metrics, step=trainer.epoch + 1) if trainer.epoch == 0: model_info = { "model/parameters": get_num_params(trainer.model), "model/GFLOPs": round(get_flops(trainer.model), 3), "model/speed(ms)": round(trainer.validator.speed[1], 3)} experiment.log_metrics(model_info, step=trainer.epoch + 1) def on_train_end(trainer): experiment = comet_ml.get_global_experiment() experiment.log_model("YOLOv8", file_or_folder=trainer.best, file_name="best.pt", overwrite=True) callbacks = { "on_pretrain_routine_start": 
on_pretrain_routine_start, "on_train_epoch_end": on_train_epoch_end, "on_fit_epoch_end": on_fit_epoch_end, "on_train_end": on_train_end} if comet_ml else {} ================================================ FILE: yolo/utils/callbacks/hub.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import json from time import time import torch from hub.utils import PREFIX, sync_analytics from yolo.utils import LOGGER def on_pretrain_routine_end(trainer): session = getattr(trainer, 'hub_session', None) if session: # Start timer for upload rate limit LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀") session.t = {'metrics': time(), 'ckpt': time()} # start timer on self.rate_limit def on_fit_epoch_end(trainer): session = getattr(trainer, 'hub_session', None) if session: session.metrics_queue[trainer.epoch] = json.dumps(trainer.metrics) # json string if time() - session.t['metrics'] > session.rate_limits['metrics']: session.upload_metrics() session.t['metrics'] = time() # reset timer session.metrics_queue = {} # reset queue def on_model_save(trainer): session = getattr(trainer, 'hub_session', None) if session: # Upload checkpoints with rate limiting is_best = trainer.best_fitness == trainer.fitness if time() - session.t['ckpt'] > session.rate_limits['ckpt']: LOGGER.info(f"{PREFIX}Uploading checkpoint {session.model_id}") session.upload_model(trainer.epoch, trainer.last, is_best) session.t['ckpt'] = time() # reset timer def on_train_end(trainer): session = getattr(trainer, 'hub_session', None) if session: # Upload final model and metrics with exponential backoff LOGGER.info(f"{PREFIX}Training completed successfully ✅\n" f"{PREFIX}Uploading final {session.model_id}") session.upload_model(trainer.epoch, trainer.best, map=trainer.metrics['metrics/mAP50-95(B)'], final=True) session.alive = False # stop heartbeats LOGGER.info(f"{PREFIX}View model at https://hub.ultralytics.com/models/{session.model_id} 🚀") def on_train_start(trainer): sync_analytics(trainer.args) def on_val_start(validator): sync_analytics(validator.args) def on_predict_start(predictor): sync_analytics(predictor.args) def on_export_start(exporter): sync_analytics(exporter.args) callbacks = { "on_pretrain_routine_end": on_pretrain_routine_end, "on_fit_epoch_end": on_fit_epoch_end, "on_model_save": on_model_save, "on_train_end": on_train_end, "on_train_start": on_train_start, "on_val_start": on_val_start, "on_predict_start": on_predict_start, "on_export_start": on_export_start} ================================================ FILE: yolo/utils/callbacks/tensorboard.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from torch.utils.tensorboard import SummaryWriter writer = None # TensorBoard SummaryWriter instance def _log_scalars(scalars, step=0): for k, v in scalars.items(): writer.add_scalar(k, v, step) def on_pretrain_routine_start(trainer): global writer writer = SummaryWriter(str(trainer.save_dir)) def on_batch_end(trainer): _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1) def on_fit_epoch_end(trainer): _log_scalars(trainer.metrics, trainer.epoch + 1) callbacks = { "on_pretrain_routine_start": on_pretrain_routine_start, "on_fit_epoch_end": on_fit_epoch_end, "on_batch_end": on_batch_end} ================================================ FILE: yolo/utils/checks.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import glob import inspect import math import platform
import urllib from pathlib import Path from subprocess import check_output from typing import Optional import cv2 import numpy as np import pkg_resources as pkg import torch from yolo.utils import (AUTOINSTALL, FONT, LOGGER, ROOT, USER_CONFIG_DIR, TryExcept, colorstr, emojis, is_docker, is_jupyter_notebook) def is_ascii(s) -> bool: """ Check if a string is composed of only ASCII characters. Args: s (str): String to be checked. Returns: bool: True if the string is composed only of ASCII characters, False otherwise. """ # Convert list, tuple, None, etc. to string s = str(s) # Check if the string is composed of only ASCII characters return all(ord(c) < 128 for c in s) def check_imgsz(imgsz, stride=32, min_dim=1, floor=0): """ Verify image size is a multiple of the given stride in each dimension. If the image size is not a multiple of the stride, update it to the nearest multiple of the stride that is greater than or equal to the given floor value. Args: imgsz (int or List[int]): Image size. stride (int): Stride value. min_dim (int): Minimum number of dimensions. floor (int): Minimum allowed value for image size. Returns: List[int]: Updated image size. """ # Convert stride to integer if it is a tensor stride = int(stride.max() if isinstance(stride, torch.Tensor) else stride) # Convert image size to list if it is an integer if isinstance(imgsz, int): imgsz = [imgsz] # Make image size a multiple of the stride sz = [max(math.ceil(x / stride) * stride, floor) for x in imgsz] # Print warning message if image size was updated if sz != imgsz: LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {stride}, updating to {sz}') # Add missing dimensions if necessary sz = [sz[0], sz[0]] if min_dim == 2 and len(sz) == 1 else sz[0] if min_dim == 1 and len(sz) == 1 else sz return sz def check_version(current: str = "0.0.0", minimum: str = "0.0.0", name: str = "version ", pinned: bool = False, hard: bool = False, verbose: bool = False) -> bool: """ Check current version against the required minimum version. Args: current (str): Current version. minimum (str): Required minimum version. name (str): Name to be used in warning message. pinned (bool): If True, versions must match exactly. If False, minimum version must be satisfied. hard (bool): If True, raise an AssertionError if the minimum version is not met. verbose (bool): If True, print warning message if minimum version is not met. Returns: bool: True if minimum version is met, False otherwise. """ from pkg_resources import parse_version current, minimum = (parse_version(x) for x in (current, minimum)) result = (current == minimum) if pinned else (current >= minimum) # bool warning_message = f"WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed" if hard: assert result, emojis(warning_message) # assert min requirements met if verbose and not result: LOGGER.warning(warning_message) return result def check_font(font: str = FONT, progress: bool = False) -> None: """ Download font file to the user's configuration directory if it does not already exist. Args: font (str): Path to font file. progress (bool): If True, display a progress bar during the download. 
Returns: None """ font = Path(font) # Destination path for the font file file = USER_CONFIG_DIR / font.name # Check if font file exists at the source or destination path if not font.exists() and not file.exists(): # Download font file url = f'https://ultralytics.com/assets/{font.name}' LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=progress) def check_online() -> bool: """ Check internet connectivity by attempting to connect to a known online host. Returns: bool: True if connection is successful, False otherwise. """ import socket try: # Check host accessibility by attempting to establish a connection socket.create_connection(("1.1.1.1", 443), timeout=5) return True except OSError: return False def check_python(minimum: str = '3.7.0') -> bool: """ Check current python version against the required minimum version. Args: minimum (str): Required minimum version of python. Returns: bool: True if the minimum version is met (check_version raises an AssertionError otherwise, since hard=True). """ return check_version(platform.python_version(), minimum, name='Python ', hard=True) @TryExcept() def check_requirements(requirements=ROOT.parent / 'requirements.txt', exclude=(), install=True, cmds=''): # Check installed dependencies meet YOLOv5 requirements (pass *.txt file or list of packages or single package str) prefix = colorstr('red', 'bold', 'requirements:') check_python() # check python version if isinstance(requirements, Path): # requirements.txt file file = requirements.resolve() assert file.exists(), f"{prefix} {file} not found, check failed." with file.open() as f: requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] elif isinstance(requirements, str): requirements = [requirements] s = '' n = 0 for r in requirements: try: pkg.require(r) except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met s += f'"{r}" ' n += 1 if s and install and AUTOINSTALL: # check environment variable LOGGER.info(f"{prefix} YOLOv5 requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...") try: assert check_online(), "AutoUpdate skipped (offline)" LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode()) source = file if 'file' in locals() else requirements s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" LOGGER.info(s) except Exception as e: LOGGER.warning(f'{prefix} ❌ {e}') def check_suffix(file='yolov8n.pt', suffix=('.pt',), msg=''): # Check file(s) for acceptable suffix if file and suffix: if isinstance(suffix, str): suffix = [suffix] for f in file if isinstance(file, (list, tuple)) else [file]: s = Path(f).suffix.lower() # file suffix if len(s): assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}" def check_file(file, suffix=''): # Search/download file (if necessary) and return path check_suffix(file, suffix) # optional file = str(file) # convert to str() if Path(file).is_file() or not file: # exists return file elif file.startswith(('http:/', 'https:/')): # download url = file # warning: Pathlib turns :// -> :/ file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth if Path(file).is_file(): LOGGER.info(f'Found {url} locally at {file}') # file already exists else: LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, file) assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check return file else: # search files = [] for d
in 'models', 'yolo/data': # search directories files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file assert len(files), f'File not found: {file}' # assert file was found assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file def check_yaml(file, suffix=('.yaml', '.yml')): # Search/download YAML file (if necessary) and return path, checking suffix return check_file(file, suffix) def check_imshow(warn=False): # Check if environment supports image displays try: assert not is_jupyter_notebook() assert not is_docker() cv2.imshow('test', np.zeros((1, 1, 3))) cv2.waitKey(1) cv2.destroyAllWindows() cv2.waitKey(1) return True except Exception as e: if warn: LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}') return False def git_describe(path=ROOT): # path must be a directory # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe try: assert (Path(path) / '.git').is_dir() return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1] except Exception: return '' def print_args(args: Optional[dict] = None, show_file=True, show_func=False): # Print function arguments (optional args dict) x = inspect.currentframe().f_back # previous frame file, _, func, _, _ = inspect.getframeinfo(x) if args is None: # get args automatically args, _, _, frm = inspect.getargvalues(x) args = {k: v for k, v in frm.items() if k in args} try: file = Path(file).resolve().relative_to(ROOT).with_suffix('') except ValueError: file = Path(file).stem s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '') LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items())) ================================================ FILE: yolo/utils/dist.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import os import shutil import socket import sys import tempfile from . import USER_CONFIG_DIR def find_free_network_port() -> int: # https://github.com/Lightning-AI/lightning/blob/master/src/lightning_lite/plugins/environments/lightning.py """Finds a free port on localhost. It is useful in single-node training when we don't want to connect to a real main node but have to set the `MASTER_PORT` environment variable. 
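Example (illustrative): os.environ['MASTER_PORT'] = str(find_free_network_port()) might assign an ephemeral port such as 51234; the exact number differs on every call.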
""" s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(("", 0)) port = s.getsockname()[1] s.close() return port def generate_ddp_file(trainer): import_path = '.'.join(str(trainer.__class__).split(".")[1:-1]) if not trainer.resume: shutil.rmtree(trainer.save_dir) # remove the save_dir content = f'''config = {dict(trainer.args)} \nif __name__ == "__main__": from {import_path} import {trainer.__class__.__name__} trainer = {trainer.__class__.__name__}(config=config) trainer.train()''' (USER_CONFIG_DIR / 'DDP').mkdir(exist_ok=True) with tempfile.NamedTemporaryFile(prefix="_temp_", suffix=f"{id(trainer)}.py", mode="w+", encoding='utf-8', dir=USER_CONFIG_DIR / 'DDP', delete=False) as file: file.write(content) return file.name def generate_ddp_command(world_size, trainer): import __main__ # noqa local import to avoid https://github.com/Lightning-AI/lightning/issues/15218 file_name = os.path.abspath(sys.argv[0]) using_cli = not file_name.endswith(".py") if using_cli: file_name = generate_ddp_file(trainer) return [ sys.executable, "-m", "torch.distributed.run", "--nproc_per_node", f"{world_size}", "--master_port", f"{find_free_network_port()}", file_name] + sys.argv[1:] def ddp_cleanup(command, trainer): # delete temp file if created tempfile_suffix = f"{id(trainer)}.py" if tempfile_suffix in "".join(command): for chunk in command: if tempfile_suffix in chunk: os.remove(chunk) break ================================================ FILE: yolo/utils/downloads.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import logging import os import subprocess import urllib from itertools import repeat from multiprocessing.pool import ThreadPool from pathlib import Path from zipfile import ZipFile import requests import torch from yolo.utils import LOGGER def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes file = Path(file) assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" try: # url1 LOGGER.info(f'Downloading {url} to {file}...') torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check except Exception as e: # url2 if file.exists(): file.unlink() # remove partial downloads LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail finally: if not file.exists() or file.stat().st_size < min_bytes: # check if file.exists(): file.unlink() # remove partial downloads LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}") LOGGER.info('') def is_url(url, check=True): # Check if string is URL and check if URL exists try: url = str(url) result = urllib.parse.urlparse(url) assert all([result.scheme, result.netloc]) # check if is url return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online except (AssertionError, urllib.request.HTTPError): return False def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'): # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc. def github_assets(repository, version='latest'): # Return GitHub repo tag and assets (i.e. ['yolov8n.pt', 'yolov5m.pt', ...]) # Return GitHub repo tag and assets (i.e. 
        if version != 'latest':
            version = f'tags/{version}'  # i.e. tags/v6.2
        response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json()  # github api
        return response['tag_name'], [x['name'] for x in response['assets']]  # tag, assets

    file = Path(str(file).strip().replace("'", ''))
    if not file.exists():
        # URL specified
        name = Path(urllib.parse.unquote(str(file))).name  # decode '%2F' to '/' etc.
        if str(file).startswith(('http:/', 'https:/')):  # download
            url = str(file).replace(':/', '://')  # Pathlib turns :// -> :/
            file = name.split('?')[0]  # parse authentication https://url.com/file.txt?auth...
            if Path(file).is_file():
                LOGGER.info(f'Found {url} locally at {file}')  # file already exists
            else:
                safe_download(file=file, url=url, min_bytes=1E5)
            return file

        # GitHub assets
        assets = [f'yolov8{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')]  # default
        try:
            tag, assets = github_assets(repo, release)
        except Exception:
            try:
                tag, assets = github_assets(repo)  # latest release
            except Exception:
                try:
                    tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
                except Exception:
                    tag = release

        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
        if name in assets:
            url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl'  # backup gdrive mirror
            safe_download(
                file,
                url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
                min_bytes=1E5,
                error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}')

    return str(file)


def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):
    # Multithreaded file download and unzip function, used in data.yaml for autodownload
    def download_one(url, dir):
        # Download 1 file
        success = True
        if Path(url).is_file():
            f = Path(url)  # filename
        else:  # does not exist
            f = dir / Path(url).name
            LOGGER.info(f'Downloading {url} to {f}...')
            for i in range(retry + 1):
                if curl:
                    s = 'sS' if threads > 1 else ''  # silent
                    r = os.system(
                        f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -')  # curl download with retry, continue
                    success = r == 0
                else:
                    torch.hub.download_url_to_file(url, f, progress=threads == 1)  # torch download
                    success = f.is_file()
                if success:
                    break
                elif i < retry:
                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
                else:
                    LOGGER.warning(f'❌ Failed to download {url}...')

        if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
            LOGGER.info(f'Unzipping {f}...')
            if f.suffix == '.zip':
                ZipFile(f).extractall(path=dir)  # unzip
            elif f.suffix == '.tar':
                os.system(f'tar xf {f} --directory {f.parent}')  # unzip
            elif f.suffix == '.gz':
                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
            if delete:
                f.unlink()  # remove zip

    dir = Path(dir)
    dir.mkdir(parents=True, exist_ok=True)  # make directory
    if threads > 1:
        pool = ThreadPool(threads)
        pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # multithreaded
        pool.close()
        pool.join()
    else:
        for u in [url] if isinstance(url, (str, Path)) else url:
            download_one(u, dir)


================================================
FILE: yolo/utils/files.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license

import contextlib
import glob
import os
import urllib
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile
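# Editor's note: a minimal usage sketch for the path helpers defined below (illustrative
# only; 'runs/exp' is a hypothetical path, not one this module creates on its own):
#
#   save_dir = increment_path('runs/exp', mkdir=True)   # -> runs/exp, runs/exp2, runs/exp3, ...
#   print(f'{file_size(save_dir):.3f} MB')              # size of the new directory in MB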
class WorkingDirectory(contextlib.ContextDecorator): # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager def __init__(self, new_dir): self.dir = new_dir # new dir self.cwd = Path.cwd().resolve() # current dir def __enter__(self): os.chdir(self.dir) def __exit__(self, exc_type, exc_val, exc_tb): os.chdir(self.cwd) def increment_path(path, exist_ok=False, sep='', mkdir=False): """ Increments a file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. If the path exists and exist_ok is not set to True, the path will be incremented by appending a number and sep to the end of the path. If the path is a file, the file extension will be preserved. If the path is a directory, the number will be appended directly to the end of the path. If mkdir is set to True, the path will be created as a directory if it does not already exist. Args: path (str or pathlib.Path): Path to increment. exist_ok (bool, optional): If True, the path will not be incremented and will be returned as-is. Defaults to False. sep (str, optional): Separator to use between the path and the incrementation number. Defaults to an empty string. mkdir (bool, optional): If True, the path will be created as a directory if it does not exist. Defaults to False. Returns: pathlib.Path: Incremented path. """ path = Path(path) # os-agnostic if path.exists() and not exist_ok: path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') # Method 1 for n in range(2, 9999): p = f'{path}{sep}{n}{suffix}' # increment path if not os.path.exists(p): # break path = Path(p) if mkdir: path.mkdir(parents=True, exist_ok=True) # make directory return path def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')): # Unzip a *.zip file to path/, excluding files containing strings in exclude list if path is None: path = Path(file).parent # default path with ZipFile(file) as zipObj: for f in zipObj.namelist(): # list all archived filenames in the zip if all(x not in f for x in exclude): zipObj.extract(f, path=path) def file_age(path=__file__): # Return days since last file update dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta return dt.days # + dt.seconds / 86400 # fractional days def file_date(path=__file__): # Return human-readable file modification date, i.e. '2021-3-26' t = datetime.fromtimestamp(Path(path).stat().st_mtime) return f'{t.year}-{t.month}-{t.day}' def file_size(path): # Return file/dir size (MB) mb = 1 << 20 # bytes to MiB (1024 ** 2) path = Path(path) if path.is_file(): return path.stat().st_size / mb elif path.is_dir(): return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb else: return 0.0 def url2file(url): # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth def get_latest_run(search_dir='.'): # Return path to most recent 'last.pt' in /runs (i.e. 
to --resume from) last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) return max(last_list, key=os.path.getctime) if last_list else '' ================================================ FILE: yolo/utils/instance.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from collections import abc from itertools import repeat from numbers import Number from typing import List import numpy as np from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh # From PyTorch internals def _ntuple(n): def parse(x): return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n)) return parse to_4tuple = _ntuple(4) # `xyxy` means left top and right bottom # `xywh` means center x, center y and width, height(yolo format) # `ltwh` means left top and width, height(coco format) _formats = ["xyxy", "xywh", "ltwh"] __all__ = ["Bboxes"] class Bboxes: """Now only numpy is supported""" def __init__(self, bboxes, format="xyxy") -> None: assert format in _formats bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes assert bboxes.ndim == 2 assert bboxes.shape[1] == 4 self.bboxes = bboxes self.format = format # self.normalized = normalized # def convert(self, format): # assert format in _formats # if self.format == format: # bboxes = self.bboxes # elif self.format == "xyxy": # if format == "xywh": # bboxes = xyxy2xywh(self.bboxes) # else: # bboxes = xyxy2ltwh(self.bboxes) # elif self.format == "xywh": # if format == "xyxy": # bboxes = xywh2xyxy(self.bboxes) # else: # bboxes = xywh2ltwh(self.bboxes) # else: # if format == "xyxy": # bboxes = ltwh2xyxy(self.bboxes) # else: # bboxes = ltwh2xywh(self.bboxes) # # return Bboxes(bboxes, format) def convert(self, format): assert format in _formats if self.format == format: return elif self.format == "xyxy": bboxes = xyxy2xywh(self.bboxes) if format == "xywh" else xyxy2ltwh(self.bboxes) elif self.format == "xywh": bboxes = xywh2xyxy(self.bboxes) if format == "xyxy" else xywh2ltwh(self.bboxes) else: bboxes = ltwh2xyxy(self.bboxes) if format == "xyxy" else ltwh2xywh(self.bboxes) self.bboxes = bboxes self.format = format def areas(self): self.convert("xyxy") return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1]) # def denormalize(self, w, h): # if not self.normalized: # return # assert (self.bboxes <= 1.0).all() # self.bboxes[:, 0::2] *= w # self.bboxes[:, 1::2] *= h # self.normalized = False # # def normalize(self, w, h): # if self.normalized: # return # assert (self.bboxes > 1.0).any() # self.bboxes[:, 0::2] /= w # self.bboxes[:, 1::2] /= h # self.normalized = True def mul(self, scale): """ Args: scale (tuple | List | int): the scale for four coords. """ if isinstance(scale, Number): scale = to_4tuple(scale) assert isinstance(scale, (tuple, list)) assert len(scale) == 4 self.bboxes[:, 0] *= scale[0] self.bboxes[:, 1] *= scale[1] self.bboxes[:, 2] *= scale[2] self.bboxes[:, 3] *= scale[3] def add(self, offset): """ Args: offset (tuple | List | int): the offset for four coords. 
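        Example (editor's sketch; a scalar offset is broadcast to all four coords via to_4tuple):
            >>> boxes = Bboxes(np.array([[10., 10., 20., 20.]]), format="xyxy")
            >>> boxes.add(5)  # equivalent to boxes.add((5, 5, 5, 5))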
""" if isinstance(offset, Number): offset = to_4tuple(offset) assert isinstance(offset, (tuple, list)) assert len(offset) == 4 self.bboxes[:, 0] += offset[0] self.bboxes[:, 1] += offset[1] self.bboxes[:, 2] += offset[2] self.bboxes[:, 3] += offset[3] def __len__(self): return len(self.bboxes) @classmethod def concatenate(cls, boxes_list: List["Bboxes"], axis=0) -> "Bboxes": """ Concatenates a list of Boxes into a single Bboxes Arguments: boxes_list (list[Bboxes]) Returns: Bboxes: the concatenated Boxes """ assert isinstance(boxes_list, (list, tuple)) if not boxes_list: return cls(np.empty(0)) assert all(isinstance(box, Bboxes) for box in boxes_list) if len(boxes_list) == 1: return boxes_list[0] return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis)) def __getitem__(self, index) -> "Bboxes": """ Args: index: int, slice, or a BoolArray Returns: Bboxes: Create a new :class:`Bboxes` by indexing. """ if isinstance(index, int): return Bboxes(self.bboxes[index].view(1, -1)) b = self.bboxes[index] assert b.ndim == 2, f"Indexing on Bboxes with {index} failed to return a matrix!" return Bboxes(b) class Instances: def __init__(self, bboxes, segments=None, keypoints=None, bbox_format="xywh", normalized=True) -> None: """ Args: bboxes (ndarray): bboxes with shape [N, 4]. segments (list | ndarray): segments. keypoints (ndarray): keypoints with shape [N, 17, 2]. """ if segments is None: segments = [] self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format) self.keypoints = keypoints self.normalized = normalized if len(segments) > 0: # list[np.array(1000, 2)] * num_samples segments = resample_segments(segments) # (N, 1000, 2) segments = np.stack(segments, axis=0) else: segments = np.zeros((0, 1000, 2), dtype=np.float32) self.segments = segments def convert_bbox(self, format): self._bboxes.convert(format=format) def bbox_areas(self): self._bboxes.areas() def scale(self, scale_w, scale_h, bbox_only=False): """this might be similar with denormalize func but without normalized sign""" self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h)) if bbox_only: return self.segments[..., 0] *= scale_w self.segments[..., 1] *= scale_h if self.keypoints is not None: self.keypoints[..., 0] *= scale_w self.keypoints[..., 1] *= scale_h def denormalize(self, w, h): if not self.normalized: return self._bboxes.mul(scale=(w, h, w, h)) self.segments[..., 0] *= w self.segments[..., 1] *= h if self.keypoints is not None: self.keypoints[..., 0] *= w self.keypoints[..., 1] *= h self.normalized = False def normalize(self, w, h): if self.normalized: return self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h)) self.segments[..., 0] /= w self.segments[..., 1] /= h if self.keypoints is not None: self.keypoints[..., 0] /= w self.keypoints[..., 1] /= h self.normalized = True def add_padding(self, padw, padh): # handle rect and mosaic situation assert not self.normalized, "you should add padding with absolute coordinates." self._bboxes.add(offset=(padw, padh, padw, padh)) self.segments[..., 0] += padw self.segments[..., 1] += padh if self.keypoints is not None: self.keypoints[..., 0] += padw self.keypoints[..., 1] += padh def __getitem__(self, index) -> "Instances": """ Args: index: int, slice, or a BoolArray Returns: Instances: Create a new :class:`Instances` by indexing. 
""" segments = self.segments[index] if len(self.segments) else self.segments keypoints = self.keypoints[index] if self.keypoints is not None else None bboxes = self.bboxes[index] bbox_format = self._bboxes.format return Instances( bboxes=bboxes, segments=segments, keypoints=keypoints, bbox_format=bbox_format, normalized=self.normalized, ) def flipud(self, h): if self._bboxes.format == "xyxy": y1 = self.bboxes[:, 1].copy() y2 = self.bboxes[:, 3].copy() self.bboxes[:, 1] = h - y2 self.bboxes[:, 3] = h - y1 else: self.bboxes[:, 1] = h - self.bboxes[:, 1] self.segments[..., 1] = h - self.segments[..., 1] if self.keypoints is not None: self.keypoints[..., 1] = h - self.keypoints[..., 1] def fliplr(self, w): if self._bboxes.format == "xyxy": x1 = self.bboxes[:, 0].copy() x2 = self.bboxes[:, 2].copy() self.bboxes[:, 0] = w - x2 self.bboxes[:, 2] = w - x1 else: self.bboxes[:, 0] = w - self.bboxes[:, 0] self.segments[..., 0] = w - self.segments[..., 0] if self.keypoints is not None: self.keypoints[..., 0] = w - self.keypoints[..., 0] def clip(self, w, h): ori_format = self._bboxes.format self.convert_bbox(format="xyxy") self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w) self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h) if ori_format != "xyxy": self.convert_bbox(format=ori_format) self.segments[..., 0] = self.segments[..., 0].clip(0, w) self.segments[..., 1] = self.segments[..., 1].clip(0, h) if self.keypoints is not None: self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w) self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h) def update(self, bboxes, segments=None, keypoints=None): new_bboxes = Bboxes(bboxes, format=self._bboxes.format) self._bboxes = new_bboxes if segments is not None: self.segments = segments if keypoints is not None: self.keypoints = keypoints def __len__(self): return len(self.bboxes) @classmethod def concatenate(cls, instances_list: List["Instances"], axis=0) -> "Instances": """ Concatenates a list of Boxes into a single Bboxes Arguments: instances_list (list[Bboxes]) axis Returns: Boxes: the concatenated Boxes """ assert isinstance(instances_list, (list, tuple)) if not instances_list: return cls(np.empty(0)) assert all(isinstance(instance, Instances) for instance in instances_list) if len(instances_list) == 1: return instances_list[0] use_keypoint = instances_list[0].keypoints is not None bbox_format = instances_list[0]._bboxes.format normalized = instances_list[0].normalized cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis) cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis) cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized) @property def bboxes(self): return self._bboxes.bboxes ================================================ FILE: yolo/utils/loss.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import torch import torch.nn as nn import torch.nn.functional as F from .metrics import bbox_iou from .tal import bbox2dist class VarifocalLoss(nn.Module): # Varifocal loss by Zhang et al. 
https://arxiv.org/abs/2008.13367 def __init__(self): super().__init__() def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label with torch.cuda.amp.autocast(enabled=False): loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), reduction="none") * weight).sum() return loss class BboxLoss(nn.Module): def __init__(self, reg_max, use_dfl=False): super().__init__() self.reg_max = reg_max self.use_dfl = use_dfl def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): # IoU loss weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) iou = bbox_iou(pred_bboxes[fg_mask], target_bboxes[fg_mask], xywh=False, CIoU=True) loss_iou = ((1.0 - iou) * weight).sum() / target_scores_sum # DFL loss if self.use_dfl: target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) loss_dfl = self._df_loss(pred_dist[fg_mask].view(-1, self.reg_max + 1), target_ltrb[fg_mask]) * weight loss_dfl = loss_dfl.sum() / target_scores_sum else: loss_dfl = torch.tensor(0.0).to(pred_dist.device) return loss_iou, loss_dfl @staticmethod def _df_loss(pred_dist, target): # Return sum of left and right DFL losses tl = target.long() # target left tr = tl + 1 # target right wl = tr - target # weight left wr = 1 - wl # weight right return (F.cross_entropy(pred_dist, tl.view(-1), reduction="none").view(tl.shape) * wl + F.cross_entropy(pred_dist, tr.view(-1), reduction="none").view(tl.shape) * wr).mean(-1, keepdim=True) ================================================ FILE: yolo/utils/metrics.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license """ Model validation metrics """ import math import warnings from pathlib import Path import matplotlib.pyplot as plt import numpy as np import torch import torch.nn as nn from yolo.utils import TryExcept # boxes def box_area(box): # box = xyxy(4,n) return (box[2] - box[0]) * (box[3] - box[1]) def bbox_ioa(box1, box2, eps=1e-7): """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 box1: np.array of shape(nx4) box2: np.array of shape(mx4) returns: np.array of shape(nxm) """ # Get the coordinates of bounding boxes b1_x1, b1_y1, b1_x2, b1_y2 = box1.T b2_x1, b2_y1, b2_x2, b2_y2 = box2.T # Intersection area inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) # box2 area box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps # Intersection over box2 area return inter_area / box2_area def box_iou(box1, box2, eps=1e-7): # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py """ Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
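    Example (editor's worked case; the two boxes are assumed values):
        >>> a = torch.tensor([[0., 0., 2., 2.]])
        >>> b = torch.tensor([[1., 1., 3., 3.]])
        >>> box_iou(a, b)  # inter = 1, union = 4 + 4 - 1 = 7 -> tensor([[0.1429]])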
Arguments: box1 (Tensor[N, 4]) box2 (Tensor[M, 4]) Returns: iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 """ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) # IoU = inter / (area1 + area2 - inter) return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps) def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) # Get the coordinates of bounding boxes if xywh: # transform from xywh to xyxy (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ else: # x1, y1, x2, y2 = box1 b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1) b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1) w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps # Intersection area inter = (b1_x2.minimum(b2_x2) - b1_x1.maximum(b2_x1)).clamp(0) * \ (b1_y2.minimum(b2_y2) - b1_y1.maximum(b2_y1)).clamp(0) # Union Area union = w1 * h1 + w2 * h2 - inter + eps # IoU iou = inter / union if CIoU or DIoU or GIoU: cw = b1_x2.maximum(b2_x2) - b1_x1.minimum(b2_x1) # convex (smallest enclosing box) width ch = b1_y2.maximum(b2_y2) - b1_y1.minimum(b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 v = (4 / math.pi ** 2) * (torch.atan(w2 / h2) - torch.atan(w1 / h1)).pow(2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU return iou - rho2 / c2 # DIoU c_area = cw * ch + eps # convex area return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf return iou # IoU def mask_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [M, n] m2 means number of gt objects Note: n means image_w x image_h return: masks iou, [N, M] """ intersection = torch.matmul(mask1, mask2.t()).clamp(0) union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection return intersection / (union + eps) def masks_iou(mask1, mask2, eps=1e-7): """ mask1: [N, n] m1 means number of predicted objects mask2: [N, n] m2 means number of gt objects Note: n means image_w x image_h return: masks iou, (N, ) """ intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection return intersection / (union + eps) def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 # return positive, negative label smoothing BCE targets return 1.0 - 0.5 * eps, 0.5 * eps # losses class FocalLoss(nn.Module): # Wraps focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): super().__init__() self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() self.gamma = gamma self.alpha = alpha self.reduction = loss_fcn.reduction self.loss_fcn.reduction = 'none' # required to apply FL to each element def forward(self, pred, true): loss = self.loss_fcn(pred, true) # p_t = torch.exp(-loss) # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py pred_prob = torch.sigmoid(pred) # prob from logits p_t = true * pred_prob + (1 - true) * (1 - pred_prob) alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) modulating_factor = (1.0 - p_t) ** self.gamma loss *= alpha_factor * modulating_factor if self.reduction == 'mean': return loss.mean() elif self.reduction == 'sum': return loss.sum() else: # 'none' return loss class ConfusionMatrix: # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix def __init__(self, nc, conf=0.25, iou_thres=0.45): self.matrix = np.zeros((nc + 1, nc + 1)) self.nc = nc # number of classes self.conf = conf self.iou_thres = iou_thres def process_batch(self, detections, labels): """ Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. Arguments: detections (Array[N, 6]), x1, y1, x2, y2, conf, class labels (Array[M, 5]), class, x1, y1, x2, y2 Returns: None, updates confusion matrix accordingly """ if detections is None: gt_classes = labels.int() for gc in gt_classes: self.matrix[self.nc, gc] += 1 # background FN return detections = detections[detections[:, 4] > self.conf] gt_classes = labels[:, 0].int() detection_classes = detections[:, 5].int() iou = box_iou(labels[:, 1:], detections[:, :4]) x = torch.where(iou > self.iou_thres) if x[0].shape[0]: matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 1], return_index=True)[1]] matches = matches[matches[:, 2].argsort()[::-1]] matches = matches[np.unique(matches[:, 0], return_index=True)[1]] else: matches = np.zeros((0, 3)) n = matches.shape[0] > 0 m0, m1, _ = matches.transpose().astype(int) for i, gc in enumerate(gt_classes): j = m0 == i if n and sum(j) == 1: self.matrix[detection_classes[m1[j]], gc] += 1 # correct else: self.matrix[self.nc, gc] += 1 # true background if n: for i, dc in enumerate(detection_classes): if not any(m1 == i): self.matrix[dc, self.nc] += 1 # predicted background def matrix(self): return self.matrix def tp_fp(self): tp = self.matrix.diagonal() # true positives fp = self.matrix.sum(1) - tp # false positives # fn = self.matrix.sum(0) - tp # false negatives (missed detections) return tp[:-1], fp[:-1] # remove background class @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure') def plot(self, normalize=True, save_dir='', names=()): import seaborn as sn array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True) nc, nn = self.nc, len(names) # number of classes, names sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels ticklabels = (names + 
['background']) if labels else "auto"
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress empty matrix RuntimeWarning: All-NaN slice encountered
            sn.heatmap(array,
                       ax=ax,
                       annot=nc < 30,
                       annot_kws={
                           "size": 8},
                       cmap='Blues',
                       fmt='.2f',
                       square=True,
                       vmin=0.0,
                       xticklabels=ticklabels,
                       yticklabels=ticklabels).set_facecolor((1, 1, 1))
        ax.set_xlabel('True')
        ax.set_ylabel('Predicted')
        ax.set_title('Confusion Matrix')
        fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
        plt.close(fig)

    def print(self):
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))


def smooth(y, f=0.05):
    # Box filter of fraction f
    nf = round(len(y) * f * 2) // 2 + 1  # number of filter elements (must be odd)
    p = np.ones(nf // 2)  # ones padding
    yp = np.concatenate((p * y[0], y, p * y[-1]), 0)  # y padded
    return np.convolve(yp, np.ones(nf) / nf, mode='valid')  # y-smoothed


def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
    # Precision-recall curve
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    if 0 < len(names) < 21:  # display per-class legend if < 21 classes
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}')  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    ax.set_title('Precision-Recall Curve')
    fig.savefig(save_dir, dpi=250)
    plt.close(fig)


def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'):
    # Metric-confidence curve
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

    if 0 < len(names) < 21:  # display per-class legend if < 21 classes
        for i, y in enumerate(py):
            ax.plot(px, y, linewidth=1, label=f'{names[i]}')  # plot(confidence, metric)
    else:
        ax.plot(px, py.T, linewidth=1, color='grey')  # plot(confidence, metric)

    y = smooth(py.mean(0), 0.05)
    ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    ax.set_title(f'{ylabel}-Confidence Curve')
    fig.savefig(save_dir, dpi=250)
    plt.close(fig)


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x-axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec


def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=Path(), names=(), eps=1e-16, prefix=""):
    """ Compute the average precision, given the recall and precision curves.
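    (Editor's note on expected shapes, inferred from the code below: with N detections
    evaluated at 10 IoU thresholds, `tp` is an (N, 10) boolean array, `conf` and
    `pred_cls` are (N,) arrays, and `target_cls` lists every ground-truth class label.)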
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. # Arguments tp: True positives (nparray, nx1 or nx10). conf: Objectness value from 0-1 (nparray). pred_cls: Predicted object classes (nparray). target_cls: True object classes (nparray). plot: Plot precision-recall curve at mAP@0.5 save_dir: Plot save directory # Returns The average precision as computed in py-faster-rcnn. """ # Sort by objectness i = np.argsort(-conf) tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] # Find unique classes unique_classes, nt = np.unique(target_cls, return_counts=True) nc = unique_classes.shape[0] # number of classes, number of detections # Create Precision-Recall curve and compute AP for each class px, py = np.linspace(0, 1, 1000), [] # for plotting ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = nt[ci] # number of labels n_p = i.sum() # number of predictions if n_p == 0 or n_l == 0: continue # Accumulate FPs and TPs fpc = (1 - tp[i]).cumsum(0) tpc = tp[i].cumsum(0) # Recall recall = tpc / (n_l + eps) # recall curve r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) if plot and j == 0: py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + eps) names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data names = dict(enumerate(names)) # to dict if plot: plot_pr_curve(px, py, ap, save_dir / f'{prefix}PR_curve.png', names) plot_mc_curve(px, f1, save_dir / f'{prefix}F1_curve.png', names, ylabel='F1') plot_mc_curve(px, p, save_dir / f'{prefix}P_curve.png', names, ylabel='Precision') plot_mc_curve(px, r, save_dir / f'{prefix}R_curve.png', names, ylabel='Recall') i = smooth(f1.mean(0), 0.1).argmax() # max F1 index p, r, f1 = p[:, i], r[:, i], f1[:, i] tp = (r * nt).round() # true positives fp = (tp / (p + eps) - tp).round() # false positives return tp, fp, p, r, f1, ap, unique_classes.astype(int) class Metric: def __init__(self) -> None: self.p = [] # (nc, ) self.r = [] # (nc, ) self.f1 = [] # (nc, ) self.all_ap = [] # (nc, 10) self.ap_class_index = [] # (nc, ) @property def ap50(self): """AP@0.5 of all classes. Return: (nc, ) or []. """ return self.all_ap[:, 0] if len(self.all_ap) else [] @property def ap(self): """AP@0.5:0.95 Return: (nc, ) or []. """ return self.all_ap.mean(1) if len(self.all_ap) else [] @property def mp(self): """mean precision of all classes. Return: float. """ return self.p.mean() if len(self.p) else 0.0 @property def mr(self): """mean recall of all classes. Return: float. """ return self.r.mean() if len(self.r) else 0.0 @property def map50(self): """Mean AP@0.5 of all classes. Return: float. """ return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 @property def map(self): """Mean AP@0.5:0.95 of all classes. Return: float. 
""" return self.all_ap.mean() if len(self.all_ap) else 0.0 def mean_results(self): """Mean of results, return mp, mr, map50, map""" return [self.mp, self.mr, self.map50, self.map] def class_result(self, i): """class-aware result, return p[i], r[i], ap50[i], ap[i]""" return self.p[i], self.r[i], self.ap50[i], self.ap[i] def get_maps(self, nc): maps = np.zeros(nc) + self.map for i, c in enumerate(self.ap_class_index): maps[c] = self.ap[i] return maps def fitness(self): # Model fitness as a weighted combination of metrics w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] return (np.array(self.mean_results()) * w).sum() def update(self, results): """ Args: results: tuple(p, r, ap, f1, ap_class) """ self.p, self.r, self.f1, self.all_ap, self.ap_class_index = results class DetMetrics: def __init__(self, save_dir=Path("."), plot=False, names=()) -> None: self.save_dir = save_dir self.plot = plot self.names = names self.metric = Metric() def process(self, tp, conf, pred_cls, target_cls): results = ap_per_class(tp, conf, pred_cls, target_cls, plot=self.plot, save_dir=self.save_dir, names=self.names)[2:] self.metric.update(results) @property def keys(self): return ["metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)"] def mean_results(self): return self.metric.mean_results() def class_result(self, i): return self.metric.class_result(i) def get_maps(self, nc): return self.metric.get_maps(nc) @property def fitness(self): return self.metric.fitness() @property def ap_class_index(self): return self.metric.ap_class_index @property def results_dict(self): return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness])) class SegmentMetrics: def __init__(self, save_dir=Path("."), plot=False, names=()) -> None: self.save_dir = save_dir self.plot = plot self.names = names self.metric_box = Metric() self.metric_mask = Metric() def process(self, tp_m, tp_b, conf, pred_cls, target_cls): results_mask = ap_per_class(tp_m, conf, pred_cls, target_cls, plot=self.plot, save_dir=self.save_dir, names=self.names, prefix="Mask")[2:] self.metric_mask.update(results_mask) results_box = ap_per_class(tp_b, conf, pred_cls, target_cls, plot=self.plot, save_dir=self.save_dir, names=self.names, prefix="Box")[2:] self.metric_box.update(results_box) @property def keys(self): return [ "metrics/precision(B)", "metrics/recall(B)", "metrics/mAP50(B)", "metrics/mAP50-95(B)", "metrics/precision(M)", "metrics/recall(M)", "metrics/mAP50(M)", "metrics/mAP50-95(M)"] def mean_results(self): return self.metric_box.mean_results() + self.metric_mask.mean_results() def class_result(self, i): return self.metric_box.class_result(i) + self.metric_mask.class_result(i) def get_maps(self, nc): return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) @property def fitness(self): return self.metric_mask.fitness() + self.metric_box.fitness() @property def ap_class_index(self): # boxes and masks have the same ap_class_index return self.metric_box.ap_class_index @property def results_dict(self): return dict(zip(self.keys + ["fitness"], self.mean_results() + [self.fitness])) class ClassifyMetrics: def __init__(self) -> None: self.top1 = 0 self.top5 = 0 def process(self, targets, pred): # target classes and predicted classes pred, targets = torch.cat(pred), torch.cat(targets) correct = (targets[:, None] == pred).float() acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy self.top1, self.top5 = acc.mean(0).tolist() @property def fitness(self): 
return self.top5 @property def results_dict(self): return dict(zip(self.keys + ["fitness"], [self.top1, self.top5, self.fitness])) @property def keys(self): return ["metrics/accuracy_top1", "metrics/accuracy_top5"] ================================================ FILE: yolo/utils/ops.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import contextlib import math import re import time import cv2 import numpy as np import torch import torch.nn.functional as F import torchvision from yolo.utils import LOGGER from .metrics import box_iou class Profile(contextlib.ContextDecorator): # YOLOv5 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager def __init__(self, t=0.0): self.t = t self.cuda = torch.cuda.is_available() def __enter__(self): self.start = self.time() return self def __exit__(self, type, value, traceback): self.dt = self.time() - self.start # delta-time self.t += self.dt # accumulate dt def time(self): if self.cuda: torch.cuda.synchronize() return time.time() def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet return [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] def segment2box(segment, width=640, height=640): """ > Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) Args: segment: the segment label width: the width of the image. Defaults to 640 height: The height of the image. Defaults to 640 Returns: the minimum and maximum x and y values of the segment. """ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) x, y, = x[inside], y[inside] return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros(4) # xyxy def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): """ > Rescale boxes (xyxy) from img1_shape to img0_shape Args: img1_shape: The shape of the image that the bounding boxes are for. boxes: the bounding boxes of the objects in the image img0_shape: the shape of the original image ratio_pad: a tuple of (ratio, pad) Returns: The boxes are being returned. 
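    Example (editor's sketch; the shapes are assumed): boxes predicted on a 640x640
    letterboxed input can be mapped back onto the original 1280x720 frame with
        >>> boxes = scale_boxes((640, 640), boxes, (720, 1280))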
""" # if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] boxes[..., [0, 2]] -= pad[0] # x padding boxes[..., [1, 3]] -= pad[1] # y padding boxes[..., :4] /= gain clip_boxes(boxes, img0_shape) return boxes def make_divisible(x, divisor): # Returns nearest x divisible by divisor if isinstance(divisor, torch.Tensor): divisor = int(divisor.max()) # to int return math.ceil(x / divisor) * divisor def non_max_suppression( prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, labels=(), max_det=300, nm=0, # number of masks ): """ > Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box. Arguments: prediction (torch.Tensor): A tensor of shape (batch_size, num_boxes, num_classes + 4 + num_masks) containing the predicted boxes, classes, and masks. The tensor should be in the format output by a model, such as YOLO. conf_thres (float): The confidence threshold below which boxes will be filtered out. Valid values are between 0.0 and 1.0. iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS. Valid values are between 0.0 and 1.0. classes (List[int]): A list of class indices to consider. If None, all classes will be considered. agnostic (bool): If True, the model is agnostic to the number of classes, and all classes will be considered as one. multi_label (bool): If True, each box may have multiple labels. labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner list contains the apriori labels for a given image. The list should be in the format output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2). max_det (int): The maximum number of boxes to keep after NMS. nm (int): The number of masks output by the model. Returns: List[torch.Tensor]: A list of length batch_size, where each element is a tensor of shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). 
""" # Checks assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' if isinstance(prediction, (list, tuple)): # YOLOv5 model in validation model, output = (inference_out, loss_out) prediction = prediction[0] # select only inference output device = prediction.device mps = 'mps' in device.type # Apple MPS if mps: # MPS not fully supported yet, convert tensors to CPU before NMS prediction = prediction.cpu() bs = prediction.shape[0] # batch size nc = prediction.shape[1] - nm - 4 # number of classes mi = 4 + nc # mask start index xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates # Settings # min_wh = 2 # (pixels) minimum box width and height max_wh = 7680 # (pixels) maximum box width and height max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 0.5 + 0.05 * bs # seconds to quit after redundant = True # require redundant detections multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) merge = False # use merge-NMS t = time.time() output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height x = x.transpose(0, -1)[xc[xi]] # confidence # Cat apriori labels if autolabelling if labels and len(labels[xi]): lb = labels[xi] v = torch.zeros((len(lb), nc + nm + 5), device=x.device) v[:, :4] = lb[:, 1:5] # box v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image if not x.shape[0]: continue # Detections matrix nx6 (xyxy, conf, cls) box, cls, mask = x.split((4, nc, nm), 1) box = xywh2xyxy(box) # center_x, center_y, width, height) to (x1, y1, x2, y2) if multi_label: i, j = (cls > conf_thres).nonzero(as_tuple=False).T x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) else: # best class only conf, j = cls.max(1, keepdim=True) x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] # Apply finite constraint # if not torch.isfinite(x).all(): # x = x[torch.isfinite(x).all(1)] # Check shape n = x.shape[0] # number of boxes if not n: # no boxes continue x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS i = i[:max_det] # limit detections if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy output[xi] = x[i] if mps: output[xi] = output[xi].to(device) if (time.time() - t) > time_limit: LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') break # time limit exceeded return output def clip_boxes(boxes, shape): """ > It takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape Args: boxes: the bounding boxes to clip shape: the 
shape of the image """ if isinstance(boxes, torch.Tensor): # faster individually boxes[..., 0].clamp_(0, shape[1]) # x1 boxes[..., 1].clamp_(0, shape[0]) # y1 boxes[..., 2].clamp_(0, shape[1]) # x2 boxes[..., 3].clamp_(0, shape[0]) # y2 else: # np.array (faster grouped) boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2 boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 def clip_coords(boxes, shape): # Clip bounding xyxy bounding boxes to image shape (height, width) if isinstance(boxes, torch.Tensor): # faster individually boxes[:, 0].clamp_(0, shape[1]) # x1 boxes[:, 1].clamp_(0, shape[0]) # y1 boxes[:, 2].clamp_(0, shape[1]) # x2 boxes[:, 3].clamp_(0, shape[0]) # y2 else: # np.array (faster grouped) boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): """ > It takes a mask, and resizes it to the original image size Args: im1_shape: model input shape, [h, w] masks: [h, w, num] im0_shape: the original image shape ratio_pad: the ratio of the padding to the original image. Returns: The masks are being returned. """ # Rescale coordinates (xyxy) from im1_shape to im0_shape if ratio_pad is None: # calculate from im0_shape gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding else: pad = ratio_pad[1] top, left = int(pad[1]), int(pad[0]) # y, x bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) if len(masks.shape) < 2: raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') masks = masks[top:bottom, left:right] # masks = masks.permute(2, 0, 1).contiguous() # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] # masks = masks.permute(1, 2, 0).contiguous() masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) if len(masks.shape) == 2: masks = masks[:, :, None] return masks def xyxy2xywh(x): """ > It takes a list of bounding boxes, and converts them from the format [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right Args: x: the input tensor Returns: the center of the box, the width and the height of the box. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center y[..., 2] = x[..., 2] - x[..., 0] # width y[..., 3] = x[..., 3] - x[..., 1] # height return y def xywh2xyxy(x): """ > It converts the bounding box from x,y,w,h to x1,y1,x2,y2 where xy1=top-left, xy2=bottom-right Args: x: the input tensor Returns: the top left and bottom right coordinates of the bounding box. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y return y def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): """ > It converts the normalized coordinates to the actual coordinates [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right Args: x: the bounding box coordinates w: width of the image. Defaults to 640 h: height of the image. Defaults to 640 padw: padding width. Defaults to 0 padh: height of the padding. Defaults to 0 Returns: the xyxy coordinates of the bounding box. 
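    Example (editor's worked case; the box values are assumed):
        >>> xywhn2xyxy(np.array([[0.5, 0.5, 0.25, 0.5]]), w=640, h=640)
        array([[240., 160., 400., 480.]])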
""" y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y return y def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): """ > It takes in a list of bounding boxes, and returns a list of bounding boxes, but with the x and y coordinates normalized to the width and height of the image Args: x: the bounding box coordinates w: width of the image. Defaults to 640 h: height of the image. Defaults to 640 clip: If True, the boxes will be clipped to the image boundaries. Defaults to False eps: the minimum value of the box's width and height. Returns: the xywhn format of the bounding boxes. """ if clip: clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center y[..., 2] = (x[..., 2] - x[..., 0]) / w # width y[..., 3] = (x[..., 3] - x[..., 1]) / h # height return y def xyn2xy(x, w=640, h=640, padw=0, padh=0): """ > It converts normalized segments into pixel segments of shape (n,2) Args: x: the normalized coordinates of the bounding box w: width of the image. Defaults to 640 h: height of the image. Defaults to 640 padw: padding width. Defaults to 0 padh: padding height. Defaults to 0 Returns: the x and y coordinates of the top left corner of the bounding box. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[..., 0] = w * x[..., 0] + padw # top left x y[..., 1] = h * x[..., 1] + padh # top left y return y def xywh2ltwh(x): """ > It converts the bounding box from [x, y, w, h] to [x1, y1, w, h] where xy1=top-left Args: x: the x coordinate of the center of the bounding box Returns: the top left x and y coordinates of the bounding box. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y return y def xyxy2ltwh(x): """ > Convert nx4 boxes from [x1, y1, x2, y2] to [x1, y1, w, h] where xy1=top-left, xy2=bottom-right Args: x: the input tensor Returns: the xyxy2ltwh function. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] - x[:, 0] # width y[:, 3] = x[:, 3] - x[:, 1] # height return y def ltwh2xywh(x): """ > Convert nx4 boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center Args: x: the input tensor """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] + x[:, 2] / 2 # center x y[:, 1] = x[:, 1] + x[:, 3] / 2 # center y return y def ltwh2xyxy(x): """ > It converts the bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right Args: x: the input image Returns: the xyxy coordinates of the bounding boxes. """ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 2] = x[:, 2] + x[:, 0] # width y[:, 3] = x[:, 3] + x[:, 1] # height return y def segments2boxes(segments): """ > It converts segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) Args: segments: list of segments, each segment is a list of points, each point is a list of x, y coordinates Returns: the xywh coordinates of the bounding boxes. 
""" boxes = [] for s in segments: x, y = s.T # segment xy boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy return xyxy2xywh(np.array(boxes)) # cls, xywh def resample_segments(segments, n=1000): """ > It takes a list of segments (n,2) and returns a list of segments (n,2) where each segment has been up-sampled to n points Args: segments: a list of (n,2) arrays, where n is the number of points in the segment. n: number of points to resample the segment to. Defaults to 1000 Returns: the resampled segments. """ for i, s in enumerate(segments): s = np.concatenate((s, s[0:1, :]), axis=0) x = np.linspace(0, len(s) - 1, n) xp = np.arange(len(s)) segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy return segments def crop_mask(masks, boxes): """ > It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box Args: masks: [h, w, n] tensor of masks boxes: [n, 4] tensor of bbox coords in relative point form Returns: The masks are being cropped to the bounding box. """ n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) def process_mask_upsample(protos, masks_in, bboxes, shape): """ > It takes the output of the mask head, and applies the mask to the bounding boxes. This produces masks of higher quality but is slower. Args: protos: [mask_dim, mask_h, mask_w] masks_in: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms shape: the size of the input image Returns: mask """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop_mask(masks, bboxes) # CHW return masks.gt_(0.5) def process_mask(protos, masks_in, bboxes, shape, upsample=False): """ > It takes the output of the mask head, and applies the mask to the bounding boxes. This is faster but produces downsampled quality of mask Args: protos: [mask_dim, mask_h, mask_w] masks_in: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms shape: the size of the input image Returns: mask """ c, mh, mw = protos.shape # CHW ih, iw = shape masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW downsampled_bboxes = bboxes.clone() downsampled_bboxes[:, 0] *= mw / iw downsampled_bboxes[:, 2] *= mw / iw downsampled_bboxes[:, 3] *= mh / ih downsampled_bboxes[:, 1] *= mh / ih masks = crop_mask(masks, downsampled_bboxes) # CHW if upsample: masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW return masks.gt_(0.5) def process_mask_native(protos, masks_in, bboxes, shape): """ > It takes the output of the mask head, and crops it after upsampling to the bounding boxes. 
Args: protos: [mask_dim, mask_h, mask_w] masks_in: [n, mask_dim], n is number of masks after nms bboxes: [n, 4], n is number of masks after nms shape: input_image_size, (h, w) Returns: masks: [h, w, n] """ c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) gain = min(mh / shape[0], mw / shape[1]) # gain = old / new pad = (mw - shape[1] * gain) / 2, (mh - shape[0] * gain) / 2 # wh padding top, left = int(pad[1]), int(pad[0]) # y, x bottom, right = int(mh - pad[1]), int(mw - pad[0]) masks = masks[:, top:bottom, left:right] masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW masks = crop_mask(masks, bboxes) # CHW return masks.gt_(0.5) def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): """ > Rescale segment coords (xyxy) from img1_shape to img0_shape Args: img1_shape: The shape of the image that the segments are from. segments: the segments to be scaled img0_shape: the shape of the image that the segmentation is being applied to ratio_pad: the ratio of the image size to the padded image size. normalize: If True, the coordinates will be normalized to the range [0, 1]. Defaults to False Returns: the segmented image. """ if ratio_pad is None: # calculate from img0_shape gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] segments[:, 0] -= pad[0] # x padding segments[:, 1] -= pad[1] # y padding segments /= gain clip_segments(segments, img0_shape) if normalize: segments[:, 0] /= img0_shape[1] # width segments[:, 1] /= img0_shape[0] # height return segments def masks2segments(masks, strategy='largest'): """ > It takes a list of masks(n,h,w) and returns a list of segments(n,xy) Args: masks: the output of the model, which is a tensor of shape (batch_size, 160, 160) strategy: 'concat' or 'largest'. 
Defaults to largest Returns: segments (List): list of segment masks """ segments = [] for x in masks.int().cpu().numpy().astype('uint8'): c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] if c: if strategy == 'concat': # concatenate all segments c = np.concatenate([x.reshape(-1, 2) for x in c]) elif strategy == 'largest': # select largest segment c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) else: c = np.zeros((0, 2)) # no segments found segments.append(c.astype('float32')) return segments def clip_segments(segments, shape): """ > It takes a list of line segments (x1,y1,x2,y2) and clips them to the image shape (height, width) Args: segments: a list of segments, each segment is a list of points, each point is a list of x,y coordinates shape: the shape of the image """ if isinstance(segments, torch.Tensor): # faster individually segments[:, 0].clamp_(0, shape[1]) # x segments[:, 1].clamp_(0, shape[0]) # y else: # np.array (faster grouped) segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y def clean_str(s): # Cleans a string by replacing special characters with underscore _ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) ================================================ FILE: yolo/utils/plotting.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import contextlib import math from pathlib import Path from urllib.error import URLError import cv2 import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch from PIL import Image, ImageDraw, ImageFont from yolo.utils import FONT, USER_CONFIG_DIR, threaded from .checks import check_font, check_requirements, is_ascii from .files import increment_path from .ops import clip_coords, scale_image, xywh2xyxy, xyxy2xywh class Colors: # Ultralytics color palette https://com/ def __init__(self): # hex = matplotlib.colors.TABLEAU_COLORS.values() hexs = ('7fff00', '7fff00', '7fff00', '7fff00', '7fff00', '7fff00', '7fff00', '7fff00', '1A9334', '00D4BB', '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') self.palette = [self.hex2rgb(f'#{c}') for c in hexs] self.n = len(self.palette) def __call__(self, i, bgr=False): c = self.palette[int(i) % self.n] return (c[2], c[1], c[0]) if bgr else c @staticmethod def hex2rgb(h): # rgb order (PIL) return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) colors = Colors() # create instance for 'from utils.plots import colors' class Annotator: # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'): assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.' non_ascii = not is_ascii(example) # non-latin labels, i.e. 
asian, arabic, cyrillic self.pil = pil or non_ascii if self.pil: # use PIL self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font, size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)) else: # use cv2 self.im = im self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): # Add one xyxy box to image with label if self.pil or not is_ascii(label): self.draw.rectangle(box, width=self.lw, outline=color) # box if label: w, h = self.font.getsize(label) # text width, height outside = box[1] - h >= 0 # label fits outside box self.draw.rectangle( (box[0], box[1] - h if outside else box[1], box[0] + w + 1, box[1] + 1 if outside else box[1] + h + 1), fill=color, ) # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font) else: # cv2 p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) cv2.rectangle(self.im, p1, p2, (0,255,127), thickness=self.lw, lineType=cv2.LINE_AA) if label: tf = max(self.lw - 1, 1) # font thickness w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height outside = p1[1] - h >= 3 p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled cv2.putText(self.im, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, self.lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA) def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False): """Plot masks at once. 
Args: masks (tensor): predicted masks on cuda, shape: [n, h, w] colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque """ if self.pil: # convert to numpy first self.im = np.asarray(self.im).copy() if len(masks) == 0: self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 colors = colors[:, None, None] # shape(n,1,1,3) masks = masks.unsqueeze(3) # shape(n,h,w,1) masks_color = masks * (colors * alpha) # shape(n,h,w,3) inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) im_gpu = im_gpu.flip(dims=[0]) # flip channel im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) im_gpu = im_gpu * inv_alph_masks[-1] + mcs im_mask = (im_gpu * 255) im_mask_np = im_mask.byte().cpu().numpy() self.im[:] = im_mask_np if retina_masks else scale_image(im_gpu.shape, im_mask_np, self.im.shape) if self.pil: # convert im back to PIL and update draw self.fromarray(self.im) def rectangle(self, xy, fill=None, outline=None, width=1): # Add rectangle to image (PIL-only) self.draw.rectangle(xy, fill, outline, width) def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'): # Add text to image (PIL-only) if anchor == 'bottom': # start y from font bottom w, h = self.font.getsize(text) # text width, height xy[1] += 1 - h self.draw.text(xy, text, fill=txt_color, font=self.font) def fromarray(self, im): # Update self.im from a numpy array self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) self.draw = ImageDraw.Draw(self.im) def result(self): # Return annotated image as array return np.asarray(self.im) def check_pil_font(font=FONT, size=10): # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary font = Path(font) font = font if font.exists() else (USER_CONFIG_DIR / font.name) try: return ImageFont.truetype(str(font) if font.exists() else font.name, size) except Exception: # download if missing try: check_font(font) return ImageFont.truetype(str(font), size) except TypeError: check_requirements('Pillow>=8.4.0') # known issue https://github.com/ultralytics/yolov5/issues/5374 except URLError: # not online return ImageFont.load_default() def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True): # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. 
Save and/or return crop xyxy = torch.tensor(xyxy).view(-1, 4) b = xyxy2xywh(xyxy) # boxes if square: b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad xyxy = xywh2xyxy(b).long() clip_coords(xyxy, im.shape) crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)] if save: file.parent.mkdir(parents=True, exist_ok=True) # make directory f = str(increment_path(file).with_suffix('.jpg')) # cv2.imwrite(f, crop) # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0) # save RGB return crop @threaded def plot_images(images, batch_idx, cls, bboxes, masks=np.zeros(0, dtype=np.uint8), paths=None, fname='images.jpg', names=None): # Plot image grid with labels if isinstance(images, torch.Tensor): images = images.cpu().float().numpy() if isinstance(cls, torch.Tensor): cls = cls.cpu().numpy() if isinstance(bboxes, torch.Tensor): bboxes = bboxes.cpu().numpy() if isinstance(masks, torch.Tensor): masks = masks.cpu().numpy().astype(int) if isinstance(batch_idx, torch.Tensor): batch_idx = batch_idx.cpu().numpy() max_size = 1920 # max image size max_subplots = 16 # max image subplots, i.e. 4x4 bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images ns = np.ceil(bs ** 0.5) # number of subplots (square) if np.max(images[0]) <= 1: images *= 255 # de-normalise (optional) # Build Image mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init for i, im in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin im = im.transpose(1, 2, 0) mosaic[y:y + h, x:x + w, :] = im # Resize (optional) scale = max_size / ns / max(h, w) if scale < 1: h = math.ceil(scale * h) w = math.ceil(scale * w) mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) # Annotate fs = int((h + w) * ns * 0.01) # font size annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) for i in range(i + 1): x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders if paths: annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames if len(cls) > 0: idx = batch_idx == i boxes = xywh2xyxy(bboxes[idx, :4]).T classes = cls[idx].astype('int') labels = bboxes.shape[1] == 4 # labels if no conf column conf = None if labels else bboxes[idx, 4] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 boxes[[0, 2]] *= w # scale to pixels boxes[[1, 3]] *= h elif scale < 1: # absolute coords need scale if image scales boxes *= scale boxes[[0, 2]] += x boxes[[1, 3]] += y for j, box in enumerate(boxes.T.tolist()): c = classes[j] color = colors(c) c = names[c] if names else c if labels or conf[j] > 0.25: # 0.25 conf thresh label = f'{c}' if labels else f'{c} {conf[j]:.1f}' annotator.box_label(box, label, color=color) # Plot masks if len(masks): if masks.max() > 1.0: # mean that masks are overlap image_masks = masks[[i]] # (1, 640, 640) nl = idx.sum() index = np.arange(nl).reshape(nl, 1, 1) + 1 image_masks = np.repeat(image_masks, nl, axis=0) image_masks = np.where(image_masks == index, 1.0, 0.0) else: image_masks = masks[idx] im = 
np.asarray(annotator.im).copy() for j, box in enumerate(boxes.T.tolist()): if labels or conf[j] > 0.25: # 0.25 conf thresh color = colors(classes[j]) mh, mw = image_masks[j].shape if mh != h or mw != w: mask = image_masks[j].astype(np.uint8) mask = cv2.resize(mask, (w, h)) mask = mask.astype(bool) else: mask = image_masks[j].astype(bool) with contextlib.suppress(Exception): im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 annotator.fromarray(im) annotator.im.save(fname) # save def plot_results(file='path/to/results.csv', dir='', segment=False): # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') save_dir = Path(file).parent if file else Path(dir) if segment: fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) index = [1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12] else: fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) index = [1, 2, 3, 4, 5, 8, 9, 10, 6, 7] ax = ax.ravel() files = list(save_dir.glob('results*.csv')) assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.' for f in files: try: data = pd.read_csv(f) s = [x.strip() for x in data.columns] x = data.values[:, 0] for i, j in enumerate(index): y = data.values[:, j].astype('float') # y[y == 0] = np.nan # don't show zero values ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8) ax[i].set_title(s[j], fontsize=12) # if j in [8, 9, 10]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) except Exception as e: print(f'Warning: Plotting error for {f}: {e}') ax[1].legend() fig.savefig(save_dir / 'results.png', dpi=200) plt.close() def output_to_target(output, max_det=300): # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting targets = [] for i, o in enumerate(output): box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1) j = torch.full((conf.shape[0], 1), i) targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1)) targets = torch.cat(targets, 0).numpy() return targets[:, 0], targets[:, 1], targets[:, 2:] ================================================ FILE: yolo/utils/tal.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license import torch import torch.nn as nn import torch.nn.functional as F from .checks import check_version from .metrics import bbox_iou TORCH_1_10 = check_version(torch.__version__, '1.10.0') def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): """select the positive anchor center in gt Args: xy_centers (Tensor): shape(h*w, 4) gt_bboxes (Tensor): shape(b, n_boxes, 4) Return: (Tensor): shape(b, n_boxes, h*w) """ n_anchors = xy_centers.shape[0] bs, n_boxes, _ = gt_bboxes.shape lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) return bbox_deltas.amin(3).gt_(eps) def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): """if an anchor box is assigned to multiple gts, the one with the highest iou will be selected. 
Args: mask_pos (Tensor): shape(b, n_max_boxes, h*w) overlaps (Tensor): shape(b, n_max_boxes, h*w) Return: target_gt_idx (Tensor): shape(b, h*w) fg_mask (Tensor): shape(b, h*w) mask_pos (Tensor): shape(b, n_max_boxes, h*w) """ # (b, n_max_boxes, h*w) -> (b, h*w) fg_mask = mask_pos.sum(-2) if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) max_overlaps_idx = overlaps.argmax(1) # (b, h*w) is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) fg_mask = mask_pos.sum(-2) # find each grid serve which gt(index) target_gt_idx = mask_pos.argmax(-2) # (b, h*w) return target_gt_idx, fg_mask, mask_pos class TaskAlignedAssigner(nn.Module): def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): super().__init__() self.topk = topk self.num_classes = num_classes self.bg_idx = num_classes self.alpha = alpha self.beta = beta self.eps = eps @torch.no_grad() def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): """This code referenced to https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py Args: pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) anc_points (Tensor): shape(num_total_anchors, 2) gt_labels (Tensor): shape(bs, n_max_boxes, 1) gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) mask_gt (Tensor): shape(bs, n_max_boxes, 1) Returns: target_labels (Tensor): shape(bs, num_total_anchors) target_bboxes (Tensor): shape(bs, num_total_anchors, 4) target_scores (Tensor): shape(bs, num_total_anchors, num_classes) fg_mask (Tensor): shape(bs, num_total_anchors) """ self.bs = pd_scores.size(0) self.n_max_boxes = gt_bboxes.size(1) if self.n_max_boxes == 0: device = gt_bboxes.device return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), torch.zeros_like(pd_bboxes).to(device), torch.zeros_like(pd_scores).to(device), torch.zeros_like(pd_scores[..., 0]).to(device), torch.zeros_like(pd_scores[..., 0]).to(device)) mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt) target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) # assigned target target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) # normalize align_metric *= mask_pos pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) target_scores = target_scores * norm_align_metric return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): # get anchor_align metric, (b, max_num_obj, h*w) align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) # get in_gts mask, (b, max_num_obj, h*w) mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) # get topk_metric mask, (b, max_num_obj, h*w) mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, topk_mask=mask_gt.repeat([1, 1, 
self.topk]).bool()) # merge all mask to a final mask, (b, max_num_obj, h*w) mask_pos = mask_topk * mask_in_gts * mask_gt return mask_pos, align_metric, overlaps def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj ind[1] = gt_labels.long().squeeze(-1) # b, max_num_obj # get the scores of each grid for each gt cls bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) return align_metric, overlaps def select_topk_candidates(self, metrics, largest=True, topk_mask=None): """ Args: metrics: (b, max_num_obj, h*w). topk_mask: (b, max_num_obj, topk) or None """ num_anchors = metrics.shape[-1] # h*w # (b, max_num_obj, topk) topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) if topk_mask is None: topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) # (b, max_num_obj, topk) topk_idxs = torch.where(topk_mask, topk_idxs, 0) # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) # filter invalid bboxes is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) return is_in_topk.to(metrics.dtype) def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): """ Args: gt_labels: (b, max_num_obj, 1) gt_bboxes: (b, max_num_obj, 4) target_gt_idx: (b, h*w) fg_mask: (b, h*w) """ # assigned target labels, (b, 1) batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] # assigned target scores target_labels.clamp(0) target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) return target_labels, target_bboxes, target_scores def make_anchors(feats, strides, grid_cell_offset=0.5): """Generate anchors from features.""" anchor_points, stride_tensor = [], [] assert feats is not None dtype, device = feats[0].dtype, feats[0].device for i, stride in enumerate(strides): _, _, h, w = feats[i].shape sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) return torch.cat(anchor_points), torch.cat(stride_tensor) def dist2bbox(distance, anchor_points, xywh=True, dim=-1): """Transform distance(ltrb) to box(xywh or xyxy).""" lt, rb = torch.split(distance, 2, dim) x1y1 = anchor_points - lt x2y2 = anchor_points + rb if xywh: c_xy = (x1y1 + x2y2) / 2 wh = x2y2 - x1y1 return torch.cat((c_xy, wh), dim) # xywh bbox return torch.cat((x1y1, x2y2), dim) # xyxy bbox def bbox2dist(anchor_points, bbox, reg_max): """Transform bbox(xyxy) to dist(ltrb).""" x1y1, x2y2 
= torch.split(bbox, 2, -1)
    return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01)  # dist (lt, rb)


================================================
FILE: yolo/utils/torch_utils.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license

import math
import os
import platform
import random
import time
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path

import numpy as np
import thop
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel as DDP

import ultralytics
from yolo.utils import DEFAULT_CONFIG_DICT, DEFAULT_CONFIG_KEYS, LOGGER
from yolo.utils.checks import git_describe

from .checks import check_version

LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    # Decorator to make all processes in distributed training wait for each local_master to do something
    initialized = torch.distributed.is_initialized()  # prevent 'Default process group has not been initialized' errors
    if initialized and local_rank not in {-1, 0}:
        dist.barrier(device_ids=[local_rank])
    yield
    if initialized and local_rank == 0:
        dist.barrier(device_ids=[0])


def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9.0')):
    # Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator
    def decorate(fn):
        return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn)

    return decorate


def DDP_model(model):
    # Model DDP creation with checks
    assert not check_version(torch.__version__, '1.12.0', pinned=True), \
        'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
        'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395'
    if check_version(torch.__version__, '1.11.0'):
        return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
    else:
        return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)


def select_device(device='', batch_size=0, newline=False):
    # device = None or 'cpu' or 0 or '0' or '0,1,2,3'
    ver = git_describe() or ultralytics.__version__  # git commit or pip package version
    s = f'Ultralytics YOLOv{ver} 🚀 Python-{platform.python_version()} torch-{torch.__version__} '
    device = str(device).strip().lower().replace('cuda:', '').replace('none', '')  # to string, 'cuda:0' to '0'
    cpu = device == 'cpu'
    mps = device == 'mps'  # Apple Metal Performance Shaders (MPS)
    if cpu or mps:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'  # force torch.cuda.is_available() = False
    elif device:  # non-cpu device requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable - must be before assert is_available()
        assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
            f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"

    if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
        devices = device.split(',') if device else '0'  # range(torch.cuda.device_count())  # i.e.
0,1,6,7 n = len(devices) # device count if n > 1 and batch_size > 0: # check batch_size is divisible by device_count assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' space = ' ' * (len(s) + 1) for i, d in enumerate(devices): p = torch.cuda.get_device_properties(i) s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB arg = 'cuda:0' elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available s += 'MPS\n' arg = 'mps' else: # revert to CPU s += 'CPU\n' arg = 'cpu' if RANK == -1: LOGGER.info(s if newline else s.rstrip()) return torch.device(arg) def time_sync(): # PyTorch-accurate time if torch.cuda.is_available(): torch.cuda.synchronize() return time.time() def fuse_conv_and_bn(conv, bn): # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ fusedconv = nn.Conv2d(conv.in_channels, conv.out_channels, kernel_size=conv.kernel_size, stride=conv.stride, padding=conv.padding, dilation=conv.dilation, groups=conv.groups, bias=True).requires_grad_(False).to(conv.weight.device) # Prepare filters w_conv = conv.weight.clone().view(conv.out_channels, -1) w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) # Prepare spatial bias b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) return fusedconv def model_info(model, verbose=False, imgsz=640): # Model information. imgsz may be int or list, i.e. 
imgsz=640 or imgsz=[640, 320] n_p = get_num_params(model) n_g = get_num_gradients(model) # number gradients if verbose: print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}") for i, (name, p) in enumerate(model.named_parameters()): name = name.replace('module_list.', '') print('%5g %40s %9s %12g %20s %10.3g %10.3g' % (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) flops = get_flops(model, imgsz) fs = f', {flops:.1f} GFLOPs' if flops else '' m = Path(getattr(model, 'yaml_file', '') or model.yaml.get('yaml_file', '')).stem.replace('yolo', 'YOLO') or 'Model' LOGGER.info(f"{m} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") def get_num_params(model): return sum(x.numel() for x in model.parameters()) def get_num_gradients(model): return sum(x.numel() for x in model.parameters() if x.requires_grad) def get_flops(model, imgsz=640): try: model = de_parallel(model) p = next(model.parameters()) stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 # max stride im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float flops = flops * imgsz[0] / stride * imgsz[1] / stride # 640x640 GFLOPs return flops except Exception: return 0 def initialize_weights(model): for m in model.modules(): t = type(m) if t is nn.Conv2d: pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif t is nn.BatchNorm2d: m.eps = 1e-3 m.momentum = 0.03 elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: m.inplace = True def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) # Scales img(bs,3,y,x) by ratio constrained to gs-multiple if ratio == 1.0: return img h, w = img.shape[2:] s = (int(h * ratio), int(w * ratio)) # new size img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize if not same_shape: # pad/crop img h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean def make_divisible(x, divisor): # Returns nearest x divisible by divisor if isinstance(divisor, torch.Tensor): divisor = int(divisor.max()) # to int return math.ceil(x / divisor) * divisor def copy_attr(a, b, include=(), exclude=()): # Copy attributes from b to a, options to only include [...] and to exclude [...] 
for k, v in b.__dict__.items(): if (len(include) and k not in include) or k.startswith('_') or k in exclude: continue else: setattr(a, k, v) def intersect_dicts(da, db, exclude=()): # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape} def is_parallel(model): # Returns True if model is of type DP or DDP return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) def de_parallel(model): # De-parallelize a model: returns single-GPU model if model is of type DP or DDP return model.module if is_parallel(model) else model def one_cycle(y1=0.0, y2=1.0, steps=100): # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 def init_seeds(seed=0, deterministic=False): # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 torch.use_deterministic_algorithms(True) torch.backends.cudnn.deterministic = True os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' os.environ['PYTHONHASHSEED'] = str(seed) class ModelEMA: """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models Keeps a moving average of everything in the model state_dict (parameters and buffers) For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage """ def __init__(self, model, decay=0.9999, tau=2000, updates=0): # Create EMA self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA self.updates = updates # number of EMA updates self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) for p in self.ema.parameters(): p.requires_grad_(False) def update(self, model): # Update EMA parameters self.updates += 1 d = self.decay(self.updates) msd = de_parallel(model).state_dict() # model state_dict for k, v in self.ema.state_dict().items(): if v.dtype.is_floating_point: # true for FP16 and FP32 v *= d v += (1 - d) * msd[k].detach() # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32' def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): # Update EMA attributes copy_attr(self.ema, model, include, exclude) def strip_optimizer(f='best.pt', s=''): """ Strip optimizer from 'f' to finalize training, optionally save as 's'. Usage: from yolo.utils.torch_utils import strip_optimizer from pathlib import Path for f in Path('/Users/glennjocher/Downloads/weights').glob('*.pt'): strip_optimizer(f) Args: f (str): file path to model state to strip the optimizer from. Default is 'best.pt'. s (str): file path to save the model with stripped optimizer to. Default is ''. If not provided, the original file will be overwritten. 
Returns: None """ x = torch.load(f, map_location=torch.device('cpu')) args = {**DEFAULT_CONFIG_DICT, **x['train_args']} # combine model args with default args, preferring model args if x.get('ema'): x['model'] = x['ema'] # replace model with ema for k in 'optimizer', 'best_fitness', 'ema', 'updates': # keys x[k] = None x['epoch'] = -1 x['model'].half() # to FP16 for p in x['model'].parameters(): p.requires_grad = False x['train_args'] = {k: v for k, v in args.items() if k in DEFAULT_CONFIG_KEYS} # strip non-default keys torch.save(x, s or f) mb = os.path.getsize(s or f) / 1E6 # filesize LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") def guess_task_from_head(head): task = None if head.lower() in ["classify", "classifier", "cls", "fc"]: task = "classify" if head.lower() in ["detect"]: task = "detect" if head.lower() in ["segment"]: task = "segment" if not task: raise SyntaxError("task or model not recognized! Please refer the docs at : ") # TODO: add docs links return task def profile(input, ops, n=10, device=None): """ YOLOv5 speed/memory/FLOPs profiler Usage: input = torch.randn(16, 3, 640, 640) m1 = lambda x: x * torch.sigmoid(x) m2 = nn.SiLU() profile(input, [m1, m2], n=100) # profile over 100 iterations """ results = [] if not isinstance(device, torch.device): device = select_device(device) print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" f"{'input':>24s}{'output':>24s}") for x in input if isinstance(input, list) else [input]: x = x.to(device) x.requires_grad = True for m in ops if isinstance(ops, list) else [ops]: m = m.to(device) if hasattr(m, 'to') else m # device m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward try: flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs except Exception: flops = 0 try: for _ in range(n): t[0] = time_sync() y = m(x) t[1] = time_sync() try: _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() t[2] = time_sync() except Exception: # no backward method # print(e) # for debug t[2] = float('nan') tf += (t[1] - t[0]) * 1000 / n # ms per op forward tb += (t[2] - t[1]) * 1000 / n # ms per op backward mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB) s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}') results.append([p, flops, mem, tf, tb, s_in, s_out]) except Exception as e: print(e) results.append(None) torch.cuda.empty_cache() return results ================================================ FILE: yolo/v8/__init__.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from pathlib import Path from yolo.v8 import classify, detect, segment ROOT = Path(__file__).parents[0] # yolov8 ROOT __all__ = ["classify", "segment", "detect"] from yolo.configs import hydra_patch # noqa (patch hydra cli) ================================================ FILE: yolo/v8/detect/__init__.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from .predict import DetectionPredictor, predict from .train import DetectionTrainer, train from .val import DetectionValidator, val 
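Before the tracking script that follows, a minimal sketch of the data contract between the detector and the SORT tracker defined later in this package may help. This example is not part of the repository: the detection values are invented, and it assumes `yolo/v8/detect/sort.py` (and its dependencies such as filterpy) are importable.

# Hedged usage sketch (not repository code): SORT consumes an (N, 6) array of
# [x1, y1, x2, y2, conf, cls] detections each frame and returns one row per
# confirmed track with the track id in the last column.
import numpy as np

from sort import Sort  # yolo/v8/detect/sort.py

tracker = Sort(max_age=5, min_hits=2, iou_threshold=0.2)  # same values as init_tracker() below

# One frame of made-up detections: two boxes with confidence and class id.
dets = np.array([[100., 120., 200., 260., 0.91, 0.],
                 [300., 80., 380., 170., 0.75, 2.]])

tracked = tracker.update(dets)  # call once per frame, even when dets is empty
# Each returned row is [x1, y1, x2, y2, cls, u_dot, v_dot, s_dot, id], which is
# why detect_and_trk.py reads identities from column 8 and categories from column 4.
for row in tracked:
    print(f'track {int(row[8])}: class {int(row[4])} box {np.round(row[:4], 1)}')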
================================================
FILE: yolo/v8/detect/detect_and_trk.py
================================================
import hydra
import torch
import cv2
import numpy as np
from random import randint

from sort import *

from ultralytics.yolo.engine.predictor import BasePredictor
from ultralytics.yolo.utils import DEFAULT_CONFIG, ROOT, ops
from ultralytics.yolo.utils.checks import check_imgsz
from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box

tracker = None


def init_tracker():
    global tracker
    sort_max_age = 5
    sort_min_hits = 2
    sort_iou_thresh = 0.2
    tracker = Sort(max_age=sort_max_age, min_hits=sort_min_hits, iou_threshold=sort_iou_thresh)


rand_color_list = []


def draw_boxes(img, bbox, identities=None, categories=None, names=None, offset=(0, 0)):
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(i) for i in box]
        x1 += offset[0]
        x2 += offset[0]
        y1 += offset[1]
        y2 += offset[1]
        id = int(identities[i]) if identities is not None else 0
        box_center = (int((box[0] + box[2]) / 2), (int((box[1] + box[3]) / 2)))
        label = str(id)
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 253), 2)
        cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), (255, 144, 30), -1)
        cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, [255, 255, 255], 1)
    return img


def random_color_list():
    global rand_color_list
    rand_color_list = []
    for i in range(0, 5005):
        r = randint(0, 255)
        g = randint(0, 255)
        b = randint(0, 255)
        rand_color = (r, g, b)
        rand_color_list.append(rand_color)
#......................................


class DetectionPredictor(BasePredictor):

    def get_annotator(self, img):
        return Annotator(img, line_width=self.args.line_thickness, example=str(self.model.names))

    def preprocess(self, img):
        img = torch.from_numpy(img).to(self.model.device)
        img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
        img /= 255  # 0 - 255 to 0.0 - 1.0
        return img

    def postprocess(self, preds, img, orig_img):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det)
        for i, pred in enumerate(preds):
            shape = orig_img[i].shape if self.webcam else orig_img.shape
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
        return preds

    def write_results(self, idx, preds, batch):
        p, im, im0 = batch
        log_string = ""
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        self.seen += 1
        im0 = im0.copy()
        if self.webcam:  # batch_size >= 1
            log_string += f'{idx}: '
            frame = self.dataset.count
        else:
            frame = getattr(self.dataset, 'frame', 0)  # tracker

        self.data_path = p
        save_path = str(self.save_dir / p.name)  # im.jpg
        self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
        log_string += '%gx%g ' % im.shape[2:]  # print string
        self.annotator = self.get_annotator(im0)

        det = preds[idx]
        self.all_outputs.append(det)
        if len(det) == 0:
            return log_string
        for c in det[:, 5].unique():
            n = (det[:, 5] == c).sum()  # detections per class
            log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
        # ..................USE TRACK FUNCTION....................
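        # Tracking step: the lines below (1) pack this frame's detections into the
        # (N, 6) [x1, y1, x2, y2, conf, cls] array that SORT expects, (2) run the
        # Kalman predict/associate/update cycle, (3) draw each live tracker's
        # centroid trail in its pre-generated random color, and (4) overlay an
        # ID-labelled box for every confirmed track.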
        dets_to_sort = np.empty((0, 6))
        for x1, y1, x2, y2, conf, detclass in det.cpu().detach().numpy():
            dets_to_sort = np.vstack((dets_to_sort, np.array([x1, y1, x2, y2, conf, detclass])))

        tracked_dets = tracker.update(dets_to_sort)
        tracks = tracker.getTrackers()

        for track in tracks:
            [cv2.line(im0,
                      (int(track.centroidarr[i][0]), int(track.centroidarr[i][1])),
                      (int(track.centroidarr[i + 1][0]), int(track.centroidarr[i + 1][1])),
                      rand_color_list[track.id], thickness=3)
             for i, _ in enumerate(track.centroidarr)
             if i < len(track.centroidarr) - 1]

        if len(tracked_dets) > 0:
            bbox_xyxy = tracked_dets[:, :4]
            identities = tracked_dets[:, 8]
            categories = tracked_dets[:, 4]
            draw_boxes(im0, bbox_xyxy, identities, categories, self.model.names)

        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        return log_string


@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
def predict(cfg):
    init_tracker()
    random_color_list()
    cfg.model = cfg.model or "yolov8n.pt"
    cfg.imgsz = check_imgsz(cfg.imgsz, min_dim=2)  # check image size
    cfg.source = cfg.source if cfg.source is not None else ROOT / "assets"
    predictor = DetectionPredictor(cfg)
    predictor()


if __name__ == "__main__":
    predict()


================================================
FILE: yolo/v8/detect/predict.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license
import hydra
import torch

from ultralytics.yolo.engine.predictor import BasePredictor
from ultralytics.yolo.utils import DEFAULT_CONFIG, ROOT, ops
from ultralytics.yolo.utils.checks import check_imgsz
from ultralytics.yolo.utils.plotting import Annotator, colors, save_one_box


class DetectionPredictor(BasePredictor):

    def get_annotator(self, img):
        return Annotator(img, line_width=self.args.line_thickness, example=str(self.model.names))

    def preprocess(self, img):
        img = torch.from_numpy(img).to(self.model.device)
        img = img.half() if self.model.fp16 else img.float()  # uint8 to fp16/32
        img /= 255  # 0 - 255 to 0.0 - 1.0
        return img

    def postprocess(self, preds, img, orig_img):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        agnostic=self.args.agnostic_nms,
                                        max_det=self.args.max_det)
        for i, pred in enumerate(preds):
            shape = orig_img[i].shape if self.webcam else orig_img.shape
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], shape).round()
        return preds

    def write_results(self, idx, preds, batch):
        p, im, im0 = batch
        log_string = ""
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        self.seen += 1
        im0 = im0.copy()
        if self.webcam:  # batch_size >= 1
            log_string += f'{idx}: '
            frame = self.dataset.count
        else:
            frame = getattr(self.dataset, 'frame', 0)

        self.data_path = p
        # save_path = str(self.save_dir / p.name)  # im.jpg
        self.txt_path = str(self.save_dir / 'labels' / p.stem) + ('' if self.dataset.mode == 'image' else f'_{frame}')
        log_string += '%gx%g ' % im.shape[2:]  # print string
        self.annotator = self.get_annotator(im0)

        det = preds[idx]
        self.all_outputs.append(det)
        if len(det) == 0:
            return log_string
        for c in det[:, 5].unique():
            n = (det[:, 5] == c).sum()  # detections per class
            log_string += f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, "
        # write
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        for *xyxy, conf, cls in reversed(det):
            if self.args.save_txt:  # Write to file
                xywh = (ops.xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                line = (cls, *xywh, conf) if self.args.save_conf else (cls, *xywh)  # label format
                with open(f'{self.txt_path}.txt', 'a') as
f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if self.args.save or self.args.save_crop or self.args.show: # Add bbox to image c = int(cls) # integer class label = None if self.args.hide_labels else ( self.model.names[c] if self.args.hide_conf else f'{self.model.names[c]} {conf:.2f}') self.annotator.box_label(xyxy, label, color=colors(c, True)) if self.args.save_crop: imc = im0.copy() save_one_box(xyxy, imc, file=self.save_dir / 'crops' / self.model.model.names[c] / f'{self.data_path.stem}.jpg', BGR=True) return log_string @hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name) def predict(cfg): cfg.model = cfg.model or "yolov8n.pt" cfg.imgsz = check_imgsz(cfg.imgsz, min_dim=2) # check image size cfg.source = cfg.source or ROOT / "assets" predictor = DetectionPredictor(cfg) predictor() if __name__ == "__main__": predict() ================================================ FILE: yolo/v8/detect/sort.py ================================================ from __future__ import print_function import os import numpy as np import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import matplotlib.patches as patches from skimage import io import glob import time import argparse from filterpy.kalman import KalmanFilter np.random.seed(0) def linear_assignment(cost_matrix): try: import lap #linear assignment problem solver _, x, y = lap.lapjv(cost_matrix, extend_cost = True) return np.array([[y[i],i] for i in x if i>=0]) except ImportError: from scipy.optimize import linear_sum_assignment x,y = linear_sum_assignment(cost_matrix) return np.array(list(zip(x,y))) """From SORT: Computes IOU between two boxes in the form [x1,y1,x2,y2]""" def iou_batch(bb_test, bb_gt): bb_gt = np.expand_dims(bb_gt, 0) bb_test = np.expand_dims(bb_test, 1) xx1 = np.maximum(bb_test[...,0], bb_gt[..., 0]) yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) w = np.maximum(0., xx2 - xx1) h = np.maximum(0., yy2 - yy1) wh = w * h o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) return(o) """Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the center of the box and s is the scale/area and r is the aspect ratio""" def convert_bbox_to_z(bbox): w = bbox[2] - bbox[0] h = bbox[3] - bbox[1] x = bbox[0] + w/2. y = bbox[1] + h/2. s = w * h #scale is just area r = w / float(h) return np.array([x, y, s, r]).reshape((4, 1)) """Takes a bounding box in the centre form [x,y,s,r] and returns it in the form [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right""" def convert_x_to_bbox(x, score=None): w = np.sqrt(x[2] * x[3]) h = x[2] / w if(score==None): return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) else: return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) """This class represents the internal state of individual tracked objects observed as bbox.""" class KalmanBoxTracker(object): count = 0 def __init__(self, bbox): """ Initialize a tracker using initial bounding box Parameter 'bbox' must have 'detected class' int number at the -1 position. 
""" self.kf = KalmanFilter(dim_x=7, dim_z=4) self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],[0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) self.kf.R[2:,2:] *= 10. # R: Covariance matrix of measurement noise (set to high for noisy inputs -> more 'inertia' of boxes') self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities self.kf.P *= 10. self.kf.Q[-1,-1] *= 0.5 # Q: Covariance matrix of process noise (set to high for erratically moving things) self.kf.Q[4:,4:] *= 0.5 self.kf.x[:4] = convert_bbox_to_z(bbox) # STATE VECTOR self.time_since_update = 0 self.id = KalmanBoxTracker.count KalmanBoxTracker.count += 1 self.history = [] self.hits = 0 self.hit_streak = 0 self.age = 0 self.centroidarr = [] CX = (bbox[0]+bbox[2])//2 CY = (bbox[1]+bbox[3])//2 self.centroidarr.append((CX,CY)) #keep yolov5 detected class information self.detclass = bbox[5] # If we want to store bbox self.bbox_history = [bbox] def update(self, bbox): """ Updates the state vector with observed bbox """ self.time_since_update = 0 self.history = [] self.hits += 1 self.hit_streak += 1 self.kf.update(convert_bbox_to_z(bbox)) self.detclass = bbox[5] CX = (bbox[0]+bbox[2])//2 CY = (bbox[1]+bbox[3])//2 self.centroidarr.append((CX,CY)) self.bbox_history.append(bbox) def predict(self): """ Advances the state vector and returns the predicted bounding box estimate """ if((self.kf.x[6]+self.kf.x[2])<=0): self.kf.x[6] *= 0.0 self.kf.predict() self.age += 1 if(self.time_since_update>0): self.hit_streak = 0 self.time_since_update += 1 self.history.append(convert_x_to_bbox(self.kf.x)) # bbox=self.history[-1] # CX = (bbox[0]+bbox[2])/2 # CY = (bbox[1]+bbox[3])/2 # self.centroidarr.append((CX,CY)) return self.history[-1] def get_state(self): """ Returns the current bounding box estimate # test arr1 = np.array([[1,2,3,4]]) arr2 = np.array([0]) arr3 = np.expand_dims(arr2, 0) np.concatenate((arr1,arr3), axis=1) """ arr_detclass = np.expand_dims(np.array([self.detclass]), 0) arr_u_dot = np.expand_dims(self.kf.x[4],0) arr_v_dot = np.expand_dims(self.kf.x[5],0) arr_s_dot = np.expand_dims(self.kf.x[6],0) return np.concatenate((convert_x_to_bbox(self.kf.x), arr_detclass, arr_u_dot, arr_v_dot, arr_s_dot), axis=1) def associate_detections_to_trackers(detections, trackers, iou_threshold = 0.3): """ Assigns detections to tracked object (both represented as bounding boxes) Returns 3 lists of 1. matches, 2. unmatched_detections 3. 
unmatched_trackers """ if(len(trackers)==0): return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) iou_matrix = iou_batch(detections, trackers) if min(iou_matrix.shape) > 0: a = (iou_matrix > iou_threshold).astype(np.int32) if a.sum(1).max() == 1 and a.sum(0).max() ==1: matched_indices = np.stack(np.where(a), axis=1) else: matched_indices = linear_assignment(-iou_matrix) else: matched_indices = np.empty(shape=(0,2)) unmatched_detections = [] for d, det in enumerate(detections): if(d not in matched_indices[:,0]): unmatched_detections.append(d) unmatched_trackers = [] for t, trk in enumerate(trackers): if(t not in matched_indices[:,1]): unmatched_trackers.append(t) #filter out matched with low IOU matches = [] for m in matched_indices: if(iou_matrix[m[0], m[1]]= self.min_hits or self.frame_count <= self.min_hits): ret.append(np.concatenate((d, [trk.id+1])).reshape(1,-1)) #+1'd because MOT benchmark requires positive value i -= 1 #remove dead tracklet if(trk.time_since_update >self.max_age): self.trackers.pop(i) if(len(ret) > 0): return np.concatenate(ret) return np.empty((0,6)) def parse_args(): """Parse input arguments.""" parser = argparse.ArgumentParser(description='SORT demo') parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data') parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train') parser.add_argument("--max_age", help="Maximum number of frames to keep alive a track without associated detections.", type=int, default=1) parser.add_argument("--min_hits", help="Minimum number of associated detections before track is initialised.", type=int, default=3) parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3) args = parser.parse_args() return args if __name__ == '__main__': # all train args = parse_args() display = args.display phase = args.phase total_time = 0.0 total_frames = 0 colours = np.random.rand(32, 3) #used only for display if(display): if not os.path.exists('mot_benchmark'): print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). 
E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') exit() plt.ion() fig = plt.figure() ax1 = fig.add_subplot(111, aspect='equal') if not os.path.exists('output'): os.makedirs('output') pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt') for seq_dets_fn in glob.glob(pattern): mot_tracker = Sort(max_age=args.max_age, min_hits=args.min_hits, iou_threshold=args.iou_threshold) #create instance of the SORT tracker seq_dets = np.loadtxt(seq_dets_fn, delimiter=',') seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0] with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file: print("Processing %s."%(seq)) for frame in range(int(seq_dets[:,0].max())): frame += 1 #detection and frame numbers begin at 1 dets = seq_dets[seq_dets[:, 0]==frame, 2:7] dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] total_frames += 1 if(display): fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame)) im =io.imread(fn) ax1.imshow(im) plt.title(seq + ' Tracked Targets') start_time = time.time() trackers = mot_tracker.update(dets) cycle_time = time.time() - start_time total_time += cycle_time for d in trackers: print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) if(display): d = d.astype(np.int32) ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) if(display): fig.canvas.flush_events() plt.draw() ax1.cla() print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time)) if(display): print("Note: to get real runtime results run without the option: --display") ================================================ FILE: yolo/v8/detect/train.py ================================================ # Ultralytics YOLO 🚀, GPL-3.0 license from copy import copy import hydra import torch import torch.nn as nn from ultralytics.nn.tasks import DetectionModel from ultralytics.yolo import v8 from ultralytics.yolo.data import build_dataloader from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader from ultralytics.yolo.engine.trainer import BaseTrainer from ultralytics.yolo.utils import DEFAULT_CONFIG, colorstr from ultralytics.yolo.utils.loss import BboxLoss from ultralytics.yolo.utils.ops import xywh2xyxy from ultralytics.yolo.utils.plotting import plot_images, plot_results from ultralytics.yolo.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors from ultralytics.yolo.utils.torch_utils import de_parallel # BaseTrainer python usage class DetectionTrainer(BaseTrainer): def get_dataloader(self, dataset_path, batch_size, mode="train", rank=0): # TODO: manage splits differently # calculate stride - check if model is initialized gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32) return create_dataloader(path=dataset_path, imgsz=self.args.imgsz, batch_size=batch_size, stride=gs, hyp=dict(self.args), augment=mode == "train", cache=self.args.cache, pad=0 if mode == "train" else 0.5, rect=self.args.rect, rank=rank, workers=self.args.workers, close_mosaic=self.args.close_mosaic != 0, prefix=colorstr(f'{mode}: '), shuffle=mode == "train", seed=self.args.seed)[0] if self.args.v5loader else \ build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, rank=rank, mode=mode)[0] def preprocess_batch(self, batch): batch["img"] = batch["img"].to(self.device, non_blocking=True).float() / 255 return batch def set_model_attributes(self): nl = 
de_parallel(self.model).model[-1].nl # number of detection layers (to scale hyps) self.args.box *= 3 / nl # scale to layers # self.args.cls *= self.data["nc"] / 80 * 3 / nl # scale to classes and layers self.args.cls *= (self.args.imgsz / 640) ** 2 * 3 / nl # scale to image size and layers self.model.nc = self.data["nc"] # attach number of classes to model self.model.args = self.args # attach hyperparameters to model # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc self.model.names = self.data["names"] def get_model(self, cfg=None, weights=None, verbose=True): model = DetectionModel(cfg, ch=3, nc=self.data["nc"], verbose=verbose) if weights: model.load(weights) return model def get_validator(self): self.loss_names = 'box_loss', 'cls_loss', 'dfl_loss' return v8.detect.DetectionValidator(self.test_loader, save_dir=self.save_dir, logger=self.console, args=copy(self.args)) def criterion(self, preds, batch): if not hasattr(self, 'compute_loss'): self.compute_loss = Loss(de_parallel(self.model)) return self.compute_loss(preds, batch) def label_loss_items(self, loss_items=None, prefix="train"): """ Returns a loss dict with labelled training loss items tensor """ # Not needed for classification but necessary for segmentation & detection keys = [f"{prefix}/{x}" for x in self.loss_names] if loss_items is not None: loss_items = [round(float(x), 5) for x in loss_items] # convert tensors to 5 decimal place floats return dict(zip(keys, loss_items)) else: return keys def progress_string(self): return ('\n' + '%11s' * (4 + len(self.loss_names))) % ('Epoch', 'GPU_mem', *self.loss_names, 'Instances', 'Size') def plot_training_samples(self, batch, ni): plot_images(images=batch["img"], batch_idx=batch["batch_idx"], cls=batch["cls"].squeeze(-1), bboxes=batch["bboxes"], paths=batch["im_file"], fname=self.save_dir / f"train_batch{ni}.jpg") def plot_metrics(self): plot_results(file=self.csv) # save results.png # Criterion class for computing training losses class Loss: def __init__(self, model): # model must be de-paralleled device = next(model.parameters()).device # get model device h = model.args # hyperparameters m = model.model[-1] # Detect() module self.bce = nn.BCEWithLogitsLoss(reduction='none') self.hyp = h self.stride = m.stride # model strides self.nc = m.nc # number of classes self.no = m.no self.reg_max = m.reg_max self.device = device self.use_dfl = m.reg_max > 1 self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0) self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device) self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device) def preprocess(self, targets, batch_size, scale_tensor): if targets.shape[0] == 0: out = torch.zeros(batch_size, 0, 5, device=self.device) else: i = targets[:, 0] # image index _, counts = i.unique(return_counts=True) out = torch.zeros(batch_size, counts.max(), 5, device=self.device) for j in range(batch_size): matches = i == j n = matches.sum() if n: out[j, :n] = targets[matches, 1:] out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) return out def bbox_decode(self, anchor_points, pred_dist): if self.use_dfl: b, a, c = pred_dist.shape # batch, anchors, channels pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * 


# Criterion class for computing training losses
class Loss:

    def __init__(self, model):  # model must be de-paralleled
        device = next(model.parameters()).device  # get model device
        h = model.args  # hyperparameters

        m = model.model[-1]  # Detect() module
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        self.hyp = h
        self.stride = m.stride  # model strides
        self.nc = m.nc  # number of classes
        self.no = m.no
        self.reg_max = m.reg_max
        self.device = device

        self.use_dfl = m.reg_max > 1
        self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)
        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)

    def preprocess(self, targets, batch_size, scale_tensor):
        if targets.shape[0] == 0:
            out = torch.zeros(batch_size, 0, 5, device=self.device)
        else:
            i = targets[:, 0]  # image index
            _, counts = i.unique(return_counts=True)
            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
            for j in range(batch_size):
                matches = i == j
                n = matches.sum()
                if n:
                    out[j, :n] = targets[matches, 1:]
            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
        return out

    def bbox_decode(self, anchor_points, pred_dist):
        if self.use_dfl:
            b, a, c = pred_dist.shape  # batch, anchors, channels
            pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))
            # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)
        return dist2bbox(pred_dist, anchor_points, xywh=False)

    def __call__(self, preds, batch):
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        feats = preds[1] if isinstance(preds, tuple) else preds
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1)

        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # targets
        targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)

        # pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)

        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            pred_scores.detach().sigmoid(), (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor, gt_labels, gt_bboxes, mask_gt)

        target_bboxes /= stride_tensor
        target_scores_sum = target_scores.sum()

        # cls loss
        # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way
        loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

        # bbox loss
        if fg_mask.sum():
            loss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores,
                                              target_scores_sum, fg_mask)

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)


@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
def train(cfg):
    cfg.model = cfg.model or "yolov8n.yaml"
    cfg.data = cfg.data or "coco128.yaml"  # or yolo.ClassificationDataset("mnist")
    # trainer = DetectionTrainer(cfg)
    # trainer.train()
    from ultralytics import YOLO
    model = YOLO(cfg.model)
    model.train(**cfg)


if __name__ == "__main__":
    """
    CLI usage:
    python ultralytics/yolo/v8/detect/train.py model=yolov8n.yaml data=coco128 epochs=100 imgsz=640

    TODO:
    yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
    """
    train()
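

# Illustrative sketch (added for exposition; not part of the original file).
# train() above forwards Hydra-style overrides to the ultralytics YOLO API, so the
# programmatic equivalent of the CLI usage documented in __main__ is roughly:
def _train_sketch():
    from ultralytics import YOLO
    model = YOLO("yolov8n.yaml")  # build a fresh model from its YAML config
    model.train(data="coco128.yaml", epochs=100, imgsz=640)  # mirrors the CLI overrides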


================================================
FILE: yolo/v8/detect/val.py
================================================
# Ultralytics YOLO 🚀, GPL-3.0 license
import os
from pathlib import Path

import hydra
import numpy as np
import torch

from ultralytics.yolo.data import build_dataloader
from ultralytics.yolo.data.dataloaders.v5loader import create_dataloader
from ultralytics.yolo.engine.validator import BaseValidator
from ultralytics.yolo.utils import DEFAULT_CONFIG, colorstr, ops, yaml_load
from ultralytics.yolo.utils.checks import check_file, check_requirements
from ultralytics.yolo.utils.metrics import ConfusionMatrix, DetMetrics, box_iou
from ultralytics.yolo.utils.plotting import output_to_target, plot_images
from ultralytics.yolo.utils.torch_utils import de_parallel


class DetectionValidator(BaseValidator):

    def __init__(self, dataloader=None, save_dir=None, pbar=None, logger=None, args=None):
        super().__init__(dataloader, save_dir, pbar, logger, args)
        self.data_dict = yaml_load(check_file(self.args.data), append_filename=True) if self.args.data else None
        self.is_coco = False
        self.class_map = None
        self.metrics = DetMetrics(save_dir=self.save_dir, plot=self.args.plots)
        self.iouv = torch.linspace(0.5, 0.95, 10)  # iou vector for mAP@0.5:0.95
        self.niou = self.iouv.numel()

    def preprocess(self, batch):
        batch["img"] = batch["img"].to(self.device, non_blocking=True)
        batch["img"] = (batch["img"].half() if self.args.half else batch["img"].float()) / 255
        for k in ["batch_idx", "cls", "bboxes"]:
            batch[k] = batch[k].to(self.device)

        nb, _, height, width = batch["img"].shape
        batch["bboxes"] *= torch.tensor((width, height, width, height), device=self.device)  # to pixels
        self.lb = [torch.cat([batch["cls"], batch["bboxes"]], dim=-1)[batch["batch_idx"] == i]
                   for i in range(nb)] if self.args.save_hybrid else []  # for autolabelling

        return batch

    def init_metrics(self, model):
        head = model.model[-1] if self.training else model.model.model[-1]
        self.is_coco = self.data.get('val', '').endswith(f'coco{os.sep}val2017.txt')  # is COCO dataset
        self.class_map = ops.coco80_to_coco91_class() if self.is_coco else list(range(1000))
        self.args.save_json |= self.is_coco and not self.training  # run on final val if training COCO
        self.nc = head.nc
        self.names = model.names
        self.metrics.names = self.names
        self.confusion_matrix = ConfusionMatrix(nc=self.nc)
        self.seen = 0
        self.jdict = []
        self.stats = []

    def get_desc(self):
        return ('%22s' + '%11s' * 6) % ('Class', 'Images', 'Instances', 'Box(P', "R", "mAP50", "mAP50-95)")

    def postprocess(self, preds):
        preds = ops.non_max_suppression(preds,
                                        self.args.conf,
                                        self.args.iou,
                                        labels=self.lb,
                                        multi_label=True,
                                        agnostic=self.args.single_cls,
                                        max_det=self.args.max_det)
        return preds

    def update_metrics(self, preds, batch):
        # Metrics
        for si, pred in enumerate(preds):
            idx = batch["batch_idx"] == si
            cls = batch["cls"][idx]
            bbox = batch["bboxes"][idx]
            nl, npr = cls.shape[0], pred.shape[0]  # number of labels, predictions
            shape = batch["ori_shape"][si]
            correct_bboxes = torch.zeros(npr, self.niou, dtype=torch.bool, device=self.device)  # init
            self.seen += 1

            if npr == 0:
                if nl:
                    self.stats.append((correct_bboxes, *torch.zeros((2, 0), device=self.device), cls.squeeze(-1)))
                    if self.args.plots:
                        self.confusion_matrix.process_batch(detections=None, labels=cls.squeeze(-1))
                continue

            # Predictions
            if self.args.single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            ops.scale_boxes(batch["img"][si].shape[1:], predn[:, :4], shape,
                            ratio_pad=batch["ratio_pad"][si])  # native-space pred

            # Evaluate
            if nl:
                tbox = ops.xywh2xyxy(bbox)  # target boxes
                ops.scale_boxes(batch["img"][si].shape[1:], tbox, shape,
                                ratio_pad=batch["ratio_pad"][si])  # native-space labels
                labelsn = torch.cat((cls, tbox), 1)  # native-space labels
                correct_bboxes = self._process_batch(predn, labelsn)
                # TODO: maybe remove these `self.` arguments as they already are member variable
                if self.args.plots:
                    self.confusion_matrix.process_batch(predn, labelsn)
            self.stats.append((correct_bboxes, pred[:, 4], pred[:, 5], cls.squeeze(-1)))  # (conf, pcls, tcls)

            # Save
            if self.args.save_json:
                self.pred_to_json(predn, batch["im_file"][si])
            # if self.args.save_txt:
            #    save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / f'{path.stem}.txt')

    def get_stats(self):
        stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*self.stats)]  # to numpy
        if len(stats) and stats[0].any():
            self.metrics.process(*stats)
        self.nt_per_class = np.bincount(stats[-1].astype(int), minlength=self.nc)  # number of targets per class
        return self.metrics.results_dict

    def print_results(self):
        pf = '%22s' + '%11i' * 2 + '%11.3g' * len(self.metrics.keys)  # print format
        self.logger.info(pf % ("all", self.seen, self.nt_per_class.sum(), *self.metrics.mean_results()))
        if self.nt_per_class.sum() == 0:
            self.logger.warning(
                f'WARNING ⚠️ no labels found in {self.args.task} set, can not compute metrics without labels')

        # Print results per class
        if (self.args.verbose or not self.training) and self.nc > 1 and len(self.stats):
            for i, c in enumerate(self.metrics.ap_class_index):
                self.logger.info(pf % (self.names[c], self.seen, self.nt_per_class[c], *self.metrics.class_result(i)))

        if self.args.plots:
            self.confusion_matrix.plot(save_dir=self.save_dir, names=list(self.names.values()))

    def _process_batch(self, detections, labels):
        """
        Return correct prediction matrix
        Arguments:
            detections (array[N, 6]), x1, y1, x2, y2, conf, class
            labels (array[M, 5]), class, x1, y1, x2, y2
        Returns:
            correct (array[N, 10]), for 10 IoU levels
        """
        iou = box_iou(labels[:, 1:], detections[:, :4])
        correct = np.zeros((detections.shape[0], self.iouv.shape[0])).astype(bool)
        correct_class = labels[:, 0:1] == detections[:, 5]
        for i in range(len(self.iouv)):
            x = torch.where((iou >= self.iouv[i]) & correct_class)  # IoU > threshold and classes match
            if x[0].shape[0]:
                matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]),
                                    1).cpu().numpy()  # [label, detect, iou]
                if x[0].shape[0] > 1:
                    matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                    # matches = matches[matches[:, 2].argsort()[::-1]]
                    matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
                correct[matches[:, 1].astype(int), i] = True
        return torch.tensor(correct, dtype=torch.bool, device=detections.device)

    def get_dataloader(self, dataset_path, batch_size):
        # TODO: manage splits differently
        # calculate stride - check if model is initialized
        gs = max(int(de_parallel(self.model).stride if self.model else 0), 32)
        return create_dataloader(path=dataset_path,
                                 imgsz=self.args.imgsz,
                                 batch_size=batch_size,
                                 stride=gs,
                                 hyp=dict(self.args),
                                 cache=False,
                                 pad=0.5,
                                 rect=True,
                                 workers=self.args.workers,
                                 prefix=colorstr(f'{self.args.mode}: '),
                                 shuffle=False,
                                 seed=self.args.seed)[0] if self.args.v5loader else \
            build_dataloader(self.args, batch_size, img_path=dataset_path, stride=gs, mode="val")[0]

    def plot_val_samples(self, batch, ni):
        plot_images(batch["img"],
                    batch["batch_idx"],
                    batch["cls"].squeeze(-1),
                    batch["bboxes"],
                    paths=batch["im_file"],
                    fname=self.save_dir / f"val_batch{ni}_labels.jpg",
                    names=self.names)

    def plot_predictions(self, batch, preds, ni):
        plot_images(batch["img"],
                    *output_to_target(preds, max_det=15),
                    paths=batch["im_file"],
                    fname=self.save_dir / f'val_batch{ni}_pred.jpg',
                    names=self.names)  # pred

    def pred_to_json(self, predn, filename):
        stem = Path(filename).stem
        image_id = int(stem) if stem.isnumeric() else stem
        box = ops.xyxy2xywh(predn[:, :4])  # xywh
        box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
        for p, b in zip(predn.tolist(), box.tolist()):
            self.jdict.append({
                'image_id': image_id,
                'category_id': self.class_map[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'score': round(p[4], 5)})

    def eval_json(self, stats):
        if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data['path'] / "annotations/instances_val2017.json"  # annotations
            pred_json = self.save_dir / "predictions.json"  # predictions
            self.logger.info(f'\nEvaluating pycocotools mAP using {pred_json} and {anno_json}...')
            try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
                check_requirements('pycocotools>=2.0.6')
                from pycocotools.coco import COCO  # noqa
                from pycocotools.cocoeval import COCOeval  # noqa

                for x in anno_json, pred_json:
                    assert x.is_file(), f"{x} file not found"
                anno = COCO(str(anno_json))  # init annotations api
                pred = anno.loadRes(str(pred_json))  # init predictions api (must pass string, not Path)
                eval = COCOeval(anno, pred, 'bbox')
                if self.is_coco:
                    eval.params.imgIds = [int(Path(x).stem) for x in self.dataloader.dataset.im_files]  # images to eval
                eval.evaluate()
                eval.accumulate()
                eval.summarize()
                stats[self.metrics.keys[-1]], stats[self.metrics.keys[-2]] = eval.stats[:2]  # update mAP50-95 and mAP50
            except Exception as e:
                self.logger.warning(f'pycocotools unable to run: {e}')
        return stats


@hydra.main(version_base=None, config_path=str(DEFAULT_CONFIG.parent), config_name=DEFAULT_CONFIG.name)
def val(cfg):
    cfg.data = cfg.data or "coco128.yaml"
    validator = DetectionValidator(args=cfg)
    validator(model=cfg.model)


if __name__ == "__main__":
    val()
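

# Illustrative sketch (added for exposition; not part of the original file).
# val() above is driven through Hydra, analogous to the train.py CLI, e.g.:
#   python ultralytics/yolo/v8/detect/val.py model=yolov8n.pt data=coco128.yaml
# A rough programmatic equivalent that bypasses Hydra (loading the default config
# via OmegaConf is an assumption, not something this file does):
def _val_sketch():
    from omegaconf import OmegaConf  # Hydra configs are OmegaConf containers
    cfg = OmegaConf.load(str(DEFAULT_CONFIG))  # the package's default.yaml
    cfg.model = "yolov8n.pt"
    cfg.data = "coco128.yaml"
    validator = DetectionValidator(args=cfg)
    validator(model=cfg.model)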